web2cbz: added archiving.
This commit is contained in:
parent
b4c1f75a2a
commit
8a08523145
1 changed files with 27 additions and 4 deletions
31
bin/web2cbz
31
bin/web2cbz
|
@ -7,6 +7,7 @@ require 'rubygems'
|
||||||
require 'bundler/setup'
|
require 'bundler/setup'
|
||||||
require 'nokogiri'
|
require 'nokogiri'
|
||||||
require 'open-uri'
|
require 'open-uri'
|
||||||
|
require 'zipruby'
|
||||||
|
|
||||||
config_path = ARGV.shift
|
config_path = ARGV.shift
|
||||||
config_fh = File.open config_path
|
config_fh = File.open config_path
|
||||||
|
@ -15,6 +16,8 @@ pp config_yml
|
||||||
|
|
||||||
class Page
|
class Page
|
||||||
attr_reader :url
|
attr_reader :url
|
||||||
|
attr_accessor :prev, :next
|
||||||
|
attr_accessor :index
|
||||||
|
|
||||||
|
|
||||||
def initialize url, config
|
def initialize url, config
|
||||||
|
@ -25,6 +28,7 @@ class Page
|
||||||
@next = nil
|
@next = nil
|
||||||
@first = nil
|
@first = nil
|
||||||
@last = nil
|
@last = nil
|
||||||
|
@index = 0
|
||||||
|
|
||||||
@doc = Nokogiri::HTML(open(url))
|
@doc = Nokogiri::HTML(open(url))
|
||||||
end
|
end
|
||||||
|
@ -32,7 +36,8 @@ class Page
|
||||||
def first
|
def first
|
||||||
return @first unless @first.nil?
|
return @first unless @first.nil?
|
||||||
first_url = @doc.xpath @config['first_xpath']
|
first_url = @doc.xpath @config['first_xpath']
|
||||||
#puts "first %s" % first_url.text
|
pp first_url.inspect
|
||||||
|
puts "first %s" % first_url.text
|
||||||
url = _make_url @url, first_url.text
|
url = _make_url @url, first_url.text
|
||||||
@first = Page.new url, @config
|
@first = Page.new url, @config
|
||||||
return @first
|
return @first
|
||||||
|
@ -41,7 +46,8 @@ class Page
|
||||||
def last
|
def last
|
||||||
return @last unless @last.nil?
|
return @last unless @last.nil?
|
||||||
last_url = @doc.xpath @config['last_xpath']
|
last_url = @doc.xpath @config['last_xpath']
|
||||||
#puts "last %s" % last_url.text
|
pp last_url.inspect
|
||||||
|
puts "last %s" % last_url.text
|
||||||
url = _make_url @url, last_url.text
|
url = _make_url @url, last_url.text
|
||||||
@last = Page.new url, @config
|
@last = Page.new url, @config
|
||||||
return @last
|
return @last
|
||||||
|
@ -50,18 +56,22 @@ class Page
|
||||||
def next
|
def next
|
||||||
return @next unless @next.nil?
|
return @next unless @next.nil?
|
||||||
next_url = @doc.xpath @config['next_xpath']
|
next_url = @doc.xpath @config['next_xpath']
|
||||||
#puts "next %s" % next_url.text
|
pp next_url.inspect
|
||||||
|
puts "next %s" % next_url.text
|
||||||
url = _make_url @url, next_url.text
|
url = _make_url @url, next_url.text
|
||||||
@next = Page.new url, @config
|
@next = Page.new url, @config
|
||||||
|
@next.prev = self
|
||||||
return @next
|
return @next
|
||||||
end
|
end
|
||||||
|
|
||||||
def prev
|
def prev
|
||||||
return @prev unless @prev.nil?
|
return @prev unless @prev.nil?
|
||||||
prev_url = @doc.xpath @config['prev_xpath']
|
prev_url = @doc.xpath @config['prev_xpath']
|
||||||
#puts "prev %s" % prev_url.text
|
pp prev_url.inspect
|
||||||
|
puts "prev %s" % prev_url.text
|
||||||
url = _make_url @url, prev_url.text
|
url = _make_url @url, prev_url.text
|
||||||
@prev = Page.new url, @config
|
@prev = Page.new url, @config
|
||||||
|
@prev.next = self
|
||||||
return @prev
|
return @prev
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -95,12 +105,25 @@ class Page
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
#FileUtils.mkdir_p config_yml['name']
|
||||||
page = Page.new config_yml['base_url'],config_yml
|
page = Page.new config_yml['base_url'],config_yml
|
||||||
page = page.first
|
page = page.first
|
||||||
|
|
||||||
|
ar = Zip::Archive.open( config_yml['name'] + '.zip', Zip::CREATE | Zip::TRUNC)
|
||||||
while not page.nil? do
|
while not page.nil? do
|
||||||
puts "PAGE %s" % page.url
|
puts "PAGE %s" % page.url
|
||||||
puts " image = %s" % page.image.inspect
|
puts " image = %s" % page.image.inspect
|
||||||
|
image_format = page.image.gsub(/^.*\.(.*?)$/,'\1')
|
||||||
|
image_path = File.join config_yml['name'], ("page_%04d.%s" % [page.index, image_format])
|
||||||
|
#open( image_path, "wb" ) do |image_fh|
|
||||||
|
# image_fh.write( )
|
||||||
|
#end
|
||||||
|
ar.add_buffer( image_path, open( page.image ).read )
|
||||||
|
page_next = page.next
|
||||||
|
break if page_next.url == page.url
|
||||||
|
page_next.index = page.index + 1
|
||||||
page = page.next
|
page = page.next
|
||||||
sleep config_yml['sleep']
|
sleep config_yml['sleep']
|
||||||
end
|
end
|
||||||
|
ar.close
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue