Add support for device list & per-device parameters
This commit is contained in:
parent
9943ce0632
commit
d9fe2e1230
7 changed files with 189 additions and 26 deletions
|
@ -2,6 +2,7 @@
|
|||
# Load external dependencies
|
||||
require 'thor'
|
||||
require 'celluloid/current'
|
||||
require 'mechanize'
|
||||
require 'selenium-webdriver'
|
||||
require 'yaml'
|
||||
require 'thor'
|
||||
|
@ -15,6 +16,7 @@ require 'webgalien/actors/work_actor'
|
|||
|
||||
require 'webgalien/screenshot'
|
||||
require 'webgalien/sitemap'
|
||||
require 'webgalien/devices'
|
||||
|
||||
# Load cli
|
||||
require 'webgalien/cli'
|
||||
|
|
|
@ -4,13 +4,17 @@ module Webgalien
|
|||
class CropPngActor
|
||||
include Celluloid
|
||||
|
||||
# Stores the directory that #perform joins with "crop-<id>.png" to build the
# path of the cropped image.
#
# @param output_path [String] directory for cropped images (assumed to be a
#   path string, since it is passed to File.join)
def initialize(output_path)
|
||||
@output_path = output_path
|
||||
end
|
||||
|
||||
def perform(work_future)
|
||||
work = work_future.value
|
||||
work.shift!
|
||||
|
||||
input_path = work.input[:path]
|
||||
bbox = work.input[:bbox]
|
||||
output_path = File.join(TMP_PREFIX, 'crop-' + work.id + '.png')
|
||||
output_path = File.join(@output_path, 'crop-' + work.id + '.png')
|
||||
|
||||
puts "(#{work.id}) cropping capture".green
|
||||
system 'convert ' \
|
||||
|
|
|
@ -8,6 +8,10 @@ module Webgalien
|
|||
class ScreenshotActor
|
||||
include Celluloid
|
||||
|
||||
# Stores the directory in which #perform creates (mkdir_p) and saves the
# "capture-<id>.png" screenshots.
#
# @param output_path [String] directory for captures (assumed to be a path
#   string, since it is passed to File.join and FileUtils.mkdir_p)
def initialize(output_path)
|
||||
@output_path = output_path
|
||||
end
|
||||
|
||||
def perform(future_work)
|
||||
driver = initialize_selenium_driver
|
||||
|
||||
|
@ -15,16 +19,24 @@ module Webgalien
|
|||
work.shift!
|
||||
|
||||
url = work.input[:url]
|
||||
# Start selenium work
|
||||
# platform_login driver
|
||||
|
||||
# Go to wanted page
|
||||
driver.manage.window.resize_to(1440, 8000)
|
||||
driver.manage.window.resize_to(1440, 900)
|
||||
puts "(#{work.id}) loading page #{url}"
|
||||
driver.navigate.to url
|
||||
|
||||
puts "(#{work.id}) waiting DOM stability"
|
||||
wait_dom_stability(driver)
|
||||
|
||||
# get page size
|
||||
element = driver.find_element(:css, "body")
|
||||
height = element.size.height.to_i + 1
|
||||
puts "(#{work.id}) resizing to 1440x#{height}"
|
||||
driver.manage.window.resize_to(1440, height)
|
||||
|
||||
puts "(#{work.id}) waiting DOM stability"
|
||||
wait_dom_stability(driver)
|
||||
|
||||
puts "(#{work.id}) getting page dimensions"
|
||||
element = driver.find_element(:css, "body")
|
||||
location = element.location
|
||||
|
@ -37,9 +49,9 @@ module Webgalien
|
|||
y: 0 # location.y.to_i
|
||||
}
|
||||
|
||||
puts "(#{work.id}) saving page"
|
||||
FileUtils.mkdir_p(TMP_PREFIX)
|
||||
tmp_path = File.join(TMP_PREFIX, 'capture-' + work.id.to_s + '.png')
|
||||
tmp_path = File.join(@output_path, 'capture-' + work.id.to_s + '.png')
|
||||
puts "(#{work.id}) saving page to #{tmp_path}"
|
||||
FileUtils.mkdir_p(@output_path)
|
||||
driver.save_screenshot tmp_path
|
||||
|
||||
driver.quit
|
||||
|
@ -54,12 +66,22 @@ module Webgalien
|
|||
private
|
||||
|
||||
# Creates the Selenium Chrome driver used to capture a page.
#
# Chrome is started headless, with GPU and DNS prefetching disabled and
# USER_AGENT as its user agent. A dedicated HTTP client whose read and open
# timeouts are set to 120 (presumably seconds -- confirm against
# selenium-webdriver) is handed to the driver.
#
# Exactly one driver is created per call: an additional
# `Selenium::WebDriver.for` call whose result is discarded would start a
# browser session that nobody ever quits.
#
# @return the driver object returned by Selenium::WebDriver.for
def initialize_selenium_driver
  client = Selenium::WebDriver::Remote::Http::Default.new
  client.read_timeout = 120
  client.open_timeout = 120

  options = Selenium::WebDriver::Chrome::Options.new
  options.add_argument('--headless')
  options.add_argument('--disable-gpu')
  options.add_argument('--dns-prefetch-disable')
  options.add_argument("--user-agent=#{USER_AGENT}")

  Selenium::WebDriver.for(
    :chrome,
    options: options,
    http_client: client
  )
end
|
||||
|
||||
# wait for the DOM structure to be stable for 5 consecutive tries
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
|
||||
require 'table_print'
|
||||
|
||||
module Webgalien
|
||||
class Cli < Thor
|
||||
class_option :'user-agent',
|
||||
|
@ -6,32 +8,57 @@ module Webgalien
|
|||
banner: 'USER-AGENT',
|
||||
type: :string,
|
||||
desc: 'choose user agent (default Mozilla)'
|
||||
option :output,
|
||||
aliases: '-o',
|
||||
banner: 'OUTPUT-FILE',
|
||||
type: :string,
|
||||
default: 'sitemap.yml',
|
||||
desc: 'where sitemap will be produced (default: sitemap.yml)'
|
||||
|
||||
desc 'sitemap URL FILE', 'crawl site and export sitemap'
|
||||
def sitemap url, file
|
||||
def sitemap url
|
||||
Sitemap.start(
|
||||
url: url,
|
||||
output: options['output']
|
||||
)
|
||||
end
|
||||
|
||||
desc 'screenshot FILE', 'take screenshots for each page'
|
||||
option :device,
|
||||
aliases: '-d',
|
||||
banner: 'DEVICE',
|
||||
type: :string,
|
||||
desc: 'set device from "list-devices" (default "desktop")'
|
||||
option :profile,
|
||||
aliases: '-p',
|
||||
banner: 'PROFILE',
|
||||
aliases: '-r',
|
||||
banner: '[portrait|landscape]',
|
||||
type: :string,
|
||||
desc: 'choose device profile / resolution (default 1440x900 on desktop pc)'
|
||||
option :output,
|
||||
desc: 'choose device orientation (default "portrait")'
|
||||
option :"output-path",
|
||||
aliases: '-o',
|
||||
banner: 'OUTPUT-DIRECTORY',
|
||||
banner: 'OUTPUT-PATH',
|
||||
type: :string,
|
||||
default: '.',
|
||||
desc: 'where resulting content will be produced'
|
||||
|
||||
def screenshot file
|
||||
config = YAML.load File.open(file)
|
||||
prefixed_urls = config['pages'].map {|u| config['root'] + u }
|
||||
default: 'cache',
|
||||
desc: 'directory where resulting content will be produced'
|
||||
|
||||
def screenshot sitemap
|
||||
if not Devices.exist?(options['device']) then
|
||||
STDERR.puts "ERROR: device #{options['device']} does not exist"
|
||||
exit 1
|
||||
end
|
||||
Screenshot.start(
|
||||
urls: prefixed_urls,
|
||||
output_path: options['output']
|
||||
sitemap: sitemap,
|
||||
output_path: options['output-path'],
|
||||
device: options['device'],
|
||||
orientation: options['orientation']
|
||||
)
|
||||
end
|
||||
|
||||
|
||||
desc 'list-devices', 'list available profiles'
|
||||
def list_devices
|
||||
# from https://mediag.com/news/popular-screen-resolutions-designing-for-all/
|
||||
Devices.display_list
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
56
lib/webgalien/devices.rb
Normal file
56
lib/webgalien/devices.rb
Normal file
|
@ -0,0 +1,56 @@
|
|||
require 'csv'
|
||||
|
||||
module Webgalien
|
||||
class Devices
|
||||
# Raw device table, one device per line:
#
#   model, width, height, type[, "user agent"]
#
# The user agent column is optional. It must be double-quoted because agent
# strings contain commas. The "x" agent of the computer entries is
# presumably a placeholder -- confirm before relying on it.
PROFILES_CSV = <<~MARK.freeze
  laptop, 1440, 900, computer, "x"
  desktop, 1280, 768, computer, "x"

  apple-iphone-x, 375, 812, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
  apple-iphone-8plus, 414, 736, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
  apple-iphone-8, 375, 667, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
  apple-iphone-7plus, 414, 736, phone
  apple-iphone-7, 375, 667, phone
  apple-iphone-6plus, 414, 736, phone
  apple-iphone-6, 375, 667, phone
  apple-iphone-5, 320, 568, phone
  apple-ipad-pro, 1024, 1366, tablet
  apple-ipad, 768, 1024, tablet
  apple-air, 768, 1024, tablet
  apple-air-2, 768, 1024, tablet
  apple-mini, 768, 1024, tablet
  apple-mini-2, 768, 1024, tablet
  apple-mini-3, 768, 1024, tablet

  samsung-galaxy-s9, 360, 740, phone
  samsung-galaxy-s8plus, 360, 740, phone
  samsung-galaxy-s8, 360, 740, phone
  samsung-galaxy-s7, 360, 640, phone
  nexus-6p, 411, 731, phone
MARK

# Parsed device table: one frozen array of hashes with the keys :model,
# :width, :height, :type and :agent. All values are stripped strings; :agent
# is nil for rows without a user agent column.
PROFILES = CSV.parse(
  # A field only counts as quoted when the quote directly follows the
  # separator. With the padding left in, the agent was parsed as plain text,
  # cut at its first comma ("(KHTML, like Gecko)") and kept a stray quote.
  PROFILES_CSV.gsub(/,[ \t]+"/, ',"'),
  skip_blanks: true,
  quote_char: '"',
  liberal_parsing: true
).map do |model, width, height, type, agent|
  {
    model: model&.strip,
    width: width&.strip,
    height: height&.strip,
    type: type&.strip,
    agent: agent&.strip
  }
end.freeze
|
||||
|
||||
# Prints the PROFILES device table by handing it to `tp`.
# NOTE(review): `tp` is presumably the helper from the table_print gem that
# the CLI file requires -- confirm it is loaded before this is called.
def self.display_list
|
||||
tp PROFILES
|
||||
end
|
||||
|
||||
# Tells whether a device model is listed in PROFILES.
#
# The previous body dumped PROFILES with `pp` and called `exit 1`, so the
# process always terminated before an answer was returned. Its unreachable
# return statement also called `models` on a Hash and `exist?` on an Array,
# neither of which exists.
#
# @param profile [String, nil] device model name, e.g. "apple-iphone-x"
# @return [Boolean] true when an entry's :model equals +profile+; false
#   otherwise (including for nil)
def self.exist?(profile)
  PROFILES.any? { |entry| entry[:model] == profile }
end
|
||||
end
|
||||
end
|
||||
|
|
@ -1,16 +1,19 @@
|
|||
|
||||
module Webgalien
|
||||
class Screenshot
|
||||
def self.start(urls:, output_path:)
|
||||
def self.start(sitemap:, output_path: , device:, orientation:)
|
||||
config = YAML.load File.open(sitemap)
|
||||
urls = config['pages']
|
||||
|
||||
# Start workpools
|
||||
cores = Celluloid.cores
|
||||
screenshot_pool = ScreenshotActor.pool(
|
||||
size: cores,
|
||||
args: { output_path: output_path }
|
||||
args: [ output_path ]
|
||||
)
|
||||
crop_pool = CropPngActor.pool(
|
||||
size: cores,
|
||||
args: { output_path: output_path }
|
||||
args: [ output_path ]
|
||||
)
|
||||
|
||||
futures =
|
||||
|
|
|
@ -1,7 +1,56 @@
|
|||
|
||||
module Webgalien
|
||||
class Sitemap
|
||||
def self.start(output_path:)
|
||||
# Crawls a site starting at +url+ and writes the result as YAML.
#
# Pages are fetched one at a time through Sitemap.get_links; the links it
# returns are queued until nothing new is left. The YAML document written to
# +output+ has two keys: "root" (the start URL) and "pages" (the URL that
# Sitemap.get_links reported for every page fetched).
#
# @param url [String] start URL, also passed to Sitemap.get_links as root
# @param output [String] path of the YAML file to write
# @return whatever File.write returns
def self.start(url:, output:)
  puts "Loading #{url}"

  root = url
  visited = Set.new   # URLs reported by get_links; exported as "pages"
  # URLs already fetched. Tracked separately from `visited` because a
  # redirecting URL never appears in `visited` (only its target does), so any
  # page linking to it would otherwise re-queue it forever.
  requested = Set.new
  remains = Set.new([url])

  until remains.empty?
    current = remains.first
    remains.delete(current)
    requested << current

    final_url, links = Sitemap.get_links(root, current)
    visited << final_url

    # A visited page may also be linked without its trailing slash.
    visited_without_slash = visited.map { |page| page.chomp('/') }
    remains =
      remains + links.to_set - visited - visited_without_slash - requested
  end

  result = {
    "root" => root,
    "pages" => visited.to_a
  }
  File.write(output, result.to_yaml)
end
|
||||
|
||||
|
||||
# Fetches +url+ and collects the links on that page that stay within +root+.
#
# Every link with an href is resolved with Mechanize#resolve and kept when
# the resolved URL starts with +root+. Each link found is reported on stdout
# together with its status.
#
# @param root [String] prefix a resolved link must start with to be kept
# @param url [String] page to fetch
# @return [Array] two elements: the fetched page's own URI as a string
#   (page.uri), and the array of kept link URLs
def self.get_links(root, url)
  mechanize = Mechanize.new
  page = mechanize.get(url)
  links = []

  page.links.each do |link|
    # The former guard `! link.href =~ /^https?:\/\//` parsed as
    # `(!link.href) =~ ...`: it never skipped anything on older Rubies and
    # raises NoMethodError since Ruby 3.2. Relative hrefs must still reach
    # `resolve`, and foreign schemes are reported by the rescue below, so
    # only links without any href are skipped here.
    next if link.href.nil?

    begin
      link_url = mechanize.resolve(link.href).to_s
      print "Found #{url} -> #{link_url} "
      if link_url.start_with?(root)
        puts "(ok)".green
        links << link_url
      else
        puts "(out of scope)".red
      end
    rescue Mechanize::UnsupportedSchemeError
      print "Found #{url} -> #{link.href} "
      puts "(unsupported scheme)".red
    end
  end

  [page.uri.to_s, links]
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue