Add support for device list & per-device parameters

This commit is contained in:
Glenn Y. Rolland 2018-10-11 10:58:38 +03:00
parent 9943ce0632
commit d9fe2e1230
7 changed files with 189 additions and 26 deletions

View file

@ -2,6 +2,7 @@
# Load external dependencies
require 'thor'
require 'celluloid/current'
require 'mechanize'
require 'selenium-webdriver'
require 'yaml'
require 'thor'
@ -15,6 +16,7 @@ require 'webgalien/actors/work_actor'
require 'webgalien/screenshot'
require 'webgalien/sitemap'
require 'webgalien/devices'
# Load cli
require 'webgalien/cli'

View file

@ -4,13 +4,17 @@ module Webgalien
class CropPngActor
include Celluloid
# Remember the directory used by #perform when it builds the path of the
# cropped image ('crop-<work id>.png' joined onto this directory).
#
# @param output_path [String] directory path (presumably a String; it is
#   only ever handed to File.join in the visible code)
def initialize(output_path)
@output_path = output_path
end
def perform(work_future)
work = work_future.value
work.shift!
input_path = work.input[:path]
bbox = work.input[:bbox]
output_path = File.join(TMP_PREFIX, 'crop-' + work.id + '.png')
output_path = File.join(@output_path, 'crop-' + work.id + '.png')
puts "(#{work.id}) cropping capture".green
system 'convert ' \

View file

@ -8,6 +8,10 @@ module Webgalien
class ScreenshotActor
include Celluloid
# Remember the directory used by #perform: it is created with
# FileUtils.mkdir_p and the 'capture-<work id>.png' screenshot is saved in it.
#
# @param output_path [String] directory path (presumably a String; it is
#   only handed to File.join / FileUtils.mkdir_p in the visible code)
def initialize(output_path)
@output_path = output_path
end
def perform(future_work)
driver = initialize_selenium_driver
@ -15,16 +19,24 @@ module Webgalien
work.shift!
url = work.input[:url]
# Start selenium work
# platform_login driver
# Go to wanted page
driver.manage.window.resize_to(1440, 8000)
driver.manage.window.resize_to(1440, 900)
puts "(#{work.id}) loading page #{url}"
driver.navigate.to url
puts "(#{work.id}) waiting DOM stability"
wait_dom_stability(driver)
# get page size
element = driver.find_element(:css, "body")
height = element.size.height.to_i + 1
puts "(#{work.id}) resizing to 1440x#{height}"
driver.manage.window.resize_to(1440, height)
puts "(#{work.id}) waiting DOM stability"
wait_dom_stability(driver)
puts "(#{work.id}) getting page dimensions"
element = driver.find_element(:css, "body")
location = element.location
@ -37,9 +49,9 @@ module Webgalien
y: 0 # location.y.to_i
}
puts "(#{work.id}) saving page"
FileUtils.mkdir_p(TMP_PREFIX)
tmp_path = File.join(TMP_PREFIX, 'capture-' + work.id.to_s + '.png')
tmp_path = File.join(@output_path, 'capture-' + work.id.to_s + '.png')
puts "(#{work.id}) saving page to #{tmp_path}"
FileUtils.mkdir_p(@output_path)
driver.save_screenshot tmp_path
driver.quit
@ -54,12 +66,22 @@ module Webgalien
private
def initialize_selenium_driver
client = Selenium::WebDriver::Remote::Http::Default.new
client.read_timeout = 120
client.open_timeout = 120
options = Selenium::WebDriver::Chrome::Options.new
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--dns-prefetch-disable')
options.add_argument("--user-agent=#{USER_AGENT}")
Selenium::WebDriver.for :chrome, options: options
# driver.timeout = 90 # instead of the default 60
Selenium::WebDriver.for(
:chrome,
options: options,
http_client: client
)
end
# wait for the DOM structure to be stable for 5 consecutive tries

View file

@ -1,4 +1,6 @@
require 'table_print'
module Webgalien
class Cli < Thor
class_option :'user-agent',
@ -6,32 +8,57 @@ module Webgalien
banner: 'USER-AGENT',
type: :string,
desc: 'choose user agent (default Mozilla)'
option :output,
aliases: '-o',
banner: 'OUTPUT-FILE',
type: :string,
default: 'sitemap.yml',
desc: 'where sitemap will be produced (default: sitemap.yml)'
desc 'sitemap URL FILE', 'crawl site and export sitemap'
def sitemap url, file
def sitemap url
Sitemap.start(
url: url,
output: options['output']
)
end
desc 'screenshot FILE', 'take screenshots for each page'
option :device,
aliases: '-d',
banner: 'DEVICE',
type: :string,
desc: 'set device from "list-devices" (default "desktop")'
option :profile,
aliases: '-p',
banner: 'PROFILE',
aliases: '-r',
banner: '[portrait|landscape]',
type: :string,
desc: 'choose device profile / resolution (default 1440x900 on desktop pc)'
option :output,
desc: 'choose device orientation (default "portrait")'
option :"output-path",
aliases: '-o',
banner: 'OUTPUT-DIRECTORY',
banner: 'OUTPUT-PATH',
type: :string,
default: '.',
desc: 'where resulting content will be produced'
def screenshot file
config = YAML.load File.open(file)
prefixed_urls = config['pages'].map {|u| config['root'] + u }
default: 'cache',
desc: 'directory where resulting content will be produced'
def screenshot sitemap
if not Devices.exist?(options['device']) then
STDERR.puts "ERROR: device #{options['device']} does not exist"
exit 1
end
Screenshot.start(
urls: prefixed_urls,
output_path: options['output']
sitemap: sitemap,
output_path: options['output-path'],
device: options['device'],
orientation: options['orientation']
)
end
# Thor command 'list-devices': takes no arguments and delegates to
# Devices.display_list to print the known device profiles.
desc 'list-devices', 'list available profiles'
def list_devices
# from https://mediag.com/news/popular-screen-resolutions-designing-for-all/
Devices.display_list
end
end
end

56
lib/webgalien/devices.rb Normal file
View file

@ -0,0 +1,56 @@
require 'csv'
module Webgalien
  # Catalogue of the device profiles that can be selected for screenshots.
  #
  # Each profile is a Hash with the keys :model, :width, :height, :type and
  # :agent. All values are Strings exactly as written in the table below
  # (width and height are NOT converted to integers); :agent is nil for rows
  # that do not declare a user agent.
  class Devices
    # One profile per line: model, width, height, type[, "user agent"].
    # The user agent is the last column and may itself contain commas.
    PROFILES_CSV = <<~MARK.freeze
      laptop, 1440, 900, computer, "x"
      desktop, 1280, 768, computer, "x"
      apple-iphone-x, 375, 812, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
      apple-iphone-8plus, 414, 736, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
      apple-iphone-8, 375, 667, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
      apple-iphone-7plus, 414, 736, phone
      apple-iphone-7, 375, 667, phone
      apple-iphone-6plus, 414, 736, phone
      apple-iphone-6, 375, 667, phone
      apple-iphone-5, 320, 568, phone
      apple-ipad-pro, 1024, 1366, tablet
      apple-ipad, 768, 1024, tablet
      apple-air, 768, 1024, tablet
      apple-air-2, 768, 1024, tablet
      apple-mini, 768, 1024, tablet
      apple-mini-2, 768, 1024, tablet
      apple-mini-3, 768, 1024, tablet
      samsung-galaxy-s9, 360, 740, phone
      samsung-galaxy-s8plus, 360, 740, phone
      samsung-galaxy-s8, 360, 740, phone
      samsung-galaxy-s7, 360, 640, phone
      nexus-6p, 411, 731, phone
    MARK

    # Parsed profiles. Each line is split on at most the first four commas so
    # that commas inside the trailing user-agent column stay in one field; the
    # surrounding double quotes of that column are then removed.
    PROFILES = PROFILES_CSV.each_line.map(&:strip).reject(&:empty?).map do |line|
      model, width, height, type, agent = line.split(',', 5).map(&:strip)
      {
        model: model,
        width: width,
        height: height,
        type: type,
        agent: agent && agent.gsub(/\A"|"\z/, '')
      }
    end.freeze

    # Print every profile as a table (uses `tp` from the table_print gem,
    # which must already be loaded by the caller).
    def self.display_list
      tp PROFILES
    end

    # Tell whether a device model is part of the catalogue.
    #
    # @param profile [String, nil] model name, e.g. "apple-iphone-x"
    # @return [Boolean] true when a profile with that exact model name exists;
    #   false otherwise (including for nil)
    def self.exist?(profile)
      PROFILES.any? { |device| device[:model] == profile }
    end
  end
end

View file

@ -1,16 +1,19 @@
module Webgalien
class Screenshot
def self.start(urls:, output_path:)
def self.start(sitemap:, output_path: , device:, orientation:)
config = YAML.load File.open(sitemap)
urls = config['pages']
# Start workpools
cores = Celluloid.cores
screenshot_pool = ScreenshotActor.pool(
size: cores,
args: { output_path: output_path }
args: [ output_path ]
)
crop_pool = CropPngActor.pool(
size: cores,
args: { output_path: output_path }
args: [ output_path ]
)
futures =

View file

@ -1,7 +1,56 @@
module Webgalien
class Sitemap
def self.start(output_path:)
# Crawl a site starting at +url+ and write the discovered pages to a YAML
# file with the keys "root" (the start URL) and "pages" (the URLs returned by
# Sitemap.get_links for every fetched page, in discovery order).
#
# Every URL is requested at most once: both the URL that was requested and
# the URL reported back by get_links are remembered. Without the former, a
# URL whose fetch reports a different final URL (presumably a redirect)
# would be queued again each time a page links to it, and the crawl would
# never terminate.
#
# @param url [String] start URL; also used as the scope prefix for get_links
# @param output [String] path of the YAML file to write
def self.start(url:, output:)
  puts "Loading #{url}"
  root = url
  visited = Set.new    # URLs as reported by get_links (written to the file)
  requested = Set.new  # URLs as they were asked for
  remains = Set.new([url])

  until remains.empty?
    current = remains.first
    remains.delete(current)
    requested << current

    final_url, links = Sitemap.get_links(root, current)
    visited << final_url

    # A visited page is also considered known without its trailing slash.
    known = requested | visited | visited.map { |page| page.gsub(/\/$/, '') }
    remains = remains + links.to_set - known
  end

  result = {
    "root" => root,
    "pages" => visited.to_a
  }
  File.write(output, result.to_yaml)
end
# Fetch +url+ and collect the links of that page that stay inside +root+.
#
# Every link is resolved with Mechanize#resolve and kept only when the
# resolved URL starts with +root+. Links whose scheme Mechanize refuses
# (Mechanize::UnsupportedSchemeError) are reported and ignored. A line is
# printed for each examined link.
#
# @param root [String] URL prefix delimiting the crawl scope
# @param url [String] page to fetch
# @return [Array(String, Array<String>)] the fetched page's own URI as a
#   String (page.uri, which may differ from +url+ -- presumably after a
#   redirect) and the list of in-scope link URLs
def self.get_links(root, url)
  links = []
  mechanize = Mechanize.new
  page = mechanize.get(url)
  url2 = page.uri.to_s

  # Responses that expose no #links (presumably non-HTML content such as
  # images or downloads) have nothing to follow.
  return url2, links unless page.respond_to?(:links)

  page.links.each do |link|
    href = link.href
    # Anchors without an href have nothing to resolve. This replaces the
    # former `! link.href =~ /.../` guard, which was parsed as
    # `(!link.href) =~ /.../` and therefore never filtered anything.
    next if href.nil?

    begin
      link_url = mechanize.resolve(href).to_s
      print "Found #{url} -> #{link_url} "
      if link_url.start_with?(root)
        puts "(ok)".green
        links << link_url
      else
        puts "(out of scope)".red
      end
    rescue Mechanize::UnsupportedSchemeError
      print "Found #{url} -> #{href} "
      puts "(unsupported scheme)".red
    end
  end

  return url2, links
end
end
end