Add support for device list & per-device parameters
This commit is contained in:
parent
9943ce0632
commit
d9fe2e1230
7 changed files with 189 additions and 26 deletions
|
@ -2,6 +2,7 @@
|
||||||
# Load external dependencies
|
# Load external dependencies
|
||||||
require 'thor'
|
require 'thor'
|
||||||
require 'celluloid/current'
|
require 'celluloid/current'
|
||||||
|
require 'mechanize'
|
||||||
require 'selenium-webdriver'
|
require 'selenium-webdriver'
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
require 'thor'
|
require 'thor'
|
||||||
|
@ -15,6 +16,7 @@ require 'webgalien/actors/work_actor'
|
||||||
|
|
||||||
require 'webgalien/screenshot'
|
require 'webgalien/screenshot'
|
||||||
require 'webgalien/sitemap'
|
require 'webgalien/sitemap'
|
||||||
|
require 'webgalien/devices'
|
||||||
|
|
||||||
# Load cli
|
# Load cli
|
||||||
require 'webgalien/cli'
|
require 'webgalien/cli'
|
||||||
|
|
|
@ -4,13 +4,17 @@ module Webgalien
|
||||||
class CropPngActor
|
class CropPngActor
|
||||||
include Celluloid
|
include Celluloid
|
||||||
|
|
||||||
|
def initialize(output_path)
|
||||||
|
@output_path = output_path
|
||||||
|
end
|
||||||
|
|
||||||
def perform(work_future)
|
def perform(work_future)
|
||||||
work = work_future.value
|
work = work_future.value
|
||||||
work.shift!
|
work.shift!
|
||||||
|
|
||||||
input_path = work.input[:path]
|
input_path = work.input[:path]
|
||||||
bbox = work.input[:bbox]
|
bbox = work.input[:bbox]
|
||||||
output_path = File.join(TMP_PREFIX, 'crop-' + work.id + '.png')
|
output_path = File.join(@output_path, 'crop-' + work.id + '.png')
|
||||||
|
|
||||||
puts "(#{work.id}) cropping capture".green
|
puts "(#{work.id}) cropping capture".green
|
||||||
system 'convert ' \
|
system 'convert ' \
|
||||||
|
|
|
@ -8,6 +8,10 @@ module Webgalien
|
||||||
class ScreenshotActor
|
class ScreenshotActor
|
||||||
include Celluloid
|
include Celluloid
|
||||||
|
|
||||||
|
def initialize(output_path)
|
||||||
|
@output_path = output_path
|
||||||
|
end
|
||||||
|
|
||||||
def perform(future_work)
|
def perform(future_work)
|
||||||
driver = initialize_selenium_driver
|
driver = initialize_selenium_driver
|
||||||
|
|
||||||
|
@ -15,16 +19,24 @@ module Webgalien
|
||||||
work.shift!
|
work.shift!
|
||||||
|
|
||||||
url = work.input[:url]
|
url = work.input[:url]
|
||||||
# Start selenium work
|
|
||||||
# platform_login driver
|
|
||||||
# Go to wanted page
|
# Go to wanted page
|
||||||
driver.manage.window.resize_to(1440, 8000)
|
driver.manage.window.resize_to(1440, 900)
|
||||||
puts "(#{work.id}) loading page #{url}"
|
puts "(#{work.id}) loading page #{url}"
|
||||||
driver.navigate.to url
|
driver.navigate.to url
|
||||||
|
|
||||||
puts "(#{work.id}) waiting DOM stability"
|
puts "(#{work.id}) waiting DOM stability"
|
||||||
wait_dom_stability(driver)
|
wait_dom_stability(driver)
|
||||||
|
|
||||||
|
# get page size
|
||||||
|
element = driver.find_element(:css, "body")
|
||||||
|
height = element.size.height.to_i + 1
|
||||||
|
puts "(#{work.id}) resizing to 1440x#{height}"
|
||||||
|
driver.manage.window.resize_to(1440, height)
|
||||||
|
|
||||||
|
puts "(#{work.id}) waiting DOM stability"
|
||||||
|
wait_dom_stability(driver)
|
||||||
|
|
||||||
puts "(#{work.id}) getting page dimensions"
|
puts "(#{work.id}) getting page dimensions"
|
||||||
element = driver.find_element(:css, "body")
|
element = driver.find_element(:css, "body")
|
||||||
location = element.location
|
location = element.location
|
||||||
|
@ -37,9 +49,9 @@ module Webgalien
|
||||||
y: 0 # location.y.to_i
|
y: 0 # location.y.to_i
|
||||||
}
|
}
|
||||||
|
|
||||||
puts "(#{work.id}) saving page"
|
tmp_path = File.join(@output_path, 'capture-' + work.id.to_s + '.png')
|
||||||
FileUtils.mkdir_p(TMP_PREFIX)
|
puts "(#{work.id}) saving page to #{tmp_path}"
|
||||||
tmp_path = File.join(TMP_PREFIX, 'capture-' + work.id.to_s + '.png')
|
FileUtils.mkdir_p(@output_path)
|
||||||
driver.save_screenshot tmp_path
|
driver.save_screenshot tmp_path
|
||||||
|
|
||||||
driver.quit
|
driver.quit
|
||||||
|
@ -54,12 +66,22 @@ module Webgalien
|
||||||
private
|
private
|
||||||
|
|
||||||
def initialize_selenium_driver
|
def initialize_selenium_driver
|
||||||
|
client = Selenium::WebDriver::Remote::Http::Default.new
|
||||||
|
client.read_timeout = 120
|
||||||
|
client.open_timeout = 120
|
||||||
|
|
||||||
options = Selenium::WebDriver::Chrome::Options.new
|
options = Selenium::WebDriver::Chrome::Options.new
|
||||||
options.add_argument('--headless')
|
options.add_argument('--headless')
|
||||||
options.add_argument('--disable-gpu')
|
options.add_argument('--disable-gpu')
|
||||||
|
options.add_argument('--dns-prefetch-disable')
|
||||||
options.add_argument("--user-agent=#{USER_AGENT}")
|
options.add_argument("--user-agent=#{USER_AGENT}")
|
||||||
|
|
||||||
Selenium::WebDriver.for :chrome, options: options
|
# driver.timeout = 90 # instead of the default 60
|
||||||
|
Selenium::WebDriver.for(
|
||||||
|
:chrome,
|
||||||
|
options: options,
|
||||||
|
http_client: client
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
# wait for the DOM structure to be stable for 5 consecutive tries
|
# wait for the DOM structure to be stable for 5 consecutive tries
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
|
|
||||||
|
require 'table_print'
|
||||||
|
|
||||||
module Webgalien
|
module Webgalien
|
||||||
class Cli < Thor
|
class Cli < Thor
|
||||||
class_option :'user-agent',
|
class_option :'user-agent',
|
||||||
|
@ -6,32 +8,57 @@ module Webgalien
|
||||||
banner: 'USER-AGENT',
|
banner: 'USER-AGENT',
|
||||||
type: :string,
|
type: :string,
|
||||||
desc: 'choose user agent (default Mozilla)'
|
desc: 'choose user agent (default Mozilla)'
|
||||||
|
option :output,
|
||||||
|
aliases: '-o',
|
||||||
|
banner: 'OUTPUT-FILE',
|
||||||
|
type: :string,
|
||||||
|
default: 'sitemap.yml',
|
||||||
|
desc: 'where sitemap will be produced (default: sitemap.yml)'
|
||||||
|
|
||||||
desc 'sitemap URL FILE', 'crawl site and export sitemap'
|
desc 'sitemap URL FILE', 'crawl site and export sitemap'
|
||||||
def sitemap url, file
|
def sitemap url
|
||||||
|
Sitemap.start(
|
||||||
|
url: url,
|
||||||
|
output: options['output']
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
desc 'screenshot FILE', 'take screenshots for each page'
|
desc 'screenshot FILE', 'take screenshots for each page'
|
||||||
|
option :device,
|
||||||
|
aliases: '-d',
|
||||||
|
banner: 'DEVICE',
|
||||||
|
type: :string,
|
||||||
|
desc: 'set device from "list-devices" (default "desktop")'
|
||||||
option :profile,
|
option :profile,
|
||||||
aliases: '-p',
|
aliases: '-r',
|
||||||
banner: 'PROFILE',
|
banner: '[portrait|landscape]',
|
||||||
type: :string,
|
type: :string,
|
||||||
desc: 'choose device profile / resolution (default 1440x900 on desktop pc)'
|
desc: 'choose device orientation (default "portrait")'
|
||||||
option :output,
|
option :"output-path",
|
||||||
aliases: '-o',
|
aliases: '-o',
|
||||||
banner: 'OUTPUT-DIRECTORY',
|
banner: 'OUTPUT-PATH',
|
||||||
type: :string,
|
type: :string,
|
||||||
default: '.',
|
default: 'cache',
|
||||||
desc: 'where resulting content will be produced'
|
desc: 'directory where resulting content will be produced'
|
||||||
|
|
||||||
def screenshot file
|
|
||||||
config = YAML.load File.open(file)
|
|
||||||
prefixed_urls = config['pages'].map {|u| config['root'] + u }
|
|
||||||
|
|
||||||
|
# Thor command: take a screenshot of every page listed in a sitemap file.
#
# The device model is validated against Devices before any work starts; an
# unknown model prints an error on stderr and terminates with exit status 1.
#
# Defaults are applied here because the option declarations carry none:
# * device falls back to "desktop", as promised by the --device help text;
# * orientation falls back to "portrait", as promised by the help text of
#   the orientation option.
#
# NOTE(review): the orientation option is declared under the name :profile
# (alias -r), so its value lives in options['profile']. The previous code
# read options['orientation'], which is never populated.
#
# @param sitemap [String] path of the sitemap YAML file to process
def screenshot(sitemap)
  device = options['device'] || 'desktop'

  unless Devices.exist?(device)
    STDERR.puts "ERROR: device #{device} does not exist"
    exit 1
  end

  Screenshot.start(
    sitemap: sitemap,
    output_path: options['output-path'],
    device: device,
    orientation: options['profile'] || 'portrait'
  )
end
|
||||||
|
|
||||||
|
|
||||||
|
desc 'list-devices', 'list available profiles'
|
||||||
|
def list_devices
|
||||||
|
# from https://mediag.com/news/popular-screen-resolutions-designing-for-all/
|
||||||
|
Devices.display_list
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
56
lib/webgalien/devices.rb
Normal file
56
lib/webgalien/devices.rb
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
require 'csv'
|
||||||
|
|
||||||
|
module Webgalien
  # Catalogue of the device profiles that can be selected for screenshots.
  #
  # Each profile is a Hash with the keys :model, :width, :height, :type and
  # :agent. All values are kept as Strings exactly as written in the table
  # below (width and height are NOT converted to integers); :agent is nil for
  # devices that do not declare a user agent.
  class Devices
    # Raw device table, one device per line:
    #   model, width, height, type[, "user agent"]
    # The user agent is optional, double-quoted, and may itself contain commas.
    PROFILES_CSV = <<~MARK.freeze
      laptop, 1440, 900, computer, "x"
      desktop, 1280, 768, computer, "x"

      apple-iphone-x, 375, 812, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
      apple-iphone-8plus, 414, 736, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
      apple-iphone-8, 375, 667, phone, "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
      apple-iphone-7plus, 414, 736, phone
      apple-iphone-7, 375, 667, phone
      apple-iphone-6plus, 414, 736, phone
      apple-iphone-6, 375, 667, phone
      apple-iphone-5, 320, 568, phone
      apple-ipad-pro, 1024, 1366, tablet
      apple-ipad, 768, 1024, tablet
      apple-air, 768, 1024, tablet
      apple-air-2, 768, 1024, tablet
      apple-mini, 768, 1024, tablet
      apple-mini-2, 768, 1024, tablet
      apple-mini-3, 768, 1024, tablet

      samsung-galaxy-s9, 360, 740, phone
      samsung-galaxy-s8plus, 360, 740, phone
      samsung-galaxy-s8, 360, 740, phone
      samsung-galaxy-s7, 360, 640, phone
      nexus-6p, 411, 731, phone
    MARK

    # Parsed device table (Array of Hashes, see the class comment).
    #
    # Rows are split by hand instead of going through a CSV parser: the
    # fields are padded with a space after each comma, so a CSV parser does
    # not treat the quoted user agent as a quoted field and cuts it at the
    # commas it contains ("KHTML, like Gecko"). Limiting the split to five
    # fields keeps the whole user agent together; the surrounding quotes are
    # then removed.
    PROFILES = PROFILES_CSV.each_line.map(&:strip).reject(&:empty?).map do |row|
      model, width, height, type, agent = row.split(',', 5).map(&:strip)
      {
        model: model,
        width: width,
        height: height,
        type: type,
        agent: agent&.sub(/\A"(.*)"\z/m, '\1')
      }
    end.freeze

    # Print the device table on stdout.
    # Relies on `tp` (presumably provided by the table_print gem) having been
    # loaded by the caller; this file does not require it itself.
    def self.display_list
      tp PROFILES
    end

    # Tell whether a device model is part of the catalogue.
    #
    # @param profile [String, nil] device model name, e.g. "apple-iphone-x"
    # @return [Boolean] true when a profile with that exact model name exists;
    #   false otherwise (including for nil)
    def self.exist?(profile)
      PROFILES.any? { |device| device[:model] == profile }
    end
  end
end
|
||||||
|
|
|
@ -1,16 +1,19 @@
|
||||||
|
|
||||||
module Webgalien
|
module Webgalien
|
||||||
class Screenshot
|
class Screenshot
|
||||||
def self.start(urls:, output_path:)
|
def self.start(sitemap:, output_path: , device:, orientation:)
|
||||||
|
config = YAML.load File.open(sitemap)
|
||||||
|
urls = config['pages']
|
||||||
|
|
||||||
# Start workpools
|
# Start workpools
|
||||||
cores = Celluloid.cores
|
cores = Celluloid.cores
|
||||||
screenshot_pool = ScreenshotActor.pool(
|
screenshot_pool = ScreenshotActor.pool(
|
||||||
size: cores,
|
size: cores,
|
||||||
args: { output_path: output_path }
|
args: [ output_path ]
|
||||||
)
|
)
|
||||||
crop_pool = CropPngActor.pool(
|
crop_pool = CropPngActor.pool(
|
||||||
size: cores,
|
size: cores,
|
||||||
args: { output_path: output_path }
|
args: [ output_path ]
|
||||||
)
|
)
|
||||||
|
|
||||||
futures =
|
futures =
|
||||||
|
|
|
@ -1,7 +1,56 @@
|
||||||
|
|
||||||
require 'set' # Set is used below and is only autoloaded from Ruby 3.2 onwards

module Webgalien
  # Crawls a website starting from a root URL and writes the list of pages
  # found under that root to a YAML file.
  class Sitemap
    # Crawl the site and write the sitemap.
    #
    # The resulting YAML document has two keys: "root" (the start URL) and
    # "pages" (the final, post-redirect URL of every page fetched).
    #
    # @param url [String] start URL; only links whose resolved URL starts
    #   with this string are followed
    # @param output [String] path of the YAML file to write
    def self.start(url:, output:)
      puts "Loading #{url}"

      root = url
      visited = Set.new          # final (post-redirect) URLs of fetched pages
      requested = Set.new        # every URL already handed to get_links
      remains = Set.new([url])   # URLs still to fetch

      until remains.empty?
        current = remains.first
        remains.delete(current)
        # Remember the URL we asked for, not only the one we landed on:
        # a URL that redirects to an already-visited page would otherwise
        # be queued again each time a page links to it, and the crawl would
        # never terminate.
        requested << current

        final_url, links = get_links(root, current)
        visited << final_url

        # A link to "…/page" is considered already seen when "…/page/" has
        # been visited.
        without_slash = visited.map { |page| page.sub(%r{/\z}, '') }
        remains = remains + links.to_set - visited - without_slash - requested
      end

      result = {
        'root' => root,
        'pages' => visited.to_a
      }
      File.write(output, result.to_yaml)
    end

    # Fetch one page and collect the in-scope links it contains.
    #
    # Every link found is reported on stdout together with its status
    # (ok / out of scope / unsupported scheme).
    #
    # @param root [String] URL prefix a link must start with to be kept
    # @param url [String] URL of the page to fetch
    # @return [Array(String, Array<String>)] the final URL of the fetched
    #   page (after redirects, as reported by Mechanize) and the resolved
    #   URLs of its in-scope links
    def self.get_links(root, url)
      links = []
      mechanize = Mechanize.new
      page = mechanize.get(url)
      final_url = page.uri.to_s

      # Responses that are not parsed as HTML pages (downloads, images, …)
      # do not expose #links; record them as a page without links instead of
      # crashing the whole crawl.
      return [final_url, links] unless page.respond_to?(:links)

      page.links.each do |link|
        # Anchors without an href cannot be resolved. The previous guard
        # (`! link.href =~ /…/`) parsed as `(!link.href) =~ /…/`, so it never
        # filtered anything and raises NoMethodError on Ruby >= 3.2. Relative
        # hrefs are kept on purpose: they are resolved below and filtered by
        # the root-prefix check.
        next if link.href.nil?

        begin
          link_url = mechanize.resolve(link.href).to_s
          print "Found #{url} -> #{link_url} "
          if link_url.start_with?(root)
            puts "(ok)".green
            links << link_url
          else
            puts "(out of scope)".red
          end
        rescue Mechanize::UnsupportedSchemeError
          print "Found #{url} -> #{link.href} "
          puts "(unsupported scheme)".red
        end
      end

      [final_url, links]
    end
  end
end
|
||||||
|
|
Loading…
Reference in a new issue