Improve code

This commit is contained in:
Glenn Y. Rolland 2018-09-21 01:25:39 +02:00
commit 521b525a2d
24 changed files with 1040 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
cache/
node_modules/
*.pdf

2
.rubocop Normal file
View file

@ -0,0 +1,2 @@
Metrics/Length:
Max: 15

4
Gemfile Normal file
View file

@ -0,0 +1,4 @@
# frozen_string_literal: true
source "https://rubygems.org"
gemspec

55
Gemfile.lock Normal file
View file

@ -0,0 +1,55 @@
PATH
remote: .
specs:
webgalien (0.1.0)
celluloid (~> 0.17.3)
colorize (~> 0.8.1)
selenium-webdriver (~> 3.14)
thor (~> 0.20.0)
GEM
remote: https://rubygems.org/
specs:
celluloid (0.17.3)
celluloid-essentials
celluloid-extras
celluloid-fsm
celluloid-pool
celluloid-supervision
timers (>= 4.1.1)
celluloid-essentials (0.20.5)
timers (>= 4.1.1)
celluloid-extras (0.20.5)
timers (>= 4.1.1)
celluloid-fsm (0.20.5)
timers (>= 4.1.1)
celluloid-pool (0.20.5)
timers (>= 4.1.1)
celluloid-supervision (0.20.6)
timers (>= 4.1.1)
childprocess (0.9.0)
ffi (~> 1.0, >= 1.0.11)
colorize (0.8.1)
ffi (1.9.25)
hitimes (1.3.0)
minitest (5.11.3)
rake (12.3.1)
rubyzip (1.2.2)
selenium-webdriver (3.14.0)
childprocess (~> 0.5)
rubyzip (~> 1.2)
thor (0.20.0)
timers (4.1.2)
hitimes
PLATFORMS
ruby
DEPENDENCIES
bundler
minitest
rake
webgalien!
BUNDLED WITH
1.16.0

13
README.md Normal file
View file

@ -0,0 +1,13 @@
Prerequisites
apt-get install img2pdf imagemagick
Increase the memory limit in the ImageMagick policy file `/etc/ImageMagick-6/policy.xml`:
<policy domain="resource" name="memory" value="2GiB"/>
window.$FlowPaper('documentViewer').config
window.$FlowPaper('documentViewer').switchMode('portrait')

10
bin/install Executable file
View file

@ -0,0 +1,10 @@
#!/bin/sh
# Install the project's tooling: npm packages and the CasperJS Docker image.
# Aborts on the first failing command.
set -e

# Move to the repository root, i.e. the parent of the directory holding this script.
cd "$(dirname "$0")/.."
ROOTDIR="$(pwd)"

npm install
docker pull vitr/casperjs

18
bin/install.selenium Normal file
View file

@ -0,0 +1,18 @@
# Download the latest chromedriver release and install it into a directory
# that is added to PATH (for this shell and, via ~/.bashrc, for future ones).

# platform options: linux32, linux64, mac64, win32
PLATFORM=linux64

# the directory to install the chromedriver binary in
INSTALLATION_DIRECTORY=~/bin/

# create it if necessary
mkdir -p "$INSTALLATION_DIRECTORY"

# and add it to PATH
export PATH="$INSTALLATION_DIRECTORY:$PATH"

# and add the PATH modification to ~/.bashrc
# The directory is expanded now, because INSTALLATION_DIRECTORY will not exist
# in future shells; \$PATH stays literal so it is resolved when ~/.bashrc runs.
echo "export PATH=\"$INSTALLATION_DIRECTORY:\$PATH\"" >> ~/.bashrc

# automatically find the latest version (HTTPS: the result selects an executable)
VERSION=$(curl https://chromedriver.storage.googleapis.com/LATEST_RELEASE)

# download and extract the latest version
curl "https://chromedriver.storage.googleapis.com/$VERSION/chromedriver_$PLATFORM.zip" \
  | bsdtar -xvf - -C "$INSTALLATION_DIRECTORY"

20
bin/run Executable file
View file

@ -0,0 +1,20 @@
#!/bin/sh
# Run download.js with the locally installed CasperJS, using the SlimerJS engine.

# Disabled alternative: run CasperJS inside a long-lived Docker container.
# --restart always \
# docker run -d \
# --rm \
# --name casperjs-daemon \
# -v $(pwd):/home/casperjs-tests \
# vitr/casperjs

#--engine=slimerjs \

# Resolve npm's local bin directory once and quote it, so a project path
# containing spaces does not split the command.
NPM_BIN="$(npm bin)"
export PATH="$NPM_BIN:$PATH"

"$NPM_BIN/casperjs" \
  --log-level=debug \
  --engine=slimerjs \
  ./download.js \
  "URL"

# docker exec casperjs-daemon phantomjs download.js "URL"
# docker stop casperjs-daemon

4
bin/uninstall Executable file
View file

@ -0,0 +1,4 @@
#!/bin/sh
# Stop the Docker container named casperjs-daemon.
docker stop casperjs-daemon

3
credential.yml Normal file
View file

@ -0,0 +1,3 @@
username:
password:

6
exe/webgalien Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env ruby
# Command-line entry point: loads the library and hands ARGV to the Thor CLI.
require 'webgalien'
Webgalien::Cli.start(ARGV)

21
lib/webgalien.rb Normal file
View file

@ -0,0 +1,21 @@
# Load external dependencies
require 'thor'
require 'celluloid/current'
require 'selenium-webdriver'
require 'yaml'
require 'thor'
require 'colorize'
require 'fileutils'
# Load actors
require 'webgalien/actors/crop_png_actor'
require 'webgalien/actors/screenshot_actor'
require 'webgalien/actors/work_actor'
require 'webgalien/screenshot'
require 'webgalien/sitemap'
# Load cli
require 'webgalien/cli'

View file

@ -0,0 +1,24 @@
# Actor for cropping png
module Webgalien
  # Pipeline stage that crops a captured PNG with ImageMagick's `convert`.
  class CropPngActor
    include Celluloid

    # Crop the image described by the incoming work item.
    #
    # After `shift!`, the work's input is read for `:path` (source image) and
    # `:bbox` (hash with `:w`, `:h`, `:x`, `:y`).
    #
    # @param work_future [#value] future resolving to the work item to process
    # @return [Work] the same work item, with `output` set to `{ path: ... }`
    def perform(work_future)
      work = work_future.value
      work.shift!

      input_path = work.input[:path]
      bbox = work.input[:bbox]
      # NOTE(review): TMP_PREFIX is not defined in the visible sources;
      # presumably a top-level constant naming the scratch directory — confirm.
      output_path = File.join(TMP_PREFIX, "crop-#{work.id}.png")
      geometry = "#{bbox[:w]}x#{bbox[:h]}+#{bbox[:x]}+#{bbox[:y]}"

      puts "(#{work.id}) cropping capture".green
      # Argument-list form bypasses the shell, so paths containing spaces or
      # shell metacharacters reach `convert` verbatim.
      cropped = system('convert', '-crop', geometry, input_path, output_path)
      # Report the failure instead of silently handing on a missing file.
      warn "(#{work.id}) cropping failed".red unless cropped

      work.output = { path: output_path }
      work
    end
  end
end

View file

@ -0,0 +1,84 @@
# User agent string passed to Chrome through the --user-agent argument.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) ' \
  'AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7'.freeze

# Selenium::WebDriver.logger.level =

module Webgalien
  # Pipeline stage that loads a URL in headless Chrome and saves a screenshot.
  class ScreenshotActor
    include Celluloid

    # Number of consecutive identical DOM polls that must be exceeded before
    # the page is considered stable.
    DOM_STABLE_POLLS = 15
    # Maximum time, in seconds, to wait for the DOM to stabilize.
    DOM_WAIT_TIMEOUT = 120

    # Capture the page named by the work item's `:url` input.
    #
    # @param future_work [#value] future resolving to the work item to process
    # @return [Work] the same work item, with `output` set to
    #   `{ bbox: {w:, h:, x:, y:}, path: <screenshot file> }`
    def perform(future_work)
      work = future_work.value
      work.shift!

      driver = initialize_selenium_driver
      begin
        work.output = capture_page(driver, work)
      ensure
        # Always close the browser, even when loading or capturing raised,
        # so failed jobs do not leave Chrome processes behind.
        driver.quit
      end
      work
    end

    private

    # Load the page, wait for it to settle, and save the screenshot.
    # Returns the output hash for the work item.
    def capture_page(driver, work)
      url = work.input[:url]

      # Start selenium work
      # platform_login driver

      # Go to wanted page
      driver.manage.window.resize_to(1440, 8000)
      puts "(#{work.id}) loading page #{url}"
      driver.navigate.to url

      puts "(#{work.id}) waiting DOM stability"
      wait_dom_stability(driver)

      puts "(#{work.id}) getting page dimensions"
      bbox = body_bbox(driver)

      puts "(#{work.id}) saving page"
      # NOTE(review): TMP_PREFIX is not defined in the visible sources;
      # presumably a top-level constant naming the scratch directory — confirm.
      FileUtils.mkdir_p(TMP_PREFIX)
      tmp_path = File.join(TMP_PREFIX, "capture-#{work.id}.png")
      driver.save_screenshot tmp_path

      { bbox: bbox, path: tmp_path }
    end

    # Bounding box of the <body> element; the vertical offset is forced to 0.
    def body_bbox(driver)
      element = driver.find_element(:css, 'body')
      location = element.location
      size = element.size
      {
        w: size.width.to_i,
        h: size.height.to_i,
        x: location.x.to_i,
        y: 0 # location.y.to_i
      }
    end

    # configure the driver to run in headless mode
    def initialize_selenium_driver
      options = Selenium::WebDriver::Chrome::Options.new
      options.add_argument('--headless')
      options.add_argument('--disable-gpu')
      options.add_argument("--user-agent=#{USER_AGENT}")
      Selenium::WebDriver.for :chrome, options: options
    end

    # Wait until the body's innerHTML has been identical for more than
    # DOM_STABLE_POLLS consecutive polls, giving up after DOM_WAIT_TIMEOUT
    # seconds (Selenium's Wait raises when the timeout expires).
    def wait_dom_stability(driver)
      dom_before = nil
      dom_now = nil
      dom_stability = 0

      wait = Selenium::WebDriver::Wait.new(timeout: DOM_WAIT_TIMEOUT)
      wait.until do
        # save old dom
        dom_before = dom_now
        # get new dom
        dom_now = driver.find_element(:css, 'body').attribute('innerHTML')

        # test stability: any change restarts the count
        dom_stability += 1
        dom_stability = 0 if dom_before != dom_now
        dom_stability > DOM_STABLE_POLLS
      end
    end
  end
end

View file

@ -0,0 +1,24 @@
module Webgalien
  # A unit of work passed from one pipeline stage to the next.
  #
  # Each stage reads `input`, writes `output`, and the next stage calls
  # `shift!` to promote that output to its own input.
  class Work
    attr_reader :id, :input, :artefacts
    attr_accessor :output

    # @param id [#to_s] identifier; every character outside `[a-zA-Z0-9_-]`
    #   is replaced by `-`
    # @param input [Object] data for the first stage
    def initialize(id:, input:)
      @id = id.to_s.gsub(/[^a-zA-Z0-9_-]/, '-')
      @input = input
      @output = nil
      @artefacts = []
    end

    # Make the work ready for the next stage: archive the current input in
    # `artefacts`, promote `output` to `input`, and clear `output`.
    # Does nothing when no output has been set.
    #
    # @return [Work] always self, so calls such as `work.shift!.artefacts`
    #   stay safe even when there was nothing to shift
    def shift!
      return self if @output.nil?

      @artefacts << @input
      @input = @output
      @output = nil
      self
    end
  end
end

37
lib/webgalien/cli.rb Normal file
View file

@ -0,0 +1,37 @@
module Webgalien
  # Thor-based command-line interface exposing the `sitemap` and
  # `screenshot` commands.
  class Cli < Thor
    class_option :'user-agent',
                 aliases: '-ua',
                 banner: 'USER-AGENT',
                 type: :string,
                 desc: 'choose user agent (default Mozilla)'

    desc 'sitemap URL FILE', 'crawl site and export sitemap'
    # Not implemented yet: accepts its arguments and does nothing.
    def sitemap(url, file); end

    desc 'screenshot FILE', 'take screenshots for each page'
    option :profile,
           aliases: '-p',
           banner: 'PROFILE',
           type: :string,
           desc: 'choose device profile / resolution (default 1440x900 on desktop pc)'
    option :output,
           aliases: '-o',
           banner: 'OUTPUT-DIRECTORY',
           type: :string,
           default: '.',
           desc: 'where resulting content will be produced'
    # Read a YAML file with a `root` string and a `pages` list, and take a
    # screenshot of every `root + page` URL.
    #
    # @param file [String] path to the YAML configuration
    # @raise [KeyError] when the configuration lacks `root` or `pages`
    def screenshot(file)
      # load_file opens and closes the file itself (no leaked handle).
      # NOTE(review): with older Psych versions this can instantiate arbitrary
      # objects; consider YAML.safe_load if FILE may come from an untrusted source.
      config = YAML.load_file(file)
      root = config.fetch('root')
      prefixed_urls = config.fetch('pages').map { |page| root + page }

      Screenshot.start(
        urls: prefixed_urls,
        output_path: options['output']
      )
    end
  end
end

View file

@ -0,0 +1,40 @@
module Webgalien
  # Drives the screenshot pipeline: one capture job and one crop job per URL.
  class Screenshot
    # Queue every URL through the screenshot pool, then the crop pool, and
    # wait for all jobs to finish.
    #
    # @param urls [Array<String>] pages to capture
    # @param output_path [String] forwarded to both pools as their `args`
    # @return [Array] the flattened artefacts of every finished work item
    def self.start(urls:, output_path:)
      # Start workpools, one worker per core
      worker_count = Celluloid.cores
      screenshot_pool = ScreenshotActor.pool(size: worker_count, args: { output_path: output_path })
      crop_pool = CropPngActor.pool(size: worker_count, args: { output_path: output_path })

      # Three separate passes: all work items first, then all captures, then all crops.
      pending_works = urls.map do |url|
        Celluloid::Future.new { Work.new(input: {url: url}, id: url) }
      end
      pending_captures = pending_works.map { |pending| screenshot_pool.future.perform(pending) }
      futures = pending_captures.map { |pending| crop_pool.future.perform(pending) }

      puts '(main) Waiting for remaining jobs'
      results = futures.map(&:value)

      puts '(main) Cleaning'
      collected = results.map { |finished| finished.shift!.artefacts }
      # Disabled clean-up of the collected artefacts:
      #.select { |artefact| artefact[:type] == :file }
      #.each { |file| FileUtils.rm_f file }
      collected.flatten
    end
  end
end

7
lib/webgalien/sitemap.rb Normal file
View file

@ -0,0 +1,7 @@
module Webgalien
  # Placeholder for the sitemap feature; nothing is implemented yet.
  class Sitemap
    class << self
      # Accepts the output location and does nothing.
      #
      # @param output_path [Object] currently unused
      # @return [nil]
      def start(output_path:); end
    end
  end
end

4
lib/webgalien/version.rb Normal file
View file

@ -0,0 +1,4 @@
# Namespace of the webgalien gem.
module Webgalien
# Current gem version; read by webgalien.gemspec.
VERSION = '0.1.0'.freeze
end

586
package-lock.json generated Normal file
View file

@ -0,0 +1,586 @@
{
"name": "presse-cote-d-ivoire",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"ajv": {
"version": "5.5.2",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz",
"integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=",
"requires": {
"co": "4.6.0",
"fast-deep-equal": "1.1.0",
"fast-json-stable-stringify": "2.0.0",
"json-schema-traverse": "0.3.1"
}
},
"asn1": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz",
"integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==",
"requires": {
"safer-buffer": "2.1.2"
}
},
"assert-plus": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz",
"integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU="
},
"asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k="
},
"aws-sign2": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
"integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg="
},
"aws4": {
"version": "1.8.0",
"resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz",
"integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ=="
},
"bcrypt-pbkdf": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz",
"integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=",
"optional": true,
"requires": {
"tweetnacl": "0.14.5"
}
},
"buffer-from": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz",
"integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A=="
},
"caseless": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
"integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw="
},
"casperjs": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/casperjs/-/casperjs-1.1.4.tgz",
"integrity": "sha1-6wH07YWsUgqPTZMrTap00+d7x0Y="
},
"co": {
"version": "4.6.0",
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
"integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ="
},
"combined-stream": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.6.tgz",
"integrity": "sha1-cj599ugBrFYTETp+RFqbactjKBg=",
"requires": {
"delayed-stream": "1.0.0"
}
},
"concat-stream": {
"version": "1.6.2",
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
"integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==",
"requires": {
"buffer-from": "1.1.1",
"inherits": "2.0.3",
"readable-stream": "2.3.6",
"typedarray": "0.0.6"
}
},
"core-util-is": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
"integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
},
"dashdash": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
"integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=",
"requires": {
"assert-plus": "1.0.0"
}
},
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk="
},
"ecc-jsbn": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
"integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=",
"optional": true,
"requires": {
"jsbn": "0.1.1",
"safer-buffer": "2.1.2"
}
},
"es6-promise": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.4.tgz",
"integrity": "sha512-/NdNZVJg+uZgtm9eS3O6lrOLYmQag2DjdEXuPaHlZ6RuVqgqaVZfgYCepEIKsLqwdQArOPtC3XzRLqGGfT8KQQ=="
},
"extend": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
"integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="
},
"extract-zip": {
"version": "1.6.7",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-1.6.7.tgz",
"integrity": "sha1-qEC0uK9kAyZMjbV/Txp0Mz74H+k=",
"requires": {
"concat-stream": "1.6.2",
"debug": "2.6.9",
"mkdirp": "0.5.1",
"yauzl": "2.4.1"
}
},
"extsprintf": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
"integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU="
},
"fast-deep-equal": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz",
"integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ="
},
"fast-json-stable-stringify": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz",
"integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I="
},
"fd-slicer": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.0.1.tgz",
"integrity": "sha1-i1vL2ewyfFBBv5qwI/1nUPEXfmU=",
"requires": {
"pend": "1.2.0"
}
},
"forever-agent": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
"integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE="
},
"form-data": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.2.tgz",
"integrity": "sha1-SXBJi+YEwgwAXU9cI67NIda0kJk=",
"requires": {
"asynckit": "0.4.0",
"combined-stream": "1.0.6",
"mime-types": "2.1.20"
}
},
"fs-extra": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-1.0.0.tgz",
"integrity": "sha1-zTzl9+fLYUWIP8rjGR6Yd/hYeVA=",
"requires": {
"graceful-fs": "4.1.11",
"jsonfile": "2.4.0",
"klaw": "1.3.1"
}
},
"getpass": {
"version": "0.1.7",
"resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz",
"integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=",
"requires": {
"assert-plus": "1.0.0"
}
},
"graceful-fs": {
"version": "4.1.11",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz",
"integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg="
},
"har-schema": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
"integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI="
},
"har-validator": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.0.tgz",
"integrity": "sha512-+qnmNjI4OfH2ipQ9VQOw23bBd/ibtfbVdK2fYbY4acTDqKTW/YDp9McimZdDbG8iV9fZizUqQMD5xvriB146TA==",
"requires": {
"ajv": "5.5.2",
"har-schema": "2.0.0"
}
},
"hasha": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/hasha/-/hasha-2.2.0.tgz",
"integrity": "sha1-eNfL/B5tZjA/55g3NlmEUXsvbuE=",
"requires": {
"is-stream": "1.1.0",
"pinkie-promise": "2.0.1"
}
},
"http-signature": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
"integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=",
"requires": {
"assert-plus": "1.0.0",
"jsprim": "1.4.1",
"sshpk": "1.14.2"
}
},
"inherits": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
},
"is-stream": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-1.1.0.tgz",
"integrity": "sha1-EtSj3U5o4Lec6428hBc66A2RykQ="
},
"is-typedarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
"integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo="
},
"isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE="
},
"isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
"integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA="
},
"isstream": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
"integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo="
},
"jsbn": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
"integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=",
"optional": true
},
"json-schema": {
"version": "0.2.3",
"resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz",
"integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM="
},
"json-schema-traverse": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz",
"integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A="
},
"json-stringify-safe": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
"integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus="
},
"jsonfile": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz",
"integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=",
"requires": {
"graceful-fs": "4.1.11"
}
},
"jsprim": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz",
"integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=",
"requires": {
"assert-plus": "1.0.0",
"extsprintf": "1.3.0",
"json-schema": "0.2.3",
"verror": "1.10.0"
}
},
"kew": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/kew/-/kew-0.7.0.tgz",
"integrity": "sha1-edk9LTM2PW/dKXCzNdkUGtWR15s="
},
"klaw": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/klaw/-/klaw-1.3.1.tgz",
"integrity": "sha1-QIhDO0azsbolnXh4XY6W9zugJDk=",
"requires": {
"graceful-fs": "4.1.11"
}
},
"mime-db": {
"version": "1.36.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.36.0.tgz",
"integrity": "sha512-L+xvyD9MkoYMXb1jAmzI/lWYAxAMCPvIBSWur0PZ5nOf5euahRLVqH//FKW9mWp2lkqUgYiXPgkzfMUFi4zVDw=="
},
"mime-types": {
"version": "2.1.20",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.20.tgz",
"integrity": "sha512-HrkrPaP9vGuWbLK1B1FfgAkbqNjIuy4eHlIYnFi7kamZyLLrGlo2mpcx0bBmNpKqBtYtAfGbodDddIgddSJC2A==",
"requires": {
"mime-db": "1.36.0"
}
},
"minimist": {
"version": "0.0.8",
"resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz",
"integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0="
},
"mkdirp": {
"version": "0.5.1",
"resolved": "http://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz",
"integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=",
"requires": {
"minimist": "0.0.8"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
},
"oauth-sign": {
"version": "0.9.0",
"resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz",
"integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ=="
},
"pend": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
"integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA="
},
"performance-now": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
"integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns="
},
"phantomjs-prebuilt": {
"version": "2.1.16",
"resolved": "https://registry.npmjs.org/phantomjs-prebuilt/-/phantomjs-prebuilt-2.1.16.tgz",
"integrity": "sha1-79ISpKOWbTZHaE6ouniFSb4q7+8=",
"requires": {
"es6-promise": "4.2.4",
"extract-zip": "1.6.7",
"fs-extra": "1.0.0",
"hasha": "2.2.0",
"kew": "0.7.0",
"progress": "1.1.8",
"request": "2.88.0",
"request-progress": "2.0.1",
"which": "1.3.1"
}
},
"pinkie": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/pinkie/-/pinkie-2.0.4.tgz",
"integrity": "sha1-clVrgM+g1IqXToDnckjoDtT3+HA="
},
"pinkie-promise": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/pinkie-promise/-/pinkie-promise-2.0.1.tgz",
"integrity": "sha1-ITXW36ejWMBprJsXh3YogihFD/o=",
"requires": {
"pinkie": "2.0.4"
}
},
"process-nextick-args": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz",
"integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw=="
},
"progress": {
"version": "1.1.8",
"resolved": "https://registry.npmjs.org/progress/-/progress-1.1.8.tgz",
"integrity": "sha1-4mDHj2Fhzdmw5WzD4Khd4Xx6V74="
},
"psl": {
"version": "1.1.29",
"resolved": "https://registry.npmjs.org/psl/-/psl-1.1.29.tgz",
"integrity": "sha512-AeUmQ0oLN02flVHXWh9sSJF7mcdFq0ppid/JkErufc3hGIV/AMa8Fo9VgDo/cT2jFdOWoFvHp90qqBH54W+gjQ=="
},
"punycode": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz",
"integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4="
},
"qs": {
"version": "6.5.2",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
"integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA=="
},
"readable-stream": {
"version": "2.3.6",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz",
"integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==",
"requires": {
"core-util-is": "1.0.2",
"inherits": "2.0.3",
"isarray": "1.0.0",
"process-nextick-args": "2.0.0",
"safe-buffer": "5.1.2",
"string_decoder": "1.1.1",
"util-deprecate": "1.0.2"
}
},
"request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
"integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==",
"requires": {
"aws-sign2": "0.7.0",
"aws4": "1.8.0",
"caseless": "0.12.0",
"combined-stream": "1.0.6",
"extend": "3.0.2",
"forever-agent": "0.6.1",
"form-data": "2.3.2",
"har-validator": "5.1.0",
"http-signature": "1.2.0",
"is-typedarray": "1.0.0",
"isstream": "0.1.2",
"json-stringify-safe": "5.0.1",
"mime-types": "2.1.20",
"oauth-sign": "0.9.0",
"performance-now": "2.1.0",
"qs": "6.5.2",
"safe-buffer": "5.1.2",
"tough-cookie": "2.4.3",
"tunnel-agent": "0.6.0",
"uuid": "3.3.2"
}
},
"request-progress": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/request-progress/-/request-progress-2.0.1.tgz",
"integrity": "sha1-XTa7V5YcZzqlt4jbyBQf3yO0Tgg=",
"requires": {
"throttleit": "1.0.0"
}
},
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
"safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"slimerjs": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/slimerjs/-/slimerjs-1.0.0.tgz",
"integrity": "sha1-xepFdUGh7NZXiSwOATYwq8aaIJE="
},
"sshpk": {
"version": "1.14.2",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.14.2.tgz",
"integrity": "sha1-xvxhZIo9nE52T9P8306hBeSSupg=",
"requires": {
"asn1": "0.2.4",
"assert-plus": "1.0.0",
"bcrypt-pbkdf": "1.0.2",
"dashdash": "1.14.1",
"ecc-jsbn": "0.1.2",
"getpass": "0.1.7",
"jsbn": "0.1.1",
"safer-buffer": "2.1.2",
"tweetnacl": "0.14.5"
}
},
"string_decoder": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
"requires": {
"safe-buffer": "5.1.2"
}
},
"throttleit": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/throttleit/-/throttleit-1.0.0.tgz",
"integrity": "sha1-nnhYNtr0Z0MUWlmEtiaNgoUorGw="
},
"tough-cookie": {
"version": "2.4.3",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz",
"integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==",
"requires": {
"psl": "1.1.29",
"punycode": "1.4.1"
}
},
"tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
"integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=",
"requires": {
"safe-buffer": "5.1.2"
}
},
"tweetnacl": {
"version": "0.14.5",
"resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
"integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=",
"optional": true
},
"typedarray": {
"version": "0.0.6",
"resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
"integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c="
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
},
"uuid": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz",
"integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA=="
},
"verror": {
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz",
"integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=",
"requires": {
"assert-plus": "1.0.0",
"core-util-is": "1.0.2",
"extsprintf": "1.3.0"
}
},
"which": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz",
"integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==",
"requires": {
"isexe": "2.0.0"
}
},
"yauzl": {
"version": "2.4.1",
"resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.4.1.tgz",
"integrity": "sha1-lSj0QtqxsihOWLQ3m7GU4i4MQAU=",
"requires": {
"fd-slicer": "1.0.1"
}
}
}
}

16
package.json Normal file
View file

@ -0,0 +1,16 @@
{
"name": "presse-cote-d-ivoire",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"casperjs": "^1.1.4",
"phantomjs-prebuilt": "^2.1.16",
"slimerjs": "^1.0.0"
}
}

17
test.rb Executable file
View file

@ -0,0 +1,17 @@
#!/usr/bin/env ruby
# Runs ./download.rb for each publication URL, retrying a failed run once.

urls = %w[
  https://boutique.pressecotedivoire.fr/reading?id=SOIRINFO7163pZyPXQ#page=1
  https://boutique.pressecotedivoire.fr/reading?id=NOTREVOIE5947jneW2f#page=1
  https://boutique.pressecotedivoire.fr/reading?id=LGINFOS1979CHPFdD#page=1
  https://boutique.pressecotedivoire.fr/reading?id=LETEMPS4463XBeNW4#page=1
]

urls.each do |url|
  cmd = %(bundle exec ./download.rb "#{url}")
  # `system` is truthy only on a zero exit status; otherwise try again once.
  system(cmd) || system(cmd)
end

13
theyellowbridge.yml Normal file
View file

@ -0,0 +1,13 @@
root: http://tyb.phtechno.com
pages:
- /nos-offres/
- /nos-offres/accompagnement-transformation/
- /nos-offres/developpement-cohesion-sociale/
- /nos-offres/digital-learning/
- /nos-offres/gestion-agile-competences/
- /nos-offres/mutualisation-ressources-rh/
- /nos-offres/realisation-etudes-prospectives/
- /notre-reseau/
- /contact/
- /cookies-et-confidentialite/
- /mentions-legales/

29
webgalien.gemspec Normal file
View file

@ -0,0 +1,29 @@
# coding: utf-8
# Gem specification for webgalien.

# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'webgalien/version'

Gem::Specification.new do |spec|
  spec.name          = 'webgalien'
  spec.version       = Webgalien::VERSION
  spec.authors       = ['Glenn Y. Rolland']
  spec.email         = ['glenux@glenux.net']
  # The previous summary/description were copied from an unrelated project;
  # these describe the commands this gem's CLI declares.
  spec.summary       = 'Crawl a website and take screenshots of its pages.'
  spec.description   = 'Command-line tool that takes a screenshot of each page ' \
                       'listed in a YAML file, using headless Chrome through Selenium.'
  spec.homepage      = 'https://github.com/glenux/webgalien'
  spec.license       = 'MIT'

  # Package every file tracked by git; executables live under exe/.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'selenium-webdriver', '~> 3.14'
  spec.add_runtime_dependency 'colorize', '~> 0.8.1'
  spec.add_runtime_dependency 'celluloid', '~> 0.17.3'
  spec.add_runtime_dependency 'thor', '~> 0.20.0'

  spec.add_development_dependency 'bundler'
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'minitest'
end