commit 521b525a2dd58e7654528bd48c9804a00816992b Author: Glenn Y. Rolland Date: Fri Sep 21 01:25:39 2018 +0200 Improve code diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7f082be --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +cache/ +node_modules/ +*.pdf diff --git a/.rubocop b/.rubocop new file mode 100644 index 0000000..b5d3ae9 --- /dev/null +++ b/.rubocop @@ -0,0 +1,2 @@ +Metrics/Length: + Max: 15 diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..bb94df8 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +source "https://rubygems.org" +gemspec diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..0bf6753 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,55 @@ +PATH + remote: . + specs: + webgalien (0.1.0) + celluloid (~> 0.17.3) + colorize (~> 0.8.1) + selenium-webdriver (~> 3.14) + thor (~> 0.20.0) + +GEM + remote: https://rubygems.org/ + specs: + celluloid (0.17.3) + celluloid-essentials + celluloid-extras + celluloid-fsm + celluloid-pool + celluloid-supervision + timers (>= 4.1.1) + celluloid-essentials (0.20.5) + timers (>= 4.1.1) + celluloid-extras (0.20.5) + timers (>= 4.1.1) + celluloid-fsm (0.20.5) + timers (>= 4.1.1) + celluloid-pool (0.20.5) + timers (>= 4.1.1) + celluloid-supervision (0.20.6) + timers (>= 4.1.1) + childprocess (0.9.0) + ffi (~> 1.0, >= 1.0.11) + colorize (0.8.1) + ffi (1.9.25) + hitimes (1.3.0) + minitest (5.11.3) + rake (12.3.1) + rubyzip (1.2.2) + selenium-webdriver (3.14.0) + childprocess (~> 0.5) + rubyzip (~> 1.2) + thor (0.20.0) + timers (4.1.2) + hitimes + +PLATFORMS + ruby + +DEPENDENCIES + bundler + minitest + rake + webgalien! 
+ +BUNDLED WITH + 1.16.0 diff --git a/README.md b/README.md new file mode 100644 index 0000000..f0e20bf --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ + +Prerequisites + + apt-get install img2pdf imagemagick + + +Increase memory in imagemagick policy `/etc/ImageMagick-6/policy.xml` + + + +window.$FlowPaper('documentViewer').config +window.$FlowPaper('documentViewer').switchMode('portrait') + diff --git a/bin/install b/bin/install new file mode 100755 index 0000000..4979564 --- /dev/null +++ b/bin/install @@ -0,0 +1,10 @@ +#!/bin/sh + +set -e +ROOTDIR="$(cd "$(dirname "$0")/.." || exit 1 ; pwd)" + +cd "$ROOTDIR" + +npm install + +docker pull vitr/casperjs diff --git a/bin/install.selenium b/bin/install.selenium new file mode 100644 index 0000000..6b71148 --- /dev/null +++ b/bin/install.selenium @@ -0,0 +1,18 @@ +# platform options: linux32, linux64, mac64, win32 +PLATFORM=linux64 + +# the directory to install the chromedriver binary in +INSTALLATION_DIRECTORY=~/bin/ +# create it if necessary +mkdir -p $INSTALLATION_DIRECTORY +# and add it to PATH +export PATH="$INSTALLATION_DIRECTORY:$PATH" +# and add the PATH modification to ~/.bashrc +echo 'export PATH="$INSTALLATION_DIRECTORY:$PATH"' >> ~/.bashrc + +# automatically find the latest version +VERSION=$(curl http://chromedriver.storage.googleapis.com/LATEST_RELEASE) + +# download and extract the latest version +curl http://chromedriver.storage.googleapis.com/$VERSION/chromedriver_$PLATFORM.zip \ +| bsdtar -xvf - -C $INSTALLATION_DIRECTORY diff --git a/bin/run b/bin/run new file mode 100755 index 0000000..c6dd191 --- /dev/null +++ b/bin/run @@ -0,0 +1,20 @@ +#!/bin/sh + + +# --restart always \ +# docker run -d \ +# --rm \ +# --name casperjs-daemon \ +# -v $(pwd):/home/casperjs-tests \ +# vitr/casperjs + #--engine=slimerjs \ + +export PATH="$(npm bin):$PATH" +$(npm bin)/casperjs \ + --log-level=debug \ + --engine=slimerjs \ + ./download.js \ + "URL" + +# docker exec casperjs-daemon phantomjs download.js "URL" +# docker 
stop casperjs-daemon diff --git a/bin/uninstall b/bin/uninstall new file mode 100755 index 0000000..d294f0c --- /dev/null +++ b/bin/uninstall @@ -0,0 +1,4 @@ +#!/bin/sh + +docker stop casperjs-daemon + diff --git a/credential.yml b/credential.yml new file mode 100644 index 0000000..904481a --- /dev/null +++ b/credential.yml @@ -0,0 +1,3 @@ + +username: +password: diff --git a/exe/webgalien b/exe/webgalien new file mode 100755 index 0000000..554045f --- /dev/null +++ b/exe/webgalien @@ -0,0 +1,6 @@ +#!/usr/bin/env ruby + +require 'webgalien' + +Webgalien::Cli.start(ARGV) + diff --git a/lib/webgalien.rb b/lib/webgalien.rb new file mode 100644 index 0000000..6d4e705 --- /dev/null +++ b/lib/webgalien.rb @@ -0,0 +1,21 @@ + +# Load external dependencies +require 'thor' +require 'celluloid/current' +require 'selenium-webdriver' +require 'yaml' +require 'thor' +require 'colorize' +require 'fileutils' + +# Load actors +require 'webgalien/actors/crop_png_actor' +require 'webgalien/actors/screenshot_actor' +require 'webgalien/actors/work_actor' + +require 'webgalien/screenshot' +require 'webgalien/sitemap' + +# Load cli +require 'webgalien/cli' + diff --git a/lib/webgalien/actors/crop_png_actor.rb b/lib/webgalien/actors/crop_png_actor.rb new file mode 100644 index 0000000..d8ada18 --- /dev/null +++ b/lib/webgalien/actors/crop_png_actor.rb @@ -0,0 +1,24 @@ + +# Actor for cropping png +module Webgalien +class CropPngActor + include Celluloid + + def perform(work_future) + work = work_future.value + work.shift! 
+ + input_path = work.input[:path] + bbox = work.input[:bbox] + output_path = File.join(TMP_PREFIX, 'crop-' + work.id + '.png') + + puts "(#{work.id}) cropping capture".green + system 'convert ' \ + "-crop #{bbox[:w]}x#{bbox[:h]}+#{bbox[:x]}+#{bbox[:y]} " \ + "#{input_path} #{output_path}" + + work.output = { path: output_path } + work + end +end +end diff --git a/lib/webgalien/actors/screenshot_actor.rb b/lib/webgalien/actors/screenshot_actor.rb new file mode 100644 index 0000000..3462b92 --- /dev/null +++ b/lib/webgalien/actors/screenshot_actor.rb @@ -0,0 +1,84 @@ +USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) ' \ + 'AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7'.freeze + +# configure the driver to run in headless mode +# Selenium::WebDriver.logger.level = + +module Webgalien + class ScreenshotActor + include Celluloid + + def perform(future_work) + driver = initialize_selenium_driver + + work = future_work.value + work.shift! + + url = work.input[:url] + # Start selenium work + # platform_login driver + # Go to wanted page + driver.manage.window.resize_to(1440, 8000) + puts "(#{work.id}) loading page #{url}" + driver.navigate.to url + + puts "(#{work.id}) waiting DOM stability" + wait_dom_stability(driver) + + puts "(#{work.id}) getting page dimensions" + element = driver.find_element(:css, "body") + location = element.location + size = element.size + + bbox = { + w: size.width.to_i, + h: size.height.to_i, + x: location.x.to_i, + y: 0 # location.y.to_i + } + + puts "(#{work.id}) saving page" + FileUtils.mkdir_p(TMP_PREFIX) + tmp_path = File.join(TMP_PREFIX, 'capture-' + work.id.to_s + '.png') + driver.save_screenshot tmp_path + + driver.quit + + work.output = { + bbox: bbox, + path: tmp_path + } + work + end + + private + + def initialize_selenium_driver + options = Selenium::WebDriver::Chrome::Options.new + options.add_argument('--headless') + options.add_argument('--disable-gpu') + 
options.add_argument("--user-agent=#{USER_AGENT}") + + Selenium::WebDriver.for :chrome, options: options + end + + # wait until the DOM structure has been stable for more than 15 consecutive polls + def wait_dom_stability(driver) + dom_before = nil + dom_now = nil + dom_stability = 0 + + wait = Selenium::WebDriver::Wait.new(timeout: 120) + wait.until do + # save old dom + dom_before = dom_now + # get new dom + dom_now = driver.find_element(:css, 'body').attribute('innerHTML') + # test stability + dom_stability += 1 + dom_stability = 0 if dom_before != dom_now + (dom_stability > 15) + end + end + end +end diff --git a/lib/webgalien/actors/work_actor.rb b/lib/webgalien/actors/work_actor.rb new file mode 100644 index 0000000..0d8e666 --- /dev/null +++ b/lib/webgalien/actors/work_actor.rb @@ -0,0 +1,24 @@ + +module Webgalien + class Work + attr_reader :id, :input, :artefacts + attr_accessor :output + + def initialize(id:, input: ) + @id = id.to_s.gsub(/[^a-zA-Z0-9_-]/, '-') + @input = input + + @output = nil + @artefacts = [] + end + + # make pipe ready + def shift! + return if @output.nil? 
+ @artefacts << @input + @input = @output + @output = nil + self + end + end +end diff --git a/lib/webgalien/cli.rb b/lib/webgalien/cli.rb new file mode 100644 index 0000000..e72bc09 --- /dev/null +++ b/lib/webgalien/cli.rb @@ -0,0 +1,37 @@ + +module Webgalien + class Cli < Thor + class_option :'user-agent', + aliases: '-ua', + banner: 'USER-AGENT', + type: :string, + desc: 'choose user agent (default Mozilla)' + + desc 'sitemap URL FILE', 'crawl site and export sitemap' + def sitemap url, file + end + + desc 'screenshot FILE', 'take screenshots for each page' + option :profile, + aliases: '-p', + banner: 'PROFILE', + type: :string, + desc: 'choose device profile / resolution (default 1440x900 on desktop pc)' + option :output, + aliases: '-o', + banner: 'OUTPUT-DIRECTORY', + type: :string, + default: '.', + desc: 'where resulting content will be produced' + + def screenshot file + config = YAML.load File.open(file) + prefixed_urls = config['pages'].map {|u| config['root'] + u } + + Screenshot.start( + urls: prefixed_urls, + output_path: options['output'] + ) + end + end +end diff --git a/lib/webgalien/screenshot.rb b/lib/webgalien/screenshot.rb new file mode 100644 index 0000000..d58b6c4 --- /dev/null +++ b/lib/webgalien/screenshot.rb @@ -0,0 +1,40 @@ + +module Webgalien + class Screenshot + def self.start(urls:, output_path:) + # Start workpools + cores = Celluloid.cores + screenshot_pool = ScreenshotActor.pool( + size: cores, + args: { output_path: output_path } + ) + crop_pool = CropPngActor.pool( + size: cores, + args: { output_path: output_path } + ) + + futures = + urls. + map do |url| + Celluloid::Future.new { Work.new(input: {url: url}, id: url) } + end. + map do |future_work| + screenshot_pool.future.perform(future_work) + end. 
+ map do |future_work| + crop_pool.future.perform(future_work) + end + + puts '(main) Waiting for remaining jobs' + results = futures.map { |future_work| future_work.value } + + puts '(main) Cleaning' + results + .map { |result| result.shift!.artefacts } + .flatten + #.select { |artefact| artefact[:type] == :file } + #.each { |file| FileUtils.rm_f file } + + end + end +end diff --git a/lib/webgalien/sitemap.rb b/lib/webgalien/sitemap.rb new file mode 100644 index 0000000..4dd5a8b --- /dev/null +++ b/lib/webgalien/sitemap.rb @@ -0,0 +1,7 @@ + +module Webgalien + class Sitemap + def self.start(output_path:) + end + end +end diff --git a/lib/webgalien/version.rb b/lib/webgalien/version.rb new file mode 100644 index 0000000..3dbabac --- /dev/null +++ b/lib/webgalien/version.rb @@ -0,0 +1,4 @@ + +module Webgalien + VERSION = '0.1.0'.freeze +end diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..273fe4e --- /dev/null +++ b/package-lock.json @@ -0,0 +1,586 @@ +{ + "name": "presse-cote-d-ivoire", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "ajv": { + "version": "5.5.2", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz", + "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=", + "requires": { + "co": "4.6.0", + "fast-deep-equal": "1.1.0", + "fast-json-stable-stringify": "2.0.0", + "json-schema-traverse": "0.3.1" + } + }, + "asn1": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "requires": { + "safer-buffer": "2.1.2" + } + }, + "assert-plus": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", + "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" + }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": 
"sha1-x57Zf380y48robyXkLzDZkdLS3k=" + }, + "aws-sign2": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", + "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=" + }, + "aws4": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", + "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" + }, + "bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", + "optional": true, + "requires": { + "tweetnacl": "0.14.5" + } + }, + "buffer-from": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" + }, + "caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=" + }, + "casperjs": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/casperjs/-/casperjs-1.1.4.tgz", + "integrity": "sha1-6wH07YWsUgqPTZMrTap00+d7x0Y=" + }, + "co": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", + "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=" + }, + "combined-stream": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.6.tgz", + "integrity": "sha1-cj599ugBrFYTETp+RFqbactjKBg=", + "requires": { + "delayed-stream": "1.0.0" + } + }, + "concat-stream": { + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", + "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==", + "requires": { + "buffer-from": "1.1.1", + "inherits": "2.0.3", + "readable-stream": "2.3.6", + "typedarray": "0.0.6" 
+ } + }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" + }, + "dashdash": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", + "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=", + "requires": { + "assert-plus": "1.0.0" + } + }, + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" + }, + "ecc-jsbn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", + "optional": true, + "requires": { + "jsbn": "0.1.1", + "safer-buffer": "2.1.2" + } + }, + "es6-promise": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/es6-promise/-/es6-promise-4.2.4.tgz", + "integrity": "sha512-/NdNZVJg+uZgtm9eS3O6lrOLYmQag2DjdEXuPaHlZ6RuVqgqaVZfgYCepEIKsLqwdQArOPtC3XzRLqGGfT8KQQ==" + }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, + "extract-zip": { + "version": "1.6.7", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-1.6.7.tgz", + "integrity": "sha1-qEC0uK9kAyZMjbV/Txp0Mz74H+k=", + "requires": { + "concat-stream": "1.6.2", + "debug": "2.6.9", + "mkdirp": "0.5.1", + "yauzl": "2.4.1" + } + }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": 
"sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" + }, + "fast-deep-equal": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz", + "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ=" + }, + "fast-json-stable-stringify": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", + "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=" + }, + "fd-slicer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.0.1.tgz", + "integrity": "sha1-i1vL2ewyfFBBv5qwI/1nUPEXfmU=", + "requires": { + "pend": "1.2.0" + } + }, + "forever-agent": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", + "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" + }, + "form-data": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.2.tgz", + "integrity": "sha1-SXBJi+YEwgwAXU9cI67NIda0kJk=", + "requires": { + "asynckit": "0.4.0", + "combined-stream": "1.0.6", + "mime-types": "2.1.20" + } + }, + "fs-extra": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-1.0.0.tgz", + "integrity": "sha1-zTzl9+fLYUWIP8rjGR6Yd/hYeVA=", + "requires": { + "graceful-fs": "4.1.11", + "jsonfile": "2.4.0", + "klaw": "1.3.1" + } + }, + "getpass": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", + "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=", + "requires": { + "assert-plus": "1.0.0" + } + }, + "graceful-fs": { + "version": "4.1.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", + "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=" + }, + "har-schema": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", + "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=" + }, + "har-validator": { + "version": "5.1.0", + "resolved": 
"https://registry.npmjs.org/har-validator/-/har-validator-5.1.0.tgz", + "integrity": "sha512-+qnmNjI4OfH2ipQ9VQOw23bBd/ibtfbVdK2fYbY4acTDqKTW/YDp9McimZdDbG8iV9fZizUqQMD5xvriB146TA==", + "requires": { + "ajv": "5.5.2", + "har-schema": "2.0.0" + } + }, + "hasha": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/hasha/-/hasha-2.2.0.tgz", + "integrity": "sha1-eNfL/B5tZjA/55g3NlmEUXsvbuE=", + "requires": { + "is-stream": "1.1.0", + "pinkie-promise": "2.0.1" + } + }, + "http-signature": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz", + "integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=", + "requires": { + "assert-plus": "1.0.0", + "jsprim": "1.4.1", + "sshpk": "1.14.2" + } + }, + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "is-stream": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-1.1.0.tgz", + "integrity": "sha1-EtSj3U5o4Lec6428hBc66A2RykQ=" + }, + "is-typedarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", + "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=" + }, + "isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=" + }, + "isstream": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", + "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" + }, + "jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=", + "optional": true + }, + "json-schema": { + "version": "0.2.3", + 
"resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", + "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" + }, + "json-schema-traverse": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz", + "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A=" + }, + "json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" + }, + "jsonfile": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", + "integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=", + "requires": { + "graceful-fs": "4.1.11" + } + }, + "jsprim": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", + "integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", + "requires": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.2.3", + "verror": "1.10.0" + } + }, + "kew": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/kew/-/kew-0.7.0.tgz", + "integrity": "sha1-edk9LTM2PW/dKXCzNdkUGtWR15s=" + }, + "klaw": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/klaw/-/klaw-1.3.1.tgz", + "integrity": "sha1-QIhDO0azsbolnXh4XY6W9zugJDk=", + "requires": { + "graceful-fs": "4.1.11" + } + }, + "mime-db": { + "version": "1.36.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.36.0.tgz", + "integrity": "sha512-L+xvyD9MkoYMXb1jAmzI/lWYAxAMCPvIBSWur0PZ5nOf5euahRLVqH//FKW9mWp2lkqUgYiXPgkzfMUFi4zVDw==" + }, + "mime-types": { + "version": "2.1.20", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.20.tgz", + "integrity": "sha512-HrkrPaP9vGuWbLK1B1FfgAkbqNjIuy4eHlIYnFi7kamZyLLrGlo2mpcx0bBmNpKqBtYtAfGbodDddIgddSJC2A==", + "requires": { + "mime-db": "1.36.0" + } + }, + "minimist": { + "version": "0.0.8", + "resolved": 
"http://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=" + }, + "mkdirp": { + "version": "0.5.1", + "resolved": "http://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", + "requires": { + "minimist": "0.0.8" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, + "oauth-sign": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" + }, + "pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA=" + }, + "performance-now": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=" + }, + "phantomjs-prebuilt": { + "version": "2.1.16", + "resolved": "https://registry.npmjs.org/phantomjs-prebuilt/-/phantomjs-prebuilt-2.1.16.tgz", + "integrity": "sha1-79ISpKOWbTZHaE6ouniFSb4q7+8=", + "requires": { + "es6-promise": "4.2.4", + "extract-zip": "1.6.7", + "fs-extra": "1.0.0", + "hasha": "2.2.0", + "kew": "0.7.0", + "progress": "1.1.8", + "request": "2.88.0", + "request-progress": "2.0.1", + "which": "1.3.1" + } + }, + "pinkie": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/pinkie/-/pinkie-2.0.4.tgz", + "integrity": "sha1-clVrgM+g1IqXToDnckjoDtT3+HA=" + }, + "pinkie-promise": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pinkie-promise/-/pinkie-promise-2.0.1.tgz", + "integrity": "sha1-ITXW36ejWMBprJsXh3YogihFD/o=", + "requires": { + "pinkie": "2.0.4" + } + }, + "process-nextick-args": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" + }, + "progress": { + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/progress/-/progress-1.1.8.tgz", + "integrity": "sha1-4mDHj2Fhzdmw5WzD4Khd4Xx6V74=" + }, + "psl": { + "version": "1.1.29", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.29.tgz", + "integrity": "sha512-AeUmQ0oLN02flVHXWh9sSJF7mcdFq0ppid/JkErufc3hGIV/AMa8Fo9VgDo/cT2jFdOWoFvHp90qqBH54W+gjQ==" + }, + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" + }, + "qs": { + "version": "6.5.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", + "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" + }, + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "1.0.2", + "inherits": "2.0.3", + "isarray": "1.0.0", + "process-nextick-args": "2.0.0", + "safe-buffer": "5.1.2", + "string_decoder": "1.1.1", + "util-deprecate": "1.0.2" + } + }, + "request": { + "version": "2.88.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", + "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", + "requires": { + "aws-sign2": "0.7.0", + "aws4": "1.8.0", + "caseless": "0.12.0", + "combined-stream": "1.0.6", + "extend": "3.0.2", + "forever-agent": "0.6.1", + "form-data": "2.3.2", + "har-validator": "5.1.0", + "http-signature": "1.2.0", + "is-typedarray": "1.0.0", + "isstream": "0.1.2", + "json-stringify-safe": "5.0.1", + "mime-types": 
"2.1.20", + "oauth-sign": "0.9.0", + "performance-now": "2.1.0", + "qs": "6.5.2", + "safe-buffer": "5.1.2", + "tough-cookie": "2.4.3", + "tunnel-agent": "0.6.0", + "uuid": "3.3.2" + } + }, + "request-progress": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/request-progress/-/request-progress-2.0.1.tgz", + "integrity": "sha1-XTa7V5YcZzqlt4jbyBQf3yO0Tgg=", + "requires": { + "throttleit": "1.0.0" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "slimerjs": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/slimerjs/-/slimerjs-1.0.0.tgz", + "integrity": "sha1-xepFdUGh7NZXiSwOATYwq8aaIJE=" + }, + "sshpk": { + "version": "1.14.2", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.14.2.tgz", + "integrity": "sha1-xvxhZIo9nE52T9P8306hBeSSupg=", + "requires": { + "asn1": "0.2.4", + "assert-plus": "1.0.0", + "bcrypt-pbkdf": "1.0.2", + "dashdash": "1.14.1", + "ecc-jsbn": "0.1.2", + "getpass": "0.1.7", + "jsbn": "0.1.1", + "safer-buffer": "2.1.2", + "tweetnacl": "0.14.5" + } + }, + "string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "requires": { + "safe-buffer": "5.1.2" + } + }, + "throttleit": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-1.0.0.tgz", + "integrity": "sha1-nnhYNtr0Z0MUWlmEtiaNgoUorGw=" + }, + "tough-cookie": { + "version": "2.4.3", + "resolved": 
"https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", + "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", + "requires": { + "psl": "1.1.29", + "punycode": "1.4.1" + } + }, + "tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=", + "requires": { + "safe-buffer": "5.1.2" + } + }, + "tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=", + "optional": true + }, + "typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=" + }, + "util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" + }, + "uuid": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz", + "integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA==" + }, + "verror": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz", + "integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=", + "requires": { + "assert-plus": "1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "1.3.0" + } + }, + "which": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", + "integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==", + "requires": { + "isexe": "2.0.0" + } + }, + "yauzl": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.4.1.tgz", + "integrity": "sha1-lSj0QtqxsihOWLQ3m7GU4i4MQAU=", + "requires": { + "fd-slicer": "1.0.1" + } + } + } +} diff --git 
a/package.json b/package.json new file mode 100644 index 0000000..18c1284 --- /dev/null +++ b/package.json @@ -0,0 +1,16 @@ +{ + "name": "presse-cote-d-ivoire", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "", + "license": "ISC", + "dependencies": { + "casperjs": "^1.1.4", + "phantomjs-prebuilt": "^2.1.16", + "slimerjs": "^1.0.0" + } +} diff --git a/test.rb b/test.rb new file mode 100755 index 0000000..459ad13 --- /dev/null +++ b/test.rb @@ -0,0 +1,17 @@ +#!/usr/bin/env ruby + +urls = %w[ + https://boutique.pressecotedivoire.fr/reading?id=SOIRINFO7163pZyPXQ#page=1 + https://boutique.pressecotedivoire.fr/reading?id=NOTREVOIE5947jneW2f#page=1 + https://boutique.pressecotedivoire.fr/reading?id=LGINFOS1979CHPFdD#page=1 + https://boutique.pressecotedivoire.fr/reading?id=LETEMPS4463XBeNW4#page=1 +] + +urls.each do |url| + cmd = "bundle exec ./download.rb \"#{url}\"" + system cmd + unless $?.success? 
+ # try again + system cmd + end +end diff --git a/theyellowbridge.yml b/theyellowbridge.yml new file mode 100644 index 0000000..141cdb5 --- /dev/null +++ b/theyellowbridge.yml @@ -0,0 +1,13 @@ +root: http://tyb.phtechno.com +pages: + - /nos-offres/ + - /nos-offres/accompagnement-transformation/ + - /nos-offres/developpement-cohesion-sociale/ + - /nos-offres/digital-learning/ + - /nos-offres/gestion-agile-competences/ + - /nos-offres/mutualisation-ressources-rh/ + - /nos-offres/realisation-etudes-prospectives/ + - /notre-reseau/ + - /contact/ + - /cookies-et-confidentialite/ + - /mentions-legales/ diff --git a/webgalien.gemspec b/webgalien.gemspec new file mode 100644 index 0000000..8798fab --- /dev/null +++ b/webgalien.gemspec @@ -0,0 +1,29 @@ +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'webgalien/version' + +Gem::Specification.new do |spec| + spec.name = "webgalien" + spec.version = Webgalien::VERSION + spec.authors = ["Glenn Y. Rolland"] + spec.email = ["glenux@glenux.net"] + spec.summary = %q{Use GIT logs to give an estimation of spent time & costs of your projects.} + spec.description = %q{Use GIT logs to give an estimation of spent time & costs of your projects.} + spec.homepage = "https://github.com/glenux/webgalien" + spec.license = "MIT" + + spec.files = `git ls-files -z`.split("\x0") + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) + spec.require_paths = ["lib"] + + spec.add_runtime_dependency "selenium-webdriver", "~> 3.14" + spec.add_runtime_dependency 'colorize', '~> 0.8.1' + spec.add_runtime_dependency 'celluloid', '~> 0.17.3' + spec.add_runtime_dependency 'thor', '~> 0.20.0' + spec.add_development_dependency "bundler" + spec.add_development_dependency "rake" + spec.add_development_dependency "minitest" +end +