2016-06-16 23:32:08 +00:00
|
|
|
#!/usr/bin/env ruby
|
|
|
|
|
|
|
|
# set ts=2 sw=2 et
|
|
|
|
|
|
|
|
require 'pry'
|
|
|
|
require 'zlib'
|
|
|
|
require 'net/imap'
|
|
|
|
require 'pp'
|
|
|
|
require 'mechanize'
|
|
|
|
require 'yaml'
|
|
|
|
require 'hash_validator'
|
|
|
|
require 'uri'
|
|
|
|
require 'thor'
|
|
|
|
require 'json'
|
|
|
|
require 'mail'
|
|
|
|
require 'colorize'
|
|
|
|
|
|
|
|
#Net::IMAP.debug = true
|
|
|
|
|
|
|
|
class Hash
  # Recursively converts the keys of a (possibly nested) structure to symbols.
  #
  # Hashes are rebuilt with symbolized keys, arrays are traversed so that
  # hashes nested inside them are converted too, and any other value is
  # returned untouched. Keys that do not respond to +to_sym+ (for example
  # integer keys) are kept unchanged instead of raising NoMethodError.
  #
  # @param value [Object] a Hash, an Array, or any other object
  # @return [Object] a new Hash/Array with symbolized keys, or +value+ itself
  #   when it is neither a Hash nor an Array
  def self.transform_keys_to_symbols(value)
    case value
    when Hash
      value.each_with_object({}) do |(key, nested), result|
        symbol_key = key.respond_to?(:to_sym) ? key.to_sym : key
        result[symbol_key] = transform_keys_to_symbols(nested)
      end
    when Array
      value.map { |item| transform_keys_to_symbols(item) }
    else
      value
    end
  end
end
|
|
|
|
|
|
|
|
module Ewoga
|
|
|
|
# Path of the per-user YAML configuration file (holds the IMAP settings).
# Dir.home is used rather than ENV['HOME'] so that an unset HOME does not make
# File.join fail with "no implicit conversion of nil into String" at load time.
EWOGA_CONFIG_FILE = File.join(Dir.home, '.ewoga', 'config.yml')
# Path of the companion "ignore" file in the same per-user directory.
EWOGA_IGNORE_FILE = File.join(Dir.home, '.ewoga', 'ignore.yml')
|
|
|
|
|
|
|
|
# Raised when the configuration file is structurally invalid or still contains
# the sample placeholder values.
# Derives from StandardError (not Exception) so that an ordinary +rescue+
# catches it, as expected for an application-level error.
class InvalidConfiguration < StandardError; end
|
|
|
|
|
|
|
|
# Crawls an IMAP mailbox and saves the first attachment of every message whose
# subject matches the configured pattern.
#
# Configuration keys read by this class:
#   config[:imap][:server], [:login], [:password] - IMAP connection settings
#   config[:match]  - regular expression source matched against the subject
#   config[:output] - directory under which attachments are written
class CrawlerApp
  attr_reader :imap
  # Never assigned inside this class; kept so existing callers keep working.
  attr_reader :contacts

  TMPMAIL_FILE = '.tmpmail'

  MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/

  # Maximum number of reconnect attempts for a single message before the
  # IOError is propagated to the caller.
  MAX_RECONNECT_ATTEMPTS = 3

  # @param config [Hash] see the class comment for the keys that are read
  def initialize(config)
    @saved_key = 'RFC822'
    @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase
    @config = config
    @imap = nil
    # Echo the effective configuration, with the password masked so that it
    # never ends up in a terminal scrollback or a log file.
    puts redacted_config.inspect
    @match_re = Regexp.new(@config[:match])
  end

  # Opens the IMAP connection (TLS, port 993) and logs in.
  def connect!
    # NOTE(review): VERIFY_NONE disables TLS certificate verification, which
    # exposes the login to man-in-the-middle attacks. Left unchanged because
    # existing deployments may rely on it (e.g. self-signed certificates);
    # confirm and switch to VERIFY_PEER where possible.
    @imap = Net::IMAP.new(
      @config[:imap][:server],
      ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE },
      port: 993
    )
    @imap.login(@config[:imap][:login], @config[:imap][:password])
  end

  # Logs out and closes the connection. Does nothing when no connection was
  # ever opened; the socket is closed even if the logout itself fails.
  def disconnect!
    return if imap.nil?

    begin
      imap.logout
    ensure
      imap.disconnect
    end
  end

  # Parses one fetched message and, when its subject matches the configured
  # pattern, saves its first attachment to
  # "<output>/<sender>/projet-<date>.tar".
  #
  # Messages without From or To, with a non-matching subject, or without any
  # attachment are skipped.
  #
  # @param message [#attr] fetched item whose +attr+ hash holds the raw
  #   message under the 'RFC822' key
  # @return [nil]
  def examine_message(message)
    m = Mail.read_from_string message.attr[@saved_key]
    return if m.from.nil?
    return if m.to.nil?
    return unless m.subject =~ @match_re

    begin
      puts "\n### #{m.subject}"
      puts "### #{m.date}"
      sender = Array(m.from).first
      print "#{sender} --> #{Array(m.to).join(',')} "

      attach = m.attachments.first
      if attach.nil?
        # Nothing to save: do not create (or truncate) an output file.
        puts "- no attachment, skipped"
      else
        fn = attachment_path(sender, m.date)
        puts "- #{fn}"
        save_attachment(fn, attach)
      end
    rescue Encoding::ConverterNotFoundError
      STDERR.puts "ERROR: encoding problem in email. Unable to convert."
    end

    nil
  end

  # Selects INBOX, asks the server for the messages whose subject contains
  # NEXTFORMATION (sorted by date) and examines each of them.
  def examine_all
    @imap.select "INBOX"
    # ids = @imap.search('SUBJECT NEXTFORMATION')
    ids = @imap.sort(['DATE'], ['SUBJECT', 'NEXTFORMATION'], 'US-ASCII')
    if ids.empty?
      puts "\tFound no messages"
    else
      examine_message_list "INBOX", ids
    end
  end

  # Fetches and examines every message of +ids+ in +mailbox_name+.
  #
  # On IOError the connection is re-established and the *current* message is
  # retried (messages already handled are not processed again). After
  # MAX_RECONNECT_ATTEMPTS failed attempts for one message the IOError is
  # re-raised instead of looping forever.
  #
  # @param mailbox_name [String] mailbox to select before each fetch
  # @param ids [Array<Integer>] message ids to fetch
  def examine_message_list(mailbox_name, ids)
    ids.each do |id|
      attempts = 0
      begin
        # Selecting on every iteration also restores the mailbox after a reconnect.
        @imap.select mailbox_name
        message = Array(imap.fetch(id, [@saved_key])).first
        next if message.nil? # fetch found nothing for this id

        examine_message message
      rescue IOError
        attempts += 1
        raise if attempts > MAX_RECONNECT_ATTEMPTS

        connect!
        retry
      end
    end
  end

  private

  # Copy of the configuration that is safe to print: the IMAP password, when
  # present, is replaced by a placeholder.
  def redacted_config
    imap_settings = @config[:imap]
    return @config unless imap_settings.is_a?(Hash) && imap_settings.key?(:password)

    @config.merge(imap: imap_settings.merge(password: '[REDACTED]'))
  end

  # Builds the output path for a sender/date pair. A missing Date header
  # yields "unknown-date" instead of raising.
  def attachment_path(sender, date)
    day = date ? date.to_date.to_s : 'unknown-date'
    "%s/%s/projet-%s.tar" % [@config[:output], safe_path_component(sender), day]
  end

  # Makes an untrusted string usable as a single directory name: path
  # separators and NUL bytes are replaced, and "", "." and ".." are rejected,
  # so a crafted sender address cannot escape the output directory.
  def safe_path_component(name)
    cleaned = name.to_s.gsub(%r{[/\\\x00]}, '_')
    cleaned = '_' if cleaned.empty? || cleaned == '.' || cleaned == '..'
    cleaned
  end

  # Writes the decoded attachment to +path+, creating parent directories.
  # Failures are reported on stdout and do not abort the crawl.
  def save_attachment(path, attachment)
    # Decode first so that a decoding failure does not leave an empty file.
    data = attachment.decoded
    FileUtils.mkdir_p File.dirname(path)
    File.open(path, "w+b", 0644) { |f| f.write data }
  rescue StandardError => e
    puts "Error : Unable to save data for #{path} because #{e.message}"
  end
end
|
|
|
|
|
|
|
|
# Thor command-line front end. The default +crawl+ task loads the user's
# configuration file, merges the --match and --output options into it and
# runs a CrawlerApp over the mailbox.
class Crawler < Thor
  CONFIG_FILE = 'config/secrey.yml'

  include Thor::Actions
  default_task :crawl

  option :match, required: true
  option :output, required: true
  desc 'crawl', 'Crawls email to save mails'
  # Runs the crawl: connect, examine every matching message, disconnect.
  def crawl
    parse_configuration
    @config[:match] = options[:match]
    @config[:output] = options[:output]

    ## Run application
    app = CrawlerApp.new @config
    app.connect!
    begin
      app.examine_all
    ensure
      # Close the IMAP session even when examining the mailbox fails.
      app.disconnect!
    end
  end

  # Starts with an empty configuration; it is filled in by the crawl task.
  def initialize(*args)
    @config = {}
    super
  end

  private

  # Loads EWOGA_CONFIG_FILE into @config and validates it.
  #
  # When the file does not exist, a sample file is written and the process
  # exits with status 1.
  #
  # @raise [InvalidConfiguration] when the file is not a YAML mapping, lacks
  #   the imap server/login/password strings, or still has the sample server
  def parse_configuration
    create_sample_configuration_and_exit unless File.exist? EWOGA_CONFIG_FILE

    # YAML.load_file closes the file itself (no leaked handle).
    loaded = Hash.transform_keys_to_symbols(YAML.load_file(EWOGA_CONFIG_FILE))
    unless loaded.is_a?(Hash)
      # An empty file (or a scalar/list document) cannot be merged.
      raise InvalidConfiguration,
            "Configuration is not valid: #{EWOGA_CONFIG_FILE} must contain a YAML mapping"
    end
    @config.merge! loaded

    ## Validate configuration structure
    validations = {
      imap: {
        server: 'string',
        login: 'string',
        password: 'string'
      }
    }
    validator = HashValidator.validate(@config, validations)

    unless validator.valid?
      raise InvalidConfiguration,
            "Configuration is not valid: #{validator.errors.inspect}"
    end

    if @config[:imap][:server] == "EXAMPLE.COM"
      raise InvalidConfiguration,
            "Configuration is not valid: please modify #{EWOGA_CONFIG_FILE}"
    end
  end

  # Writes a sample configuration file for the user to edit, then exits with
  # status 1. The file is created with mode 0600 because it is meant to hold
  # the IMAP password.
  def create_sample_configuration_and_exit
    puts "Creating sample configuration file #{EWOGA_CONFIG_FILE}"
    FileUtils.mkdir_p File.dirname(EWOGA_CONFIG_FILE)
    File.open(EWOGA_CONFIG_FILE, "w", 0600) do |fh|
      fh.puts "imap:"
      fh.puts " server: EXAMPLE.COM"
      fh.puts " login: FOO"
      fh.puts " password: BAR"
    end
    exit 1
  end
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Script entry point: run the Thor CLI and map the outcome to an exit status
# (0 on success, 1 when an error was reported).
begin
  Ewoga::Crawler.start ARGV
  exit 0
rescue StandardError, Ewoga::InvalidConfiguration => e
  # Only application errors are reported here. SystemExit (from +exit+),
  # Interrupt and other non-error exceptions are left to Ruby's default
  # handling instead of being swallowed by a blanket "rescue Exception".
  # InvalidConfiguration is listed explicitly so it is caught whatever its
  # superclass is.
  warn "ERROR: #{e.class} #{e}"
  warn e.backtrace
  exit 1
end
|