ewoga/bin/ewoga-fetch
2016-08-08 10:40:43 +02:00

198 lines
4.7 KiB
Ruby
Executable file

#!/usr/bin/env ruby
# set ts=2 sw=2 et
# Standard library
require 'fileutils'
require 'json'
require 'net/imap'
require 'openssl'
require 'pp'
require 'uri'
require 'yaml'
require 'zlib'
# Third-party gems
require 'colorize'
require 'hash_validator'
require 'mail'
require 'mechanize'
require 'pry'
require 'thor'
#Net::IMAP.debug = true
class Hash
  # Builds a new Hash in which every key of +value+ has been converted with
  # +to_sym+. Values that are Hashes themselves are converted the same way;
  # any other value (including Arrays) is kept as it is.
  #
  # value - the object to convert; returned untouched when it is not a Hash.
  def self.transform_keys_to_symbols(value)
    return value unless value.is_a?(Hash)

    value.each_with_object({}) do |(key, nested), symbolized|
      symbolized[key.to_sym] = Hash.transform_keys_to_symbols(nested)
    end
  end
end
module Ewoga
# Per-user files live in ~/.ewoga. Dir.home is used instead of ENV['HOME']
# so that loading the script does not fail with a TypeError when the HOME
# environment variable is not set.
EWOGA_CONFIG_FILE = File.join(Dir.home, '.ewoga', 'config.yml').freeze
EWOGA_IGNORE_FILE = File.join(Dir.home, '.ewoga', 'ignore.yml').freeze
# Raised when the configuration file fails validation or still contains the
# generated sample values. It derives from StandardError (not Exception) so
# that an ordinary `rescue` is able to handle it.
class InvalidConfiguration < StandardError; end
class CrawlerApp
attr_reader :imap
attr_reader :contacts
TMPMAIL_FILE = '.tmpmail'
# Keeps the configuration and sets up the IMAP fetch attribute names.
# No connection is opened here: @imap stays nil until #connect! is called.
#
# config - settings Hash; other methods read :imap, :match and :output from it.
def initialize(config)
  @config = config
  @imap = nil
  # Attribute name passed to IMAP FETCH and used to read the raw message back.
  @saved_key = 'RFC822'
  @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase
end
# Opens a TLS connection to the configured IMAP server and logs in.
#
# Reads :server, :login and :password from @config[:imap]. Two optional keys
# are honoured as well:
#   :port       - TCP port, 993 when absent.
#   :verify_ssl - set to true to verify the server certificate.
#
# SECURITY: unless :verify_ssl is true the certificate is NOT verified
# (the historical behaviour of this tool), which leaves the session open to
# interception. Enable :verify_ssl whenever the server has a valid
# certificate.
def connect!
  settings = @config[:imap]
  verify_mode = settings[:verify_ssl] ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
  @imap = Net::IMAP.new(
    settings[:server],
    ssl: { verify_mode: verify_mode },
    port: settings[:port] || 993
  )
  @imap.login(settings[:login], settings[:password])
end
# Logs out and closes the connection.
#
# Does nothing when no connection was ever opened. The socket is closed even
# when the LOGOUT command fails (for instance because the server already
# dropped the connection); the error from logout is still raised afterwards.
def disconnect!
  connection = imap
  return if connection.nil?

  begin
    connection.logout
  ensure
    connection.disconnect
  end
end
MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/
# Saves the first attachment of +message+ when the subject matches the
# pattern in @config[:match].
#
# message - fetch result whose attr[@saved_key] holds the raw mail text.
#
# The attachment is written to
# "<@config[:output]>/<sender>/projet-<date>.tar". Messages that have no
# sender or no recipient, or whose subject does not match, are ignored.
# Problems while saving are reported on the console and do not raise.
# Always returns nil.
def examine_message(message)
  m = Mail.read_from_string message.attr[@saved_key]
  # Array() copes with a header that is returned as a single value.
  sender = Array(m.from).first
  return if sender.nil? || m.to.nil?
  return unless m.subject =~ Regexp.new(@config[:match])

  begin
    puts "\n### #{m.subject}"
    puts "### #{m.date}"
    print "#{sender} --> #{Array(m.to).join(',')} "

    # The sender address comes from the mail itself: neutralise anything
    # that could make the target path leave the output directory.
    sender_dir = sender.to_s.gsub(%r{[/\\\0]}, '_').sub(/\A\.+\z/, '_')
    # A mail without a usable Date header must not abort the whole crawl.
    date_tag = m.date ? m.date.to_date.to_s : 'unknown-date'
    fn = "%s/%s/projet-%s.tar" % [@config[:output], sender_dir, date_tag]
    puts "- #{fn}"

    attach = m.attachments.first
    if attach.nil?
      # Report it instead of creating an empty file.
      puts "Error : Unable to save data for #{fn} because the message has no attachment"
      return
    end

    begin
      FileUtils.mkdir_p File.dirname(fn)
      File.open(fn, "w+b", 0644) { |f| f.write attach.decoded }
    rescue StandardError => e
      puts "Error : Unable to save data for #{fn} because #{e.message}"
    end
  rescue Encoding::ConverterNotFoundError
    STDERR.puts "ERROR: encoding problem in email. Unable to convert."
  end
  nil
end
# Selects INBOX, asks the server for the messages matching the
# SUBJECT NEXTFORMATION criterion sorted by DATE, and passes the resulting
# ids to #examine_message_list. Prints a notice when nothing was found.
def examine_all
  mailbox = "INBOX"
  @imap.select mailbox
  # ids = @imap.search('SUBJECT NEXTFORMATION')
  matching_ids = @imap.sort(['DATE'], ['SUBJECT', 'NEXTFORMATION'], 'US-ASCII')
  return puts("\tFound no messages") if matching_ids.empty?

  examine_message_list mailbox, matching_ids
end
# Fetches every message in +ids+ from +mailbox_name+ and passes each one to
# #examine_message.
#
# mailbox_name   - mailbox to select before fetching.
# ids            - message ids as returned by the server search/sort.
# max_reconnects - how many times in a row the connection may be re-opened
#                  after an IOError before the error is raised to the caller.
#
# After a reconnect, processing resumes at the message that failed, so
# messages that were already handled are not examined a second time.
# Returns +ids+.
def examine_message_list(mailbox_name, ids, max_reconnects: 3)
  pending = ids.to_a.dup
  return ids if pending.empty?

  reconnects = 0
  begin
    # (Re)select on every attempt: a fresh connection has no mailbox selected.
    @imap.select mailbox_name
    until pending.empty?
      fetched = @imap.fetch(pending.first, [@saved_key])
      # fetch may yield nothing when the message vanished in the meantime.
      examine_message fetched.first unless fetched.nil? || fetched.empty?
      pending.shift
      reconnects = 0
    end
  rescue IOError
    reconnects += 1
    # Give up instead of looping forever when the server stays unreachable.
    raise if reconnects > max_reconnects

    connect!
    retry
  end
  ids
end
end
class Crawler < Thor
CONFIG_FILE = 'config/secrey.yml'
include Thor::Actions
default_task :crawl
option :match, required: true
option :output, required: true
desc 'crawl', 'Crawls email to save mails'
# Thor command: loads the configuration, adds the command-line options to
# it and runs the crawler over the mailbox.
def crawl
  parse_configuration
  # Symbol keys are required here: the configuration keys are symbolized
  # and CrawlerApp reads @config[:match] and @config[:output].
  @config[:match] = options[:match]
  @config[:output] = options[:output]

  ## Run application
  app = CrawlerApp.new @config
  app.connect!
  begin
    app.examine_all
  ensure
    # Close the session even when the crawl fails half-way.
    app.disconnect!
  end
end
# Gives every command instance an empty configuration Hash, then lets the
# parent class finish its own initialisation with the arguments it was given.
def initialize(*args)
  @config = {}
  super
end
private
# Loads EWOGA_CONFIG_FILE into @config (keys converted to symbols) and
# validates it.
#
# When the file does not exist, a sample is written and the process exits
# with status 1 so the user can fill it in.
#
# Raises InvalidConfiguration when the :imap section is incomplete or still
# contains the sample server name.
def parse_configuration
  ## Load configuration
  unless File.exist? EWOGA_CONFIG_FILE
    puts "Creating sample configuration file #{EWOGA_CONFIG_FILE}"
    FileUtils.mkdir_p File.dirname(EWOGA_CONFIG_FILE)
    # The file is meant to hold a password: create it readable by its owner only.
    File.open(EWOGA_CONFIG_FILE, "w", 0600) do |fh|
      fh.puts "imap:"
      fh.puts " server: EXAMPLE.COM"
      fh.puts " login: FOO"
      fh.puts " password: BAR"
    end
    exit 1
  end

  # File.read does not leave a file handle open, and safe_load only builds
  # plain data (no arbitrary objects) from the YAML text.
  loaded = YAML.safe_load(File.read(EWOGA_CONFIG_FILE))
  # An empty file (or one holding a bare scalar) does not produce a Hash;
  # treat it as "no settings" so that validation reports it properly.
  loaded = {} unless loaded.is_a?(Hash)
  @config.merge! Hash.transform_keys_to_symbols(loaded)

  ## Validate configuration structure
  validations = {
    imap: {
      server: 'string',
      login: 'string',
      password: 'string'
    }
  }
  validator = HashValidator.validate(@config, validations)
  unless validator.valid?
    raise InvalidConfiguration, "Configuration is not valid: #{validator.errors.inspect}"
  end

  # The generated sample must have been edited before it can be used.
  if @config[:imap][:server] == "EXAMPLE.COM"
    raise InvalidConfiguration, "Configuration is not valid: please modify #{EWOGA_CONFIG_FILE}"
  end
end
end
end
# Script entry point: run the Thor command line and turn the outcome into an
# exit status (0 on success, 1 on any error).
begin
  Ewoga::Crawler.start(ARGV)
  exit 0
rescue SystemExit
  # Let explicit exit calls (and their status) pass through untouched.
  raise
rescue Exception => error
  puts "ERROR: #{error.class} #{error}"
  exit 1
end