#!/usr/bin/env ruby
# set ts=2 sw=2 et

require 'pry'
require 'zlib'
require 'net/imap'
require 'pp'
require 'mechanize'
require 'yaml'
require 'hash_validator'
require 'uri'
require 'thor'
require 'json'
require 'mail'
require 'colorize'
require 'fileutils'

#Net::IMAP.debug = true

class Hash
  # Recursively converts the keys of a hash (and of any hash nested as a
  # value) to symbols. Values that are not hashes are returned untouched;
  # arrays are not descended into.
  #
  # @param value [Object] the hash to convert, or any other object
  # @return [Object] a new hash with symbol keys, or +value+ itself when it
  #   is not a Hash
  def self.transform_keys_to_symbols(value)
    return value unless value.is_a?(Hash)

    value.each_with_object({}) do |(key, nested), memo|
      memo[key.to_sym] = transform_keys_to_symbols(nested)
    end
  end
end

module Ewoga
  EWOGA_CONFIG_FILE = File.join(ENV['HOME'], '.ewoga', 'config.yml')
  EWOGA_IGNORE_FILE = File.join(ENV['HOME'], '.ewoga', 'ignore.yml')

  # Raised when the configuration file is missing required keys or still
  # contains the sample values.
  class InvalidConfiguration < StandardError; end

  # Connects to an IMAP server, looks up messages in INBOX and saves the
  # first attachment of every message whose subject matches the configured
  # pattern.
  class CrawlerApp
    attr_reader :imap
    attr_reader :contacts

    TMPMAIL_FILE = '.tmpmail'
    MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/

    # Upper bound on reconnections attempted for a single message before the
    # IOError is allowed to propagate.
    MAX_RECONNECT_ATTEMPTS = 3

    # @param config [Hash] settings; this class reads +:match+ (regexp
    #   source), +:output+ (target directory) and +:imap+ (+:server+,
    #   +:login+, +:password+)
    def initialize(config)
      @saved_key = 'RFC822'
      @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase
      @config = config
      @imap = nil
      # Debug dump of the settings, with the password masked so that it does
      # not end up in terminals or logs.
      puts redacted_config.inspect
      @match_re = Regexp.new(@config[:match])
    end

    # Opens the IMAP connection on port 993 and logs in with the configured
    # credentials.
    def connect!
      # FIXME: VERIFY_NONE disables TLS certificate verification, so the
      # server identity is not checked while the password is sent. Kept as-is
      # because switching could break existing setups; review before relying
      # on this over an untrusted network.
      @imap = Net::IMAP.new(
        @config[:imap][:server],
        ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE },
        port: 993
      )
      @imap.login(@config[:imap][:login], @config[:imap][:password])
    end

    # Logs out and closes the IMAP connection.
    def disconnect!
      imap.logout
      imap.disconnect
    end

    # Parses one fetched message and, when its subject matches the configured
    # pattern, writes its first attachment to
    # <output>/<sender>/projet-<date>.tar.
    #
    # Messages without a From or To header, or with a non-matching subject,
    # are skipped silently.
    #
    # @param message [#attr] fetch result whose +attr+ hash holds the raw
    #   message under the 'RFC822' key
    # @return [nil]
    def examine_message(message)
      m = Mail.read_from_string message.attr[@saved_key]
      return if m.from.nil? || m.to.nil?
      return unless @match_re.match?(m.subject.to_s)

      begin
        puts "\n### #{m.subject}"
        puts "### #{m.date}"
        print "#{m.from.first} --> #{m.to.join(',')} "

        fn = attachment_path(m)
        puts "- #{fn}"

        attach = m.attachments.first
        if attach.nil?
          # Report before touching the filesystem so that no empty file is
          # left behind for a message that carries no attachment.
          puts "Error : Unable to save data for #{fn} because the message has no attachment"
        else
          save_attachment(attach, fn)
        end
      rescue Encoding::ConverterNotFoundError
        STDERR.puts "ERROR: encoding problem in email. Unable to convert."
      end
      nil
    end

    # Selects INBOX, asks the server for the messages whose subject contains
    # NEXTFORMATION sorted by date, and examines each of them.
    def examine_all
      @imap.select "INBOX"
      # ids = @imap.search('SUBJECT NEXTFORMATION')
      ids = @imap.sort(['DATE'], ['SUBJECT', 'NEXTFORMATION'], 'US-ASCII')
      if ids.empty?
        puts "\tFound no messages"
      else
        examine_message_list "INBOX", ids
      end
    end

    # Fetches and examines every message id in turn.
    #
    # On IOError the connection is re-opened and the *current* message is
    # retried, at most MAX_RECONNECT_ATTEMPTS times; messages that were
    # already handled are not processed again.
    #
    # @param mailbox_name [String] mailbox to select before each fetch
    # @param ids [Array<Integer>] message sequence numbers to fetch
    # @raise [IOError] when a message still fails after the allowed number of
    #   reconnections
    def examine_message_list(mailbox_name, ids)
      ids.each do |id|
        attempts = 0
        begin
          @imap.select mailbox_name #GYR: TEST
          # fetch may yield nil or an empty list when nothing matches the id.
          message = imap.fetch(id, [@saved_key])&.first
          next if message.nil?

          examine_message message
        rescue IOError
          attempts += 1
          raise if attempts > MAX_RECONNECT_ATTEMPTS

          # re-connect and try again
          connect!
          retry
        end
      end
    end

    private

    # Copy of the settings with the IMAP password replaced by a placeholder.
    def redacted_config
      imap_settings = @config[:imap]
      return @config unless imap_settings.is_a?(Hash) && imap_settings.key?(:password)

      @config.merge(imap: imap_settings.merge(password: '[REDACTED]'))
    end

    # Builds the destination path for the attachment of mail +m+.
    def attachment_path(m)
      # A message without a Date header has no date to derive a name from.
      day = m.date ? m.date.to_date.to_s : 'unknown-date'
      "%s/%s/projet-%s.tar" % [@config[:output], safe_path_component(m.from.first), day]
    end

    # Makes an untrusted value usable as a single directory name: path
    # separators and control characters become '_', and the special names
    # '', '.' and '..' are replaced altogether.
    def safe_path_component(value)
      cleaned = value.to_s.gsub(%r{[/\\[:cntrl:]]}, '_')
      ['', '.', '..'].include?(cleaned) ? '_' : cleaned
    end

    # Decodes +attachment+ and writes it to +path+, creating the parent
    # directory when needed. Failures are reported on stdout, not raised.
    def save_attachment(attachment, path)
      # Decode first: a decoding failure must not leave an empty file behind.
      payload = attachment.decoded
      FileUtils.mkdir_p File.dirname(path)
      File.open(path, 'wb', 0o644) { |f| f.write payload }
    rescue StandardError => e
      puts "Error : Unable to save data for #{path} because #{e.message}"
    end
  end

  # Thor command-line front end for CrawlerApp.
  class Crawler < Thor
    # Not referenced anywhere in this file.
    CONFIG_FILE = 'config/secrey.yml'

    include Thor::Actions
    default_task :crawl

    option :match, required: true
    option :output, required: true
    desc 'crawl', 'Crawls email to save mails'
    # Loads the configuration, adds the --match and --output options to it
    # and runs the crawler over the mailbox.
    def crawl
      parse_configuration

      @config[:match] = options[:match]
      @config[:output] = options[:output]

      ## Run application
      app = CrawlerApp.new @config
      app.connect!
      app.examine_all
      app.disconnect!
    end

    def initialize(*args)
      @config = {}
      super
    end

    private

    # Loads EWOGA_CONFIG_FILE into @config and validates it. When the file
    # does not exist, a sample is written and the process exits with status 1.
    #
    # @raise [InvalidConfiguration] when the file does not hold a valid
    #   configuration
    def parse_configuration
      unless File.exist?(EWOGA_CONFIG_FILE)
        write_sample_configuration
        exit 1
      end

      # load_file closes the file handle; an empty file parses to a falsy
      # value, which is treated as an empty configuration.
      loaded = YAML.load_file(EWOGA_CONFIG_FILE) || {}
      unless loaded.is_a?(Hash)
        raise InvalidConfiguration,
              "Configuration is not valid: please modify #{EWOGA_CONFIG_FILE}"
      end
      @config.merge! Hash.transform_keys_to_symbols(loaded)

      validate_configuration!
    end

    # Writes a sample configuration file for the user to fill in.
    def write_sample_configuration
      puts "Creating sample configuration file #{EWOGA_CONFIG_FILE}"
      FileUtils.mkdir_p File.dirname(EWOGA_CONFIG_FILE)
      # The file is meant to hold a password: make it readable by its owner
      # only.
      File.open(EWOGA_CONFIG_FILE, "w", 0o600) do |fh|
        fh.puts "imap:"
        fh.puts " server: EXAMPLE.COM"
        fh.puts " login: FOO"
        fh.puts " password: BAR"
      end
    end

    # Checks that @config has the expected structure and no longer holds the
    # sample server name.
    def validate_configuration!
      validations = {
        imap: {
          server: 'string',
          login: 'string',
          password: 'string'
        }
      }
      validator = HashValidator.validate(@config, validations)
      unless validator.valid?
        raise InvalidConfiguration,
              "Configuration is not valid: #{validator.errors.inspect}"
      end
      return unless @config[:imap][:server] == "EXAMPLE.COM"

      raise InvalidConfiguration,
            "Configuration is not valid: please modify #{EWOGA_CONFIG_FILE}"
    end
  end
end

begin
  Ewoga::Crawler.start ARGV
  exit 0
rescue StandardError => e
  # SystemExit and signals are not StandardError, so they pass through
  # untouched.
  warn "ERROR: #{e.class} #{e}"
  warn e.backtrace
  exit 1
end