#!/usr/bin/env ruby

#require 'bundler/setup'
#Bundler.require
require 'pry'
require 'set'
require 'zlib'
require 'net/imap'
require 'pp'
require 'mechanize'
require 'yaml'
require 'hash_validator'
require 'uri'
require 'thor'
require 'json'
require 'mail'
require 'colorize'

#Net::IMAP.debug = true

class Hash
  # Recursively converts the keys of a hash (and of any nested hash values)
  # to symbols.
  #
  # Values that are not hashes are returned unchanged; arrays are not
  # descended into.
  #
  # @param value [Object] the hash to convert, or any other value
  # @return [Object] a new hash with symbol keys, or +value+ itself when it
  #   is not a Hash
  def self.transform_keys_to_symbols(value)
    return value unless value.is_a?(Hash)

    value.each_with_object({}) do |(k, v), memo|
      memo[k.to_sym] = Hash.transform_keys_to_symbols(v)
    end
  end
end

module Epafh
  EPAFI_CONFIG_FILE = File.join(ENV['HOME'], '.epafh', 'config.yml')
  EPAFI_IGNORE_FILE = File.join(ENV['HOME'], '.epafh', 'ignore.yml')

  # Keeps two sets of normalised (stripped, lower-cased) e-mail addresses:
  # a "keep" list filled from the CRM leads/contacts JSON endpoints and an
  # "ignore" list persisted as YAML in EPAFI_IGNORE_FILE.
  class ContactManager
    CRM_LOGIN_URL = '/login'
    CRM_LEADS_URL = '/leads.json'
    CRM_CONTACTS_URL = '/contacts.json'

    # Logs into the CRM and loads contacts, leads and the ignore file.
    #
    # RuntimeErrors raised while doing so (missing configuration file,
    # authentication failure) are printed to STDERR and NOT re-raised, so
    # the instance may end up with empty or partially filled lists.
    #
    # @param config [Hash] symbol-keyed configuration; reads
    #   config[:crm][:baseurl], [:login] and [:password]
    def initialize(config)
      @config = config
      @browser = Mechanize.new { |agent| agent.user_agent_alias = 'Mac Safari' }
      @ignore_list = Set.new
      @keep_list = Set.new

      unless File.exist?(EPAFI_CONFIG_FILE)
        raise "Unable to find configuration file #{EPAFI_CONFIG_FILE}"
      end

      connect!
      load_contacts
      load_leads
      load_ignore
    rescue RuntimeError => e
      STDERR.puts e.message
    end

    # Submits the CRM login form with the configured credentials.
    #
    # @raise [RuntimeError] when the login form cannot be found or the
    #   server answers with an HTTP error code
    def connect!
      @browser.get(@config[:crm][:baseurl] + CRM_LOGIN_URL) do |page|
        form = page.form_with(action: '/authentication') do |f|
          f['authentication[username]'] = @config[:crm][:login]
          f['authentication[password]'] = @config[:crm][:password]
        end
        # form_with returns nil when no form matches; fail with a readable
        # message instead of a NoMethodError on nil.
        raise "Authentication error. Login form not found." unless form

        form.click_button
      end
    rescue Mechanize::ResponseCodeError
      raise "Authentication error. Verify your credentials."
    end

    # Adds every address stored in EPAFI_IGNORE_FILE to the ignore list.
    # Does nothing when the file does not exist or is empty.
    def load_ignore
      return unless File.exist?(EPAFI_IGNORE_FILE)

      # load_file yields false/nil for an empty document.
      entries = YAML.load_file(EPAFI_IGNORE_FILE) || []
      normalize_emails(entries).each { |email| @ignore_list << email }
    end

    # Adds the e-mail addresses of all CRM leads to the keep list.
    #
    # @param page [Integer] first result page to request
    def load_leads(page = 1)
      load_crm_emails(CRM_LEADS_URL, 'lead', page)
    end

    # Adds the e-mail addresses of all CRM contacts to the keep list.
    #
    # @param page [Integer] first result page to request
    def load_contacts(page = 1)
      load_crm_emails(CRM_CONTACTS_URL, 'contact', page)
    end

    # Adds one or more addresses to the keep list.
    #
    # @param emails [String, Array<String>, Set<String>] address(es) to keep
    def keep_contact(emails)
      normalize_emails(emails).each { |mail| @keep_list << mail }
    end

    # Adds one or more addresses to the ignore list and rewrites
    # EPAFI_IGNORE_FILE with the complete list.
    #
    # @param emails [String, Array<String>, Set<String>] address(es) to ignore
    def ignore_contact(emails)
      normalize_emails(emails).each { |mail| @ignore_list << mail }
      File.open(EPAFI_IGNORE_FILE, 'w') do |f|
        f.write @ignore_list.to_a.to_yaml
      end
    end

    # @param mail [String] an e-mail address
    # @return [Boolean] true when the normalised address is in either the
    #   ignore list or the keep list
    def include?(mail)
      key = mail.to_s.strip.downcase
      @ignore_list.include?(key) || @keep_list.include?(key)
    end

    private

    # Requests successive pages of a CRM JSON listing until an empty page is
    # returned, keeping the 'email' and 'alt_email' of every record.
    # Iterative on purpose: one stack frame regardless of the page count.
    def load_crm_emails(path, record_key, first_page)
      page = first_page
      loop do
        response = @browser.get(@config[:crm][:baseurl] + path + "?page=#{page}")
        records = JSON.parse response.body
        break if records.empty?

        records.each do |record|
          entry = record[record_key] || {}
          # Either field may be null in the JSON; to_s avoids nil.split.
          keep_contact entry['email'].to_s.split(',')
          keep_contact entry['alt_email'].to_s.split(',')
        end
        page += 1
      end
    end

    # Turns a single address, an Array or a Set into a flat array of
    # stripped, lower-cased, non-empty strings.
    def normalize_emails(emails)
      Array(emails).flatten
                   .map { |mail| mail.to_s.strip.downcase }
                   .reject(&:empty?)
    end
  end

  # Walks IMAP mailboxes and, for every message containing addresses that
  # the ContactManager does not know yet, asks the user what to do.
  class CrawlerApp
    attr_reader :imap
    attr_reader :contacts

    TMPMAIL_FILE = '.tmpmail'

    # Upper bound on reconnections attempted for a single message.
    MAX_RECONNECT_ATTEMPTS = 3

    # Top-level domain is 2 letters or more (not capped at 4), so addresses
    # under longer TLDs are matched as a whole.
    MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/

    # @param config [Hash] symbol-keyed configuration; reads
    #   config[:imap][:server], [:login], [:password] and [:pattern]
    def initialize(config)
      @saved_key = 'RFC822'
      @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase
      @config = config
      @imap = nil
      @contact_manager = ContactManager.new config
    end

    # Opens the IMAP connection on port 993 and logs in.
    def connect!
      # NOTE(review): VERIFY_NONE disables TLS certificate verification.
      # Left as-is because switching it could break existing setups, but it
      # should be reconsidered.
      @imap = Net::IMAP.new(
        @config[:imap][:server],
        ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE },
        port: 993
      )
      @imap.login(@config[:imap][:login], @config[:imap][:password])
      #@imap.select(SOURCE_MAILBOX)
    end

    # Logs out and closes the IMAP connection, if one was opened.
    def disconnect!
      return unless imap

      imap.logout
      imap.disconnect
    end

    # Inspects one fetched message. Prints "." when every address found is
    # already known; otherwise lists the addresses (unknown ones highlighted)
    # and prompts the user to ignore, add, skip or view the message.
    #
    # @param message [#attr] fetched IMAP data whose attr[@saved_key] holds
    #   the raw message text
    # @return [nil]
    def examine_message(message)
      m = Mail.read_from_string message.attr[@saved_key]
      return if m.from.nil?
      return if m.to.nil?

      from = address_list(m.from)
      to = address_list(m.to)
      cc = address_list(m.cc)
      body_emails = extract_body_emails(m)

      emails = Set.new(from + to + cc).merge(body_emails)
      unknown = emails.to_a.reject { |e| @contact_manager.include?(e) }

      if unknown.empty?
        print "."
        return
      end

      print_addresses({ from: from, to: to, cc: cc, body: body_emails }, unknown)
      prompt_for_decision(m, message, from, to, unknown)
      return
    end

    # Examines the messages of every mailbox whose name matches
    # config[:imap][:pattern].
    def examine_all
      (@imap.list('', '*') || []).each do |mailbox|
        puts "\nMAILBOX #{mailbox.name}".yellow
        next unless mailbox.name =~ /#{@config[:imap][:pattern]}/

        @imap.examine mailbox.name
        puts "Searching #{mailbox.name}"
        messages_in_mailbox = @imap.responses['EXISTS'][0]
        unless messages_in_mailbox
          # Plain puts: this class is not a Thor command, so Thor's `say`
          # is not available here.
          puts "#{mailbox.name} does not have any messages"
          next
        end

        @imap.select mailbox.name #GYR: TEST
        ids = @imap.search('SINCE 1-Jan-2001') # NOT OR TO "@agilefant.org" CC "@agilefant.org"')
        if ids.empty?
          puts "\tFound no messages"
        else
          examine_message_list mailbox.name, ids
        end
      end
    end

    # Fetches and examines each message id of a mailbox. On IOError the
    # connection is re-opened and the same message is retried, at most
    # MAX_RECONNECT_ATTEMPTS times, after which the IOError propagates.
    #
    # @param mailbox_name [String]
    # @param ids [Array<Integer>] message sequence numbers
    def examine_message_list(mailbox_name, ids)
      ids.each do |id|
        attempts = 0
        begin
          @imap.select mailbox_name #GYR: TEST
          fetched = imap.fetch(id, [@saved_key])
          # fetch may return nothing for an id; skip it in that case.
          examine_message fetched[0] if fetched && !fetched.empty?
        rescue IOError
          attempts += 1
          raise if attempts > MAX_RECONNECT_ATTEMPTS

          # re-connect and try again
          connect!
          retry
        end
      end
    end

    private

    # Normalises a header field value (nil, a single value or an array)
    # into an array of strings.
    def address_list(field)
      [field].flatten.compact.map(&:to_s)
    end

    # Collects the addresses matched by MAIL_REGEXP in the top-level
    # text/plain parts of the message body.
    def extract_body_emails(mail)
      mail.body.parts.each_with_object(Set.new) do |part, found|
        # mime_type excludes parameters such as "; charset=UTF-8", which
        # made a comparison against the full content_type fail.
        next unless part.mime_type == 'text/plain'

        # scrub replaces invalid byte sequences so scan cannot raise on them.
        found.merge part.decoded.to_s.scrub.scan(MAIL_REGEXP)
      end
    end

    # Prints every address grouped by origin, highlighting unknown ones.
    def print_addresses(addresses, unknown)
      puts ""
      addresses.each do |kind, list|
        list.each do |addr|
          shown = unknown.include?(addr) ? addr.yellow.on_black : addr
          puts "%4s: %s" % [kind.to_s.upcase, shown]
        end
      end
      puts ""
    end

    # Asks the user what to do with the unknown addresses until a final
    # answer (ignore / add / skip) is given. End of input counts as skip.
    def prompt_for_decision(mail, message, from, to, unknown)
      # Decoded once, outside the loop: a subject that cannot be converted
      # would otherwise raise on every iteration and never reach the prompt.
      subject = safe_subject(mail)

      loop do
        begin
          puts "\n### #{subject}"
          print "#{from.join(',')} --> #{to.join(',')} "
          puts "[Ignore/Add/Skip/Detail] ?"
          answer = STDIN.gets
          break if answer.nil? # EOF on STDIN

          case answer.strip
          when /\A[iI]\z/ # ignore
            @contact_manager.ignore_contact unknown
            break
          when /\A[aA]\z/ # add
            @contact_manager.keep_contact unknown
            break
          when /\A[sS]\z/ # skip
            break
          when /\A[dD]\z/ # detail
            show_message message
          end
        rescue Encoding::ConverterNotFoundError
          STDERR.puts "ERROR: encoding problem in email. Unable to convert."
        end
      end
    end

    # Returns the message subject, or a placeholder when it cannot be
    # converted.
    def safe_subject(mail)
      mail.subject
    rescue Encoding::ConverterNotFoundError
      STDERR.puts "ERROR: encoding problem in email. Unable to convert."
      '(undecodable subject)'
    end

    # Writes the raw message to a temporary file, runs it through formail
    # and opens the result read-only in mutt.
    def show_message(message)
      File.open(TMPMAIL_FILE + ".2", 'w') do |f|
        f.write message.attr[@saved_key]
      end
      system "formail < #{TMPMAIL_FILE}.2 > #{TMPMAIL_FILE}"
      system "mutt -R -f #{TMPMAIL_FILE}"
    end
  end

  # Thor command-line entry point.
  class Crawler < Thor
    # NOTE(review): not referenced anywhere in this file; the configuration
    # is read from EPAFI_CONFIG_FILE.
    CONFIG_FILE = 'config/secrey.yml'

    include Thor::Actions
    default_task :crawl

    desc 'crawl', 'Crawls email to save mails'
    def crawl
      parse_configuration

      ## Run application
      app = CrawlerApp.new @config
      app.connect!
      app.examine_all
      app.disconnect!
    end

    def initialize(*args)
      @config = {}
      super
    end

    private

    # Loads EPAFI_CONFIG_FILE into @config (keys converted to symbols) and
    # validates its structure.
    #
    # @raise [RuntimeError] when the configuration does not validate
    def parse_configuration
      ## Load configuration
      # load_file closes the file itself; an empty document yields false/nil.
      raw = YAML.load_file(EPAFI_CONFIG_FILE) || {}
      @config.merge! Hash.transform_keys_to_symbols(raw)

      ## Validate configuration structure
      validations = {
        crm: {
          baseurl: lambda { |url| url =~ URI::regexp },
          login: 'string',
          password: 'string'
        },
        imap: {
          server: 'string',
          login: 'string',
          password: 'string'
        }
      }
      validator = HashValidator.validate(@config, validations)
      raise "Configuration is not valid: #{validator.errors.inspect}" unless validator.valid?
    end
  end
end

Epafh::Crawler.start