diff --git a/lib/epafh/app.rb b/lib/epafh/app.rb index 95f0fa4..618309c 100644 --- a/lib/epafh/app.rb +++ b/lib/epafh/app.rb @@ -49,7 +49,7 @@ class Epafh::App < Thor parse_configuration ## Run application - app = Crawler.new @config + app = Epafh::Crawler.new @config app.connect! app.examine_all diff --git a/lib/epafh/contact_manager.rb b/lib/epafh/contact_manager.rb index 23b9654..c789b30 100644 --- a/lib/epafh/contact_manager.rb +++ b/lib/epafh/contact_manager.rb @@ -17,8 +17,8 @@ class Epafh::ContactManager ## Load configuration file # - unless File.exist? EPAFI_CONFIG_FILE then - raise "Unable to find configuration file #{EPAFI_CONFIG_FILE}" + unless File.exist? Epafh::EPAFI_CONFIG_FILE then + raise "Unable to find configuration file #{Epafh::EPAFI_CONFIG_FILE}" end @config = config @@ -45,8 +45,8 @@ class Epafh::ContactManager end def load_ignore - if File.exist? EPAFI_IGNORE_FILE - ignore_list = YAML.load_file(EPAFI_IGNORE_FILE) + if File.exist? Epafh::EPAFI_IGNORE_FILE + ignore_list = YAML.load_file(Epafh::EPAFI_IGNORE_FILE) ignore_list.each do |email| @ignore_list << email.strip.downcase end @@ -92,7 +92,7 @@ class Epafh::ContactManager [emails].flatten.each do |mail| @ignore_list << mail.strip.downcase end - File.open(EPAFI_IGNORE_FILE, 'w') do |f| + File.open(Epafh::EPAFI_IGNORE_FILE, 'w') do |f| f.write @ignore_list.to_a.to_yaml end end diff --git a/lib/epafh/crawler.rb b/lib/epafh/crawler.rb index f7b5d5a..a8c481f 100644 --- a/lib/epafh/crawler.rb +++ b/lib/epafh/crawler.rb @@ -4,16 +4,16 @@ class Epafh::Crawler attr_reader :contacts TMPMAIL_FILE = '.tmpmail' + MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/ def initialize config @saved_key = 'RFC822' @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase @config = config @imap = nil - @contact_manager = ContactManager.new config + @contact_manager = Epafh::ContactManager.new config end - def connect! @imap = Net::IMAP.new( @config[:imap][:server], @@ -29,83 +29,45 @@ class Epafh::Crawler imap.disconnect end - MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/ - def examine_message message m = Mail.read_from_string message.attr[@saved_key] return if m.from.nil? return if m.to.nil? + body_emails = extract_body_mail m.body.parts + ## Create association between extracted addreses and email part + mail_struct = { + from: [m.from || []].flatten, + to: [m.to || []].flatten, + cc: [m.cc || []].flatten, + body: (body_emails.to_a || []) + } emails = Set.new - emails.merge m.from - emails.merge [m.to].flatten if m.to - emails.merge [m.cc].flatten if m.cc + mail_struct.each {|key, val| emails.merge val } + remaining_emails = emails.reject{|e| @contact_manager.include?(e) } - body_emails = Set.new - m.body.parts.each do |part| - next if part.content_type != 'text/plain' - - #body_emails = m.body.decoded.scan MAIL_REGEXP - part_emails = part.decoded.scan MAIL_REGEXP - #pp body_emails - if not part_emails.empty? then - body_emails.merge part_emails - end - end - emails.merge body_emails - - # puts emails.to_a.join(' , ') - remaining_emails = ( - emails - .map{ |e| [e, (@contact_manager.include? e)] } - .select{ |e,t| !t } - ) - seen_emails = ( - remaining_emails - .empty? - ) - # puts @contacts.to_a.join(', ') - if seen_emails then + # Skip examination of no addresses are remaining + if remaining_emails.empty? then print "." return - else - puts "" - all_addr = { - from: (m.from || []), - to: (m.to || []), - cc: (m.cc || []), - body: (body_emails || []) - } - all_addr.each do |key, list| - list.each do |addr| - addr_str = if remaining_emails.map{|e,t| e}.include? addr then - addr.yellow.on_black - else addr - end - str = "%4s: %s" % [key.to_s.upcase, addr_str] - puts str - end - end - puts "" - #puts " ORIGINAL EMAILS: #{emails.to_a.join(', ')}" - #puts "REMAINING EMAILS: #{remaining_emails.map{|e,t| e}.join(', ')}".yellow.on_black - #puts " SEEN EMAILS: #{seen_emails}" end + display_header mail_struct, remaining_emails + while true begin puts "\n### #{m.subject}" - print "#{m.from.join(',')} --> #{m.to.join(',')} " + print "#{mail_struct[:from].join(',')} --> #{mail_struct[:to].join(',')} " puts "[Ignore/Add/Skip/Detail] ?" i = STDIN.gets case i.strip when /^[iI]$/ then # ignore - @contact_manager.ignore_contact remaining_emails.map{|e,t| e} + @contact_manager.ignore_contact remaining_emails break when /^[aA]$/ then # add - @contact_manager.keep_contact remaining_emails.map{|e,t| e} + @contact_manager.keep_contact remaining_emails break when /^[sS]$/ then #skip break @@ -161,4 +123,32 @@ class Epafh::Crawler retry end + def extract_body_mail body_parts + body_emails = Set.new + body_parts.each do |part| + next if part.content_type != 'text/plain' + + part_emails = part.decoded.scan MAIL_REGEXP + if not part_emails.empty? then + body_emails.merge part_emails + end + end + body_emails + end + + def display_header header_struct, remaining_emails + puts "" + header_struct.each do |key, list| + pp list + list.each do |addr| + addr_str = if remaining_emails.include? addr then + addr.yellow.on_black + else addr + end + str = "%4s: %s" % [key.to_s.upcase, addr_str] + puts str + end + end + puts "" + end end