Compare commits
11 commits
Author | SHA1 | Date | |
---|---|---|---|
ee6e689157 | |||
b53cb336f7 | |||
1460435676 | |||
6a7ad7733d | |||
f07a23833f | |||
7bfc3beb2a | |||
b2f55c89f0 | |||
8669fdaa83 | |||
8654b94cae | |||
5bb262dc92 | |||
08c63bb7fd |
11 changed files with 436 additions and 364 deletions
|
@ -4,11 +4,13 @@ PATH
|
|||
epafh (0.1.0)
|
||||
colorize
|
||||
hash_validator
|
||||
highline
|
||||
json
|
||||
mail (~> 2.6.3)
|
||||
mechanize
|
||||
pry
|
||||
pry-rescue
|
||||
ruby-progressbar
|
||||
thor
|
||||
|
||||
GEM
|
||||
|
@ -19,6 +21,7 @@ GEM
|
|||
domain_name (0.5.25)
|
||||
unf (>= 0.0.5, < 1.0.0)
|
||||
hash_validator (0.4.0)
|
||||
highline (1.7.8)
|
||||
http-cookie (1.0.2)
|
||||
domain_name (~> 0.5)
|
||||
interception (0.5)
|
||||
|
@ -50,6 +53,7 @@ GEM
|
|||
interception (>= 0.5)
|
||||
pry
|
||||
rake (10.4.2)
|
||||
ruby-progressbar (1.7.5)
|
||||
slop (3.6.0)
|
||||
thor (0.19.1)
|
||||
unf (0.1.4)
|
||||
|
|
6
TODO.md
6
TODO.md
|
@ -3,6 +3,12 @@ TODO
|
|||
|
||||
## Features
|
||||
|
||||
* Add support for mailbox exclude filter
|
||||
* Add support for email exclude filter
|
||||
|
||||
support@.*
|
||||
no-reply.*@.*
|
||||
|
||||
* Add POP3 server support
|
||||
|
||||
|
||||
|
|
351
bin/epafh
351
bin/epafh
|
@ -15,352 +15,13 @@ require 'thor'
|
|||
require 'json'
|
||||
require 'mail'
|
||||
require 'colorize'
|
||||
require 'epafh'
|
||||
|
||||
#Net::IMAP.debug = true
|
||||
|
||||
class Hash
|
||||
#take keys of hash and transform those to a symbols
|
||||
def self.transform_keys_to_symbols(value)
|
||||
return value if not value.is_a?(Hash)
|
||||
hash = value.inject({}) do |memo,(k,v)|
|
||||
memo[k.to_sym] = Hash.transform_keys_to_symbols(v); memo
|
||||
end
|
||||
return hash
|
||||
end
|
||||
begin
|
||||
Epafh::App.start(ARGV)
|
||||
rescue Interrupt
|
||||
puts " Interrupt! Exiting."
|
||||
exit 1
|
||||
end
|
||||
|
||||
module Epafh
|
||||
EPAFI_CONFIG_FILE = File.join(ENV['HOME'],'.epafh','config.yml')
|
||||
EPAFI_IGNORE_FILE = File.join(ENV['HOME'],'.epafh','ignore.yml')
|
||||
|
||||
class ContactManager
|
||||
|
||||
CRM_LOGIN_URL = '/login'
|
||||
CRM_LEADS_URL = '/leads.json'
|
||||
CRM_CONTACTS_URL = '/contacts.json'
|
||||
|
||||
|
||||
def initialize config
|
||||
@config = config
|
||||
|
||||
@browser = Mechanize.new { |agent|
|
||||
agent.user_agent_alias = 'Mac Safari'
|
||||
}
|
||||
@ignore_list = Set.new
|
||||
@keep_list = Set.new
|
||||
|
||||
## Load configuration file
|
||||
#
|
||||
|
||||
unless File.exist? EPAFI_CONFIG_FILE then
|
||||
raise "Unable to find configuration file #{EPAFI_CONFIG_FILE}"
|
||||
end
|
||||
@config = config
|
||||
|
||||
|
||||
connect!
|
||||
load_contacts
|
||||
load_leads
|
||||
load_ignore
|
||||
#puts @keep_list.to_a
|
||||
rescue RuntimeError => e
|
||||
STDERR.puts e.message
|
||||
end
|
||||
|
||||
def connect!
|
||||
@browser.get(@config[:crm][:baseurl] + CRM_LOGIN_URL) do |page|
|
||||
page.form_with(action: '/authentication') do |f|
|
||||
f['authentication[username]'] = @config[:crm][:login]
|
||||
f['authentication[password]'] = @config[:crm][:password]
|
||||
end.click_button
|
||||
end
|
||||
|
||||
rescue Mechanize::ResponseCodeError
|
||||
raise "Authentication error. Verify your credentials."
|
||||
end
|
||||
|
||||
def load_ignore
|
||||
if File.exist? EPAFI_IGNORE_FILE
|
||||
ignore_list = YAML.load_file(EPAFI_IGNORE_FILE)
|
||||
ignore_list.each do |email|
|
||||
@ignore_list << email.strip.downcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def load_leads page=1
|
||||
crm_leads_page = @browser.get(@config[:crm][:baseurl] + CRM_LEADS_URL + "?page=#{page}")
|
||||
crm_leads = JSON.parse crm_leads_page.body
|
||||
crm_leads.each do |lead_obj|
|
||||
keep_contact lead_obj['lead']['email'].split(',')
|
||||
keep_contact lead_obj['lead']['alt_email'].split(',')
|
||||
end
|
||||
|
||||
if crm_leads.size > 0 then
|
||||
load_leads (page + 1)
|
||||
end
|
||||
end
|
||||
|
||||
def load_contacts page=1
|
||||
crm_contacts_page = @browser.get(@config[:crm][:baseurl] + CRM_CONTACTS_URL + "?page=#{page}")
|
||||
crm_contacts = JSON.parse crm_contacts_page.body
|
||||
crm_contacts.each do |contact_obj|
|
||||
keep_contact contact_obj['contact']['email'].split(',')
|
||||
keep_contact contact_obj['contact']['alt_email'].split(',')
|
||||
end
|
||||
|
||||
if crm_contacts.size > 0 then
|
||||
load_contacts (page + 1)
|
||||
end
|
||||
#contacts.to_a.sort.join(', ')
|
||||
end
|
||||
|
||||
def keep_contact emails
|
||||
emails = emails.to_a if emails.is_a? Set
|
||||
[emails].flatten.each do |mail|
|
||||
@keep_list << mail.strip.downcase
|
||||
end
|
||||
end
|
||||
|
||||
def ignore_contact emails
|
||||
emails = emails.to_a if emails.is_a? Set
|
||||
[emails].flatten.each do |mail|
|
||||
@ignore_list << mail.strip.downcase
|
||||
end
|
||||
File.open(EPAFI_IGNORE_FILE, 'w') do |f|
|
||||
f.write @ignore_list.to_a.to_yaml
|
||||
end
|
||||
end
|
||||
|
||||
def include? mail
|
||||
return (
|
||||
(@ignore_list.include? mail.strip.downcase) or
|
||||
(@keep_list.include? mail.strip.downcase)
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
class CrawlerApp
|
||||
attr_reader :imap
|
||||
attr_reader :contacts
|
||||
|
||||
TMPMAIL_FILE = '.tmpmail'
|
||||
|
||||
def initialize config
|
||||
@saved_key = 'RFC822'
|
||||
@filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase
|
||||
@config = config
|
||||
@imap = nil
|
||||
@contact_manager = ContactManager.new config
|
||||
end
|
||||
|
||||
|
||||
def connect!
|
||||
@imap = Net::IMAP.new(
|
||||
@config[:imap][:server],
|
||||
ssl: {verify_mode: OpenSSL::SSL::VERIFY_NONE},
|
||||
port: 993
|
||||
)
|
||||
@imap.login(@config[:imap][:login], @config[:imap][:password])
|
||||
#@imap.select(SOURCE_MAILBOX)
|
||||
end
|
||||
|
||||
def disconnect!
|
||||
imap.logout
|
||||
imap.disconnect
|
||||
end
|
||||
|
||||
MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/
|
||||
|
||||
def examine_message message
|
||||
m = Mail.read_from_string message.attr[@saved_key]
|
||||
return if m.from.nil?
|
||||
return if m.to.nil?
|
||||
|
||||
|
||||
emails = Set.new
|
||||
begin
|
||||
emails.merge m.from
|
||||
emails.merge [m.to].flatten if m.to
|
||||
emails.merge [m.cc].flatten if m.cc
|
||||
rescue => e
|
||||
binding.pry
|
||||
end
|
||||
|
||||
body_emails = Set.new
|
||||
m.body.parts.each do |part|
|
||||
next if part.content_type != 'text/plain'
|
||||
|
||||
#body_emails = m.body.decoded.scan MAIL_REGEXP
|
||||
part_emails = part.decoded.scan MAIL_REGEXP
|
||||
#pp body_emails
|
||||
if not part_emails.empty? then
|
||||
body_emails.merge part_emails
|
||||
end
|
||||
end
|
||||
emails.merge body_emails
|
||||
|
||||
# puts emails.to_a.join(' , ')
|
||||
remaining_emails = (
|
||||
emails
|
||||
.map{ |e| [e, (@contact_manager.include? e)] }
|
||||
.select{ |e,t| !t }
|
||||
)
|
||||
seen_emails = (
|
||||
remaining_emails
|
||||
.empty?
|
||||
)
|
||||
# puts @contacts.to_a.join(', ')
|
||||
if seen_emails then
|
||||
print "."
|
||||
return
|
||||
else
|
||||
puts ""
|
||||
all_addr = {
|
||||
from: (m.from || []),
|
||||
to: (m.to || []),
|
||||
cc: (m.cc || []),
|
||||
body: (body_emails || [])
|
||||
}
|
||||
all_addr.each do |key, list|
|
||||
list.each do |addr|
|
||||
addr_str = if remaining_emails.map{|e,t| e}.include? addr then
|
||||
addr.yellow.on_black
|
||||
else addr
|
||||
end
|
||||
str = "%4s: %s" % [key.to_s.upcase, addr_str]
|
||||
puts str
|
||||
end
|
||||
end
|
||||
puts ""
|
||||
#puts " ORIGINAL EMAILS: #{emails.to_a.join(', ')}"
|
||||
#puts "REMAINING EMAILS: #{remaining_emails.map{|e,t| e}.join(', ')}".yellow.on_black
|
||||
#puts " SEEN EMAILS: #{seen_emails}"
|
||||
end
|
||||
|
||||
while true
|
||||
begin
|
||||
puts "\n### #{m.subject}"
|
||||
print "#{m.from.join(',')} --> #{m.to.join(',')} "
|
||||
puts "[Ignore/Add/Skip/Detail] ?"
|
||||
|
||||
i = STDIN.gets
|
||||
case i.strip
|
||||
when /^[iI]$/ then # ignore
|
||||
@contact_manager.ignore_contact remaining_emails.map{|e,t| e}
|
||||
break
|
||||
when /^[aA]$/ then # add
|
||||
@contact_manager.keep_contact remaining_emails.map{|e,t| e}
|
||||
break
|
||||
when /^[sS]$/ then #skip
|
||||
break
|
||||
when /^[dD]$/ then # decode
|
||||
# puts m.body.decoded
|
||||
File.open(TMPMAIL_FILE + ".2", 'w') do |f|
|
||||
f.write message.attr[@saved_key]
|
||||
end
|
||||
system "formail < #{TMPMAIL_FILE}.2 > #{TMPMAIL_FILE}"
|
||||
system "mutt -R -f #{TMPMAIL_FILE}"
|
||||
end
|
||||
rescue Encoding::ConverterNotFoundError
|
||||
STDERR.puts "ERROR: encoding problem in email. Unable to convert."
|
||||
end
|
||||
end
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
def examine_all
|
||||
@imap.list('', '*').each do |mailbox|
|
||||
puts "\nMAILBOX #{mailbox.name}".yellow
|
||||
next unless mailbox.name =~ /#{@config[:imap][:pattern]}/
|
||||
@imap.examine mailbox.name
|
||||
|
||||
puts "Searching #{mailbox.name}"
|
||||
messages_in_mailbox = @imap.responses['EXISTS'][0]
|
||||
if not messages_in_mailbox then
|
||||
say "#{mailbox.name} does not have any messages"
|
||||
next
|
||||
end
|
||||
|
||||
@imap.select mailbox.name #GYR: TEST
|
||||
ids = @imap.search('SINCE 1-Jan-2001')
|
||||
# NOT OR TO "@agilefant.org" CC "@agilefant.org"')
|
||||
if ids.empty?
|
||||
puts "\tFound no messages"
|
||||
else
|
||||
examine_message_list mailbox.name, ids
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def examine_message_list mailbox_name, ids
|
||||
ids.each do |id|
|
||||
@imap.select mailbox_name #GYR: TEST
|
||||
message = imap.fetch(id, [@saved_key])[0]
|
||||
examine_message message
|
||||
end
|
||||
rescue IOError
|
||||
# re-connect and try again
|
||||
connect!
|
||||
retry
|
||||
end
|
||||
end
|
||||
|
||||
class Crawler < Thor
|
||||
|
||||
CONFIG_FILE = 'config/secrey.yml'
|
||||
|
||||
include Thor::Actions
|
||||
default_task :crawl
|
||||
|
||||
|
||||
desc 'crawl', 'Crawls email to save mails'
|
||||
def crawl
|
||||
#saved_info = []
|
||||
parse_configuration
|
||||
|
||||
## Run application
|
||||
app = CrawlerApp.new @config
|
||||
|
||||
app.connect!
|
||||
app.examine_all
|
||||
#pp saved_info
|
||||
app.disconnect!
|
||||
end
|
||||
|
||||
def initialize *args
|
||||
@config = {}
|
||||
super
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
|
||||
def parse_configuration
|
||||
## Load configuration
|
||||
@config.merge! Hash.transform_keys_to_symbols(
|
||||
YAML::load( File.open( EPAFI_CONFIG_FILE ) )
|
||||
)
|
||||
|
||||
## Validate configuration structure
|
||||
validations = {
|
||||
crm: {
|
||||
baseurl: lambda { |url| url =~ URI::regexp },
|
||||
login: 'string',
|
||||
password: 'string'
|
||||
},
|
||||
imap: {
|
||||
server: 'string',
|
||||
login: 'string',
|
||||
password: 'string'
|
||||
}
|
||||
}
|
||||
validator = HashValidator.validate(@config, validations)
|
||||
raise "Configuration is not valid: #{validator.errors.inspect}" unless validator.valid?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Epafh::Crawler.start
|
||||
|
||||
|
|
|
@ -1,27 +1,19 @@
|
|||
# coding: utf-8
|
||||
lib = File.expand_path('../lib', __FILE__)
|
||||
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
||||
require 'epafh/version'
|
||||
require 'epafh/constants'
|
||||
|
||||
Gem::Specification.new do |spec|
|
||||
spec.name = "epafh"
|
||||
spec.version = Epafh::VERSION
|
||||
spec.authors = ["@@@No user configured@@@"]
|
||||
spec.email = ["@@@No user configured@@@"]
|
||||
spec.authors = ["Glenn Y. Rolland"]
|
||||
spec.email = ["glenux@glenux.net"]
|
||||
|
||||
spec.summary = %q{TODO: Write a short summary, because Rubygems requires one.}
|
||||
spec.description = %q{TODO: Write a longer description or delete this line.}
|
||||
spec.homepage = "TODO: Put your gem's website or public repo URL here."
|
||||
spec.summary = %q{A handy tool to extract emails and URLs from an IMAP account.}
|
||||
spec.description = %q{A handy tool to extract emails and URLs from an IMAP account.}
|
||||
spec.homepage = "https://github.com/glenux/epafh"
|
||||
spec.license = "LGPL-3"
|
||||
|
||||
# Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
|
||||
# delete this section to allow pushing this gem to any host.
|
||||
if spec.respond_to?(:metadata)
|
||||
spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
||||
else
|
||||
raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
|
||||
end
|
||||
|
||||
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
||||
spec.bindir = "bin"
|
||||
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
||||
|
@ -37,4 +29,7 @@ Gem::Specification.new do |spec|
|
|||
spec.add_runtime_dependency "hash_validator"
|
||||
spec.add_runtime_dependency "pry"
|
||||
spec.add_runtime_dependency "pry-rescue"
|
||||
spec.add_runtime_dependency "highline"
|
||||
spec.add_runtime_dependency "ruby-progressbar"
|
||||
end
|
||||
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
require "epafh/version"
|
||||
|
||||
module Epafh
|
||||
# Your code goes here...
|
||||
end
|
||||
|
||||
require "epafh/hash"
|
||||
require "epafh/constants"
|
||||
require "epafh/contact_manager"
|
||||
require "epafh/crawler"
|
||||
require "epafh/app"
|
||||
|
||||
|
|
120
lib/epafh/app.rb
Normal file
120
lib/epafh/app.rb
Normal file
|
@ -0,0 +1,120 @@
|
|||
require 'highline'
|
||||
|
||||
module Epafh
  # Command-line entry point (Thor application).
  #
  # Provides two commands:
  # * +config+ - interactively asks for the IMAP and CRM settings and stores
  #   them in Epafh::EPAFI_CONFIG_FILE;
  # * +crawl+  - (default) loads and validates that file, then runs an
  #   Epafh::Crawler over the IMAP account.
  class App < Thor
    # Raised when the stored configuration does not match the expected layout.
    class InvalidConfiguration < RuntimeError; end

    CONFIG_FILE = 'config/secrey.yml'

    # Default (empty) value for every setting, by section. Frozen so that no
    # command can alter the defaults by accident; always work on a copy.
    CONFIG_DEFAULT = {
      'imap' => {
        'server' => '',
        'login' => '',
        'password' => ''
      }.freeze,
      'crm' => {
        'baseurl' => '',
        'login' => '',
        'password' => ''
      }.freeze
    }.freeze

    include Thor::Actions
    default_task :crawl

    desc 'config', 'Initialize configuration'
    # Asks every IMAP and CRM parameter (offering the stored value as the
    # default answer) and writes the result to Epafh::EPAFI_CONFIG_FILE.
    def config
      puts "Welcome to Epafh !".green
      config = stored_configuration

      imap_params = {
        server: { desc: 'IMAP hostname ? ' },
        login: { desc: 'IMAP username ? ' },
        password: { desc: 'IMAP password ? ', hidden: true }
      }
      crm_params = {
        baseurl: { desc: 'CRM base url ? ' },
        login: { desc: 'CRM username ? ' },
        password: { desc: 'CRM password ? ', hidden: true }
      }
      config['imap'] = config_with_rules config['imap'], imap_params
      config['crm'] = config_with_rules config['crm'], crm_params

      FileUtils.mkdir_p(Epafh::EPAFI_CONFIG_DIR)
      # The file holds clear-text passwords: keep it readable by its owner
      # only. The mode passed to File.open applies to newly created files,
      # chmod covers a file that already existed.
      File.open(Epafh::EPAFI_CONFIG_FILE, 'w', 0o600) { |f| f.write(config.to_yaml) }
      File.chmod(0o600, Epafh::EPAFI_CONFIG_FILE)
    end

    desc 'crawl', 'Crawls email to save mails'
    # Loads the configuration, then connects, examines every mailbox and
    # disconnects.
    #
    # Raises InvalidConfiguration when the configuration file is not valid.
    def crawl
      parse_configuration

      ## Run application
      app = Epafh::Crawler.new @config

      app.connect!
      app.examine_all
      app.disconnect!
    end

    def initialize(*args)
      @config = {}
      super
    end

    private

    # Builds the configuration offered as defaults by the +config+ command:
    # a fresh copy of CONFIG_DEFAULT, overridden section by section with the
    # content of the configuration file when there is one.
    #
    # Merging per section (instead of replacing whole sections) guarantees
    # that every expected key exists even if the stored file is incomplete,
    # while extra stored keys are preserved.
    def stored_configuration
      stored = YAML.load_file(Epafh::EPAFI_CONFIG_FILE) if File.exist?(Epafh::EPAFI_CONFIG_FILE)
      stored = {} unless stored.is_a?(Hash) # empty or malformed file

      merged = stored.dup
      CONFIG_DEFAULT.each do |section, defaults|
        current = stored[section]
        merged[section] = defaults.merge(current.is_a?(Hash) ? current : {})
      end
      merged
    end

    # Ask parameters, with rule constraints
    #
    # cfg_in - Hash (String keys) holding the current values of one section.
    # rules  - Hash mapping each parameter name (Symbol) to its options:
    #          :desc (prompt text) and, optionally, :hidden (mask the input).
    #
    # Returns a new Hash with the answers; cfg_in is left untouched.
    def config_with_rules(cfg_in, rules)
      cli = ::HighLine.new
      cfg_out = cfg_in.dup

      rules.each do |name, values|
        param = name.to_s
        backup = cfg_out[param].to_s # to_s: tolerate a missing (nil) value
        backup_hidden = backup.gsub(/./, '*')

        cfg_out[param] = cli.ask(values[:desc]) do |q|
          # Disable echo if hidden enabled
          q.echo = '*' if values[:hidden]

          # Offer the current value as default, replaced by stars if hidden
          unless backup.empty?
            q.default = values[:hidden] ? backup_hidden : backup
          end
        end

        # When RETURN is pressed, Highline uses default (starred)
        # We have to replace it with the real value.
        # NOTE: an answer made only of '*' with the same length as the
        # current value cannot be told apart from RETURN.
        if values[:hidden] && cfg_out[param] == backup_hidden
          cfg_out[param] = backup
        end
      end
      cfg_out
    end

    # Loads Epafh::EPAFI_CONFIG_FILE into @config (keys converted to
    # symbols) and validates its structure.
    #
    # Raises InvalidConfiguration when a section or a parameter is missing
    # or has the wrong type (this includes an empty configuration file).
    def parse_configuration
      ## Load configuration
      loaded = YAML.load_file(Epafh::EPAFI_CONFIG_FILE)
      @config.merge! Hash.transform_keys_to_symbols(loaded.is_a?(Hash) ? loaded : {})

      ## Validate configuration structure
      validations = {
        crm: {
          baseurl: lambda { |url| url =~ URI::regexp },
          login: 'string',
          password: 'string'
        },
        imap: {
          server: 'string',
          login: 'string',
          password: 'string'
        }
      }
      validator = HashValidator.validate(@config, validations)
      raise InvalidConfiguration, "Configuration is not valid: #{validator.errors.inspect}" unless validator.valid?
    end
  end
end
|
||||
|
7
lib/epafh/constants.rb
Normal file
7
lib/epafh/constants.rb
Normal file
|
@ -0,0 +1,7 @@
|
|||
module Epafh
  VERSION = "0.1.0".freeze

  # Per-user directory holding every file written by the application.
  # Dir.home is used instead of ENV['HOME'] so that loading this file does
  # not fail with a TypeError when the HOME variable is not set.
  EPAFI_CONFIG_DIR = File.join(Dir.home, '.epafh').freeze

  # YAML file holding the IMAP and CRM settings.
  EPAFI_CONFIG_FILE = File.join(EPAFI_CONFIG_DIR, 'config.yml').freeze

  # YAML file holding the list of ignored e-mail addresses.
  EPAFI_IGNORE_FILE = File.join(EPAFI_CONFIG_DIR, 'ignore.yml').freeze
end
|
106
lib/epafh/contact_manager.rb
Normal file
106
lib/epafh/contact_manager.rb
Normal file
|
@ -0,0 +1,106 @@
|
|||
module Epafh
  # Keeps track of the e-mail addresses that are already handled:
  #
  # * the "keep" list, filled from the contacts and leads of the CRM and from
  #   the addresses accepted during the current run;
  # * the "ignore" list, persisted as YAML in Epafh::EPAFI_IGNORE_FILE.
  #
  # Addresses are stored stripped and downcased; blank entries are dropped.
  class ContactManager
    CRM_LOGIN_URL = '/login'
    CRM_LEADS_URL = '/leads.json'
    CRM_CONTACTS_URL = '/contacts.json'

    # config - Hash with symbol keys; reads config[:crm][:baseurl],
    #          config[:crm][:login] and config[:crm][:password].
    #
    # Logs into the CRM, then loads contacts, leads and the ignore file.
    # Any RuntimeError raised while doing so (missing configuration file,
    # authentication failure reported by #connect!, ...) is printed on
    # STDERR and the object is left with whatever was loaded so far.
    def initialize(config)
      @config = config
      @browser = Mechanize.new { |agent| agent.user_agent_alias = 'Mac Safari' }
      @ignore_list = Set.new
      @keep_list = Set.new

      unless File.exist? Epafh::EPAFI_CONFIG_FILE
        raise "Unable to find configuration file #{Epafh::EPAFI_CONFIG_FILE}"
      end

      connect!
      load_contacts
      load_leads
      load_ignore
    rescue RuntimeError => e
      STDERR.puts e.message
    end

    # Submits the CRM login form with the configured credentials.
    #
    # Raises RuntimeError when the CRM answers with an HTTP error code.
    def connect!
      @browser.get(@config[:crm][:baseurl] + CRM_LOGIN_URL) do |page|
        page.form_with(action: '/authentication') do |f|
          f['authentication[username]'] = @config[:crm][:login]
          f['authentication[password]'] = @config[:crm][:password]
        end.click_button
      end
    rescue Mechanize::ResponseCodeError
      raise "Authentication error. Verify your credentials."
    end

    # Adds the addresses stored in Epafh::EPAFI_IGNORE_FILE (if it exists)
    # to the ignore list. An empty file is treated as an empty list.
    def load_ignore
      return unless File.exist? Epafh::EPAFI_IGNORE_FILE

      # YAML.load_file gives a falsy value for an empty document.
      stored = YAML.load_file(Epafh::EPAFI_IGNORE_FILE) || []
      each_address(Array(stored)) { |address| @ignore_list << address }
    end

    # Adds the e-mail addresses of every CRM lead to the keep list,
    # starting at the given result page.
    def load_leads(page = 1)
      load_crm_emails CRM_LEADS_URL, 'lead', page
    end

    # Adds the e-mail addresses of every CRM contact to the keep list,
    # starting at the given result page.
    def load_contacts(page = 1)
      load_crm_emails CRM_CONTACTS_URL, 'contact', page
    end

    # Adds one address, or an Array/Set of addresses, to the keep list.
    def keep_contact(emails)
      each_address(emails) { |address| @keep_list << address }
    end

    # Adds one address, or an Array/Set of addresses, to the ignore list and
    # rewrites Epafh::EPAFI_IGNORE_FILE with the complete list.
    def ignore_contact(emails)
      each_address(emails) { |address| @ignore_list << address }
      File.open(Epafh::EPAFI_IGNORE_FILE, 'w') do |f|
        f.write @ignore_list.to_a.to_yaml
      end
    end

    # Returns true when the address is either ignored or kept.
    def include?(mail)
      address = normalize(mail)
      @ignore_list.include?(address) || @keep_list.include?(address)
    end

    private

    # Fetches "<baseurl><path>?page=N" for N = first_page, first_page + 1, ...
    # until a page holds no record, and keeps the 'email' and 'alt_email'
    # fields (comma-separated lists) found under record_key in each record.
    def load_crm_emails(path, record_key, first_page)
      page = first_page
      loop do
        response = @browser.get(@config[:crm][:baseurl] + path + "?page=#{page}")
        records = JSON.parse response.body
        break if records.empty?

        records.each do |record|
          fields = record[record_key] || {}
          # to_s: a JSON null (no address recorded) becomes an empty list
          keep_contact fields['email'].to_s.split(',')
          keep_contact fields['alt_email'].to_s.split(',')
        end
        page += 1
      end
    end

    # Yields every non-blank address of emails, normalized.
    def each_address(emails)
      # Array#flatten does not expand a Set, hence the explicit conversion.
      list = emails.is_a?(Set) ? emails.to_a : [emails].flatten
      list.each do |raw|
        address = normalize(raw)
        yield address unless address.empty?
      end
    end

    # Canonical form used for storage and lookup.
    def normalize(address)
      address.to_s.strip.downcase
    end
  end
end
|
160
lib/epafh/crawler.rb
Normal file
160
lib/epafh/crawler.rb
Normal file
|
@ -0,0 +1,160 @@
|
|||
|
||||
require 'ruby-progressbar'
|
||||
|
||||
module Epafh
  # Walks through the mailboxes of an IMAP account, extracts the e-mail
  # addresses of every message (From, To, Cc and text/plain body parts) and
  # asks the user what to do with the addresses that the ContactManager
  # does not know yet.
  class Crawler
    attr_reader :imap
    attr_reader :contacts

    TMPMAIL_FILE = '.tmpmail'
    MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/

    # How many consecutive reconnections are attempted for one message
    # before the IOError is propagated to the caller.
    MAX_RECONNECT_ATTEMPTS = 3

    # config - Hash with symbol keys; reads config[:imap][:server],
    #          config[:imap][:login], config[:imap][:password] and
    #          config[:imap][:pattern] (mailbox name filter).
    def initialize(config)
      @saved_key = 'RFC822'
      @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase
      @config = config
      @imap = nil
      @contact_manager = Epafh::ContactManager.new config
    end

    # Opens the IMAP connection (port 993, SSL) and logs in.
    def connect!
      @imap = Net::IMAP.new(
        @config[:imap][:server],
        # NOTE(review): certificate verification is disabled, which exposes
        # the credentials to man-in-the-middle attacks. Kept as is to stay
        # compatible with servers using self-signed certificates.
        ssl: { verify_mode: OpenSSL::SSL::VERIFY_NONE },
        port: 993
      )
      @imap.login(@config[:imap][:login], @config[:imap][:password])
    end

    # Logs out and closes the IMAP connection.
    def disconnect!
      imap.logout
      imap.disconnect
    end

    # Examines one fetched message and, when it mentions addresses unknown
    # to the contact manager, asks the user whether to ignore, add or skip
    # them. Messages without From or To header are skipped.
    #
    # message - fetch result whose attr[@saved_key] holds the raw message.
    #
    # Returns nil.
    def examine_message(message)
      m = Mail.read_from_string message.attr[@saved_key]
      return if m.from.nil?
      return if m.to.nil?

      body_emails = extract_body_mail m.body.parts

      ## Create association between extracted addresses and email part
      mail_struct = {
        from: [m.from].flatten.compact,
        to: [m.to].flatten.compact,
        cc: [m.cc].flatten.compact,
        body: body_emails.to_a.compact
      }

      emails = Set.new
      mail_struct.each_value { |addresses| emails.merge addresses }
      remaining_emails = emails.reject { |e| @contact_manager.include?(e) }

      # Skip examination if no addresses are remaining
      return if remaining_emails.empty?

      display_header mail_struct, remaining_emails
      prompt_for_decision m, message, mail_struct, remaining_emails
      nil
    end

    # Examines every mailbox whose name matches config[:imap][:pattern].
    def examine_all
      (@imap.list('', '*') || []).each do |mailbox|
        puts "\nMAILBOX #{mailbox.name}".yellow
        next unless mailbox.name =~ /#{@config[:imap][:pattern]}/
        @imap.examine mailbox.name

        puts "Searching #{mailbox.name}"
        messages_in_mailbox = @imap.responses['EXISTS'][0]
        unless messages_in_mailbox
          # plain puts: this class is not a Thor command, `say` is undefined
          puts "#{mailbox.name} does not have any messages"
          next
        end

        @imap.select mailbox.name
        ids = @imap.search('SINCE 1-Jan-2001')
        if ids.empty?
          puts "\tFound no messages"
        else
          examine_message_list mailbox.name, ids
        end
      end
    end

    # Fetches and examines every message of ids in the given mailbox,
    # displaying a progress bar.
    #
    # When the connection drops (IOError) the crawler reconnects and resumes
    # with the message that failed, at most MAX_RECONNECT_ATTEMPTS times per
    # message; after that the IOError is raised to the caller.
    def examine_message_list(mailbox_name, ids)
      progressbar = ProgressBar.create(:total => ids.size)

      ids.each do |id|
        attempts = 0
        begin
          # Selecting before each fetch also restores the mailbox after a
          # reconnection.
          @imap.select mailbox_name
          fetched = imap.fetch(id, [@saved_key])
          message = fetched && fetched[0] # nothing fetched: message is gone
          examine_message message if message
          progressbar.increment
        rescue IOError
          attempts += 1
          raise if attempts > MAX_RECONNECT_ATTEMPTS
          connect!
          retry
        end
      end
    end

    # Collects the e-mail addresses found in the text/plain parts.
    #
    # body_parts - enumerable of parts responding to content_type and
    #              decoded.
    #
    # Returns a Set of address Strings.
    def extract_body_mail(body_parts)
      body_parts.each_with_object(Set.new) do |part, found|
        # Prefix match: the content type may carry parameters
        # (e.g. "text/plain; charset=UTF-8").
        next unless part.content_type.to_s.downcase.start_with?('text/plain')

        found.merge part.decoded.scan(MAIL_REGEXP)
      end
    end

    # Prints every address of header_struct (Hash: part name => addresses),
    # highlighting those present in remaining_emails.
    def display_header(header_struct, remaining_emails)
      puts ""
      header_struct.each do |key, list|
        list.each do |addr|
          addr_str = remaining_emails.include?(addr) ? addr.yellow.on_black : addr
          puts "%4s: %s" % [key.to_s.upcase, addr_str]
        end
      end
      puts ""
    end

    private

    # Asks the user what to do with remaining_emails until a final answer
    # (Ignore, Add or Skip) is given. Detail shows the message, then asks
    # again. End of input on STDIN is handled like Skip.
    def prompt_for_decision(mail, message, mail_struct, remaining_emails)
      # Decoded once, outside the loop: a subject that cannot be decoded
      # would otherwise fail again on every iteration, forever.
      subject =
        begin
          mail.subject
        rescue Encoding::ConverterNotFoundError
          STDERR.puts "ERROR: encoding problem in email. Unable to convert."
          nil
        end

      loop do
        begin
          puts "\n### #{subject}"
          print "#{mail_struct[:from].join(',')} --> #{mail_struct[:to].join(',')} "
          puts "[Ignore/Add/Skip/Detail] ?"

          answer = STDIN.gets
          break if answer.nil? # end of input: nobody left to answer

          case answer.strip
          when /^[iI]$/ # ignore
            @contact_manager.ignore_contact remaining_emails
            break
          when /^[aA]$/ # add
            @contact_manager.keep_contact remaining_emails
            break
          when /^[sS]$/ # skip
            break
          when /^[dD]$/ # detail
            show_details message
          end
        rescue Encoding::ConverterNotFoundError
          STDERR.puts "ERROR: encoding problem in email. Unable to convert."
        end
      end
    end

    # Dumps the raw message into TMPMAIL_FILE (through formail) and opens it
    # read-only in mutt.
    def show_details(message)
      File.open(TMPMAIL_FILE + ".2", 'w') do |f|
        f.write message.attr[@saved_key]
      end
      system "formail < #{TMPMAIL_FILE}.2 > #{TMPMAIL_FILE}"
      system "mutt -R -f #{TMPMAIL_FILE}"
    end
  end
end
|
11
lib/epafh/hash.rb
Normal file
11
lib/epafh/hash.rb
Normal file
|
@ -0,0 +1,11 @@
|
|||
|
||||
class Hash
  # Returns a copy of value in which every hash key has been converted to
  # a symbol, recursively.
  #
  # * Hashes are rebuilt with symbolized keys and converted values; hashes
  #   nested inside arrays are converted too.
  # * Keys that cannot be converted (no +to_sym+, e.g. integers) are kept
  #   unchanged instead of raising NoMethodError.
  # * Any other value is returned as is.
  #
  # The argument itself is never modified.
  def self.transform_keys_to_symbols(value)
    case value
    when Hash
      value.each_with_object({}) do |(key, nested), memo|
        symbolized = key.respond_to?(:to_sym) ? key.to_sym : key
        memo[symbolized] = transform_keys_to_symbols(nested)
      end
    when Array
      value.map { |item| transform_keys_to_symbols(item) }
    else
      value
    end
  end
end
|
|
@ -1,3 +0,0 @@
|
|||
module Epafh
|
||||
VERSION = "0.1.0"
|
||||
end
|
Loading…
Reference in a new issue