commit 866b079a98d1922551891e713af0a41ffdf1025a Author: Glenn Y. Rolland Date: Sat Jun 11 13:44:06 2016 +0200 Initial import. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1a57f63 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +.tmpmail* +vendor/bundle* +/.bundle/ +/.yardoc +#/Gemfile.lock +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..93ac6db --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: ruby +rvm: + - 2.2.3 +before_install: gem install bundler -v 1.10.5 +script: + - bundle exec rake test diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..1f2dc94 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source "https://rubygems.org" + +# Specify your gem's dependencies in ewoga.gemspec +gemspec diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..b452016 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,69 @@ +PATH + remote: . + specs: + ewoga (0.1.0) + colorize + hash_validator + json + mail (~> 2.6.3) + mechanize + pry + pry-rescue + thor + +GEM + remote: https://rubygems.org/ + specs: + coderay (1.1.0) + colorize (0.7.7) + domain_name (0.5.25) + unf (>= 0.0.5, < 1.0.0) + hash_validator (0.4.0) + http-cookie (1.0.2) + domain_name (~> 0.5) + interception (0.5) + json (1.8.3) + mail (2.6.3) + mime-types (>= 1.16, < 3) + mechanize (2.7.3) + domain_name (~> 0.5, >= 0.5.1) + http-cookie (~> 1.0) + mime-types (~> 2.0) + net-http-digest_auth (~> 1.1, >= 1.1.1) + net-http-persistent (~> 2.5, >= 2.5.2) + nokogiri (~> 1.4) + ntlm-http (~> 0.1, >= 0.1.1) + webrobots (>= 0.0.9, < 0.2) + method_source (0.8.2) + mime-types (2.99) + mini_portile (0.6.2) + net-http-digest_auth (1.4) + net-http-persistent (2.9.4) + nokogiri (1.6.6.4) + mini_portile (~> 0.6.0) + ntlm-http (0.1.1) + pry (0.10.3) + coderay (~> 1.1.0) + method_source (~> 0.8.1) + slop (~> 3.4) + pry-rescue (1.4.2) + interception (>= 0.5) + pry + rake (10.4.2) + slop (3.6.0) + thor (0.19.1) + unf (0.1.4) + unf_ext + unf_ext (0.0.7.1) + webrobots (0.1.1) + +PLATFORMS + ruby + +DEPENDENCIES + bundler (~> 1.10) + ewoga! + rake (~> 10.0) + +BUNDLED WITH + 1.10.5 diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md new file mode 100644 index 0000000..63c7896 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +Ewoga (Email WOrk GAtherer) +============= + diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..0dc9771 --- /dev/null +++ b/Rakefile @@ -0,0 +1,26 @@ + +require 'rake' +require 'rake/testtask' + +Rake::TaskManager.record_task_metadata = true + +require 'bundler/gem_tasks' + +Rake::TestTask.new do |t| + #t.warning = true + t.verbose = true + t.libs << "spec" + t.test_files = FileList['spec/**/*_spec.rb'] +end + +# Set default task to list all task +desc 'Default task (build)' +task :default do + puts 'Usage : rake ' + puts '' + + Rake::application.options.show_tasks = :tasks # this solves sidewaysmilk problem + Rake::application.options.show_task_pattern = // + Rake::application.display_tasks_and_comments +end + diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..58a91ff --- /dev/null +++ b/TODO.md @@ -0,0 +1,26 @@ +TODO +==== + +## Features + +* Add POP3 server support + + +## Documentation + +* Write a tutorial (installation, configuration, usage). + + +## Command line + +* Add more actions : + * init - create empty configuration files + * all - get messages for all boxes + * new (by default) - get only new messages since last check + + +## Configuration + +* Add regexp support for ignore list +* Add an ignore list (and/or regexp) for mailboxes + diff --git a/bin/ewoga b/bin/ewoga new file mode 100755 index 0000000..5cafe52 --- /dev/null +++ b/bin/ewoga @@ -0,0 +1,366 @@ +#!/usr/bin/env ruby + +#require 'bundler/setup' +#Bundler.require + +require 'pry' +require 'zlib' +require 'net/imap' +require 'pp' +require 'mechanize' +require 'yaml' +require 'hash_validator' +require 'uri' +require 'thor' +require 'json' +require 'mail' +require 'colorize' + +#Net::IMAP.debug = true + +class Hash + #take keys of hash and transform those to a symbols + def self.transform_keys_to_symbols(value) + return value if not value.is_a?(Hash) + hash = value.inject({}) do |memo,(k,v)| + memo[k.to_sym] = Hash.transform_keys_to_symbols(v); memo + end + return hash + end +end + +module Ewoga + EPAFI_CONFIG_FILE = File.join(ENV['HOME'],'.ewoga','config.yml') + EPAFI_IGNORE_FILE = File.join(ENV['HOME'],'.ewoga','ignore.yml') + + class ContactManager + + CRM_LOGIN_URL = '/login' + CRM_LEADS_URL = '/leads.json' + CRM_CONTACTS_URL = '/contacts.json' + + + def initialize config + @config = config + + @browser = Mechanize.new { |agent| + agent.user_agent_alias = 'Mac Safari' + } + @ignore_list = Set.new + @keep_list = Set.new + + ## Load configuration file + # + + unless File.exist? EPAFI_CONFIG_FILE then + raise "Unable to find configuration file #{EPAFI_CONFIG_FILE}" + end + @config = config + + + connect! + load_contacts + load_leads + load_ignore + #puts @keep_list.to_a + rescue RuntimeError => e + STDERR.puts e.message + end + + def connect! + @browser.get(@config[:crm][:baseurl] + CRM_LOGIN_URL) do |page| + page.form_with(action: '/authentication') do |f| + f['authentication[username]'] = @config[:crm][:login] + f['authentication[password]'] = @config[:crm][:password] + end.click_button + end + + rescue Mechanize::ResponseCodeError + raise "Authentication error. Verify your credentials." + end + + def load_ignore + if File.exist? EPAFI_IGNORE_FILE + ignore_list = YAML.load_file(EPAFI_IGNORE_FILE) + ignore_list.each do |email| + @ignore_list << email.strip.downcase + end + end + end + + def load_leads page=1 + crm_leads_page = @browser.get(@config[:crm][:baseurl] + CRM_LEADS_URL + "?page=#{page}") + crm_leads = JSON.parse crm_leads_page.body + crm_leads.each do |lead_obj| + keep_contact lead_obj['lead']['email'].split(',') + keep_contact lead_obj['lead']['alt_email'].split(',') + end + + if crm_leads.size > 0 then + load_leads (page + 1) + end + end + + def load_contacts page=1 + crm_contacts_page = @browser.get(@config[:crm][:baseurl] + CRM_CONTACTS_URL + "?page=#{page}") + crm_contacts = JSON.parse crm_contacts_page.body + crm_contacts.each do |contact_obj| + keep_contact contact_obj['contact']['email'].split(',') + keep_contact contact_obj['contact']['alt_email'].split(',') + end + + if crm_contacts.size > 0 then + load_contacts (page + 1) + end + #contacts.to_a.sort.join(', ') + end + + def keep_contact emails + emails = emails.to_a if emails.is_a? Set + [emails].flatten.each do |mail| + @keep_list << mail.strip.downcase + end + end + + def ignore_contact emails + emails = emails.to_a if emails.is_a? Set + [emails].flatten.each do |mail| + @ignore_list << mail.strip.downcase + end + File.open(EPAFI_IGNORE_FILE, 'w') do |f| + f.write @ignore_list.to_a.to_yaml + end + end + + def include? mail + return ( + (@ignore_list.include? mail.strip.downcase) or + (@keep_list.include? mail.strip.downcase) + ) + end + end + + class CrawlerApp + attr_reader :imap + attr_reader :contacts + + TMPMAIL_FILE = '.tmpmail' + + def initialize config + @saved_key = 'RFC822' + @filter_headers = 'BODY[HEADER.FIELDS (FROM TO Subject)]'.upcase + @config = config + @imap = nil + @contact_manager = ContactManager.new config + end + + + def connect! + @imap = Net::IMAP.new( + @config[:imap][:server], + ssl: {verify_mode: OpenSSL::SSL::VERIFY_NONE}, + port: 993 + ) + @imap.login(@config[:imap][:login], @config[:imap][:password]) + #@imap.select(SOURCE_MAILBOX) + end + + def disconnect! + imap.logout + imap.disconnect + end + + MAIL_REGEXP = /\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}\b/ + + def examine_message message + m = Mail.read_from_string message.attr[@saved_key] + return if m.from.nil? + return if m.to.nil? + + + emails = Set.new + begin + emails.merge m.from + emails.merge [m.to].flatten if m.to + emails.merge [m.cc].flatten if m.cc + rescue => e + binding.pry + end + + body_emails = Set.new + m.body.parts.each do |part| + next if part.content_type != 'text/plain' + + #body_emails = m.body.decoded.scan MAIL_REGEXP + part_emails = part.decoded.scan MAIL_REGEXP + #pp body_emails + if not part_emails.empty? then + body_emails.merge part_emails + end + end + emails.merge body_emails + + # puts emails.to_a.join(' , ') + remaining_emails = ( + emails + .map{ |e| [e, (@contact_manager.include? e)] } + .select{ |e,t| !t } + ) + seen_emails = ( + remaining_emails + .empty? + ) + # puts @contacts.to_a.join(', ') + if seen_emails then + print "." + return + else + puts "" + all_addr = { + from: (m.from || []), + to: (m.to || []), + cc: (m.cc || []), + body: (body_emails || []) + } + all_addr.each do |key, list| + list.each do |addr| + addr_str = if remaining_emails.map{|e,t| e}.include? addr then + addr.yellow.on_black + else addr + end + str = "%4s: %s" % [key.to_s.upcase, addr_str] + puts str + end + end + puts "" + #puts " ORIGINAL EMAILS: #{emails.to_a.join(', ')}" + #puts "REMAINING EMAILS: #{remaining_emails.map{|e,t| e}.join(', ')}".yellow.on_black + #puts " SEEN EMAILS: #{seen_emails}" + end + + while true + begin + puts "\n### #{m.subject}" + print "#{m.from.join(',')} --> #{m.to.join(',')} " + puts "[Ignore/Add/Skip/Detail] ?" + + i = STDIN.gets + case i.strip + when /^[iI]$/ then # ignore + @contact_manager.ignore_contact remaining_emails.map{|e,t| e} + break + when /^[aA]$/ then # add + @contact_manager.keep_contact remaining_emails.map{|e,t| e} + break + when /^[sS]$/ then #skip + break + when /^[dD]$/ then # decode + # puts m.body.decoded + File.open(TMPMAIL_FILE + ".2", 'w') do |f| + f.write message.attr[@saved_key] + end + system "formail < #{TMPMAIL_FILE}.2 > #{TMPMAIL_FILE}" + system "mutt -R -f #{TMPMAIL_FILE}" + end + rescue Encoding::ConverterNotFoundError + STDERR.puts "ERROR: encoding problem in email. Unable to convert." + end + end + + return + end + + def examine_all + @imap.list('', '*').each do |mailbox| + puts "\nMAILBOX #{mailbox.name}".yellow + next unless mailbox.name =~ /#{@config[:imap][:pattern]}/ + @imap.examine mailbox.name + + puts "Searching #{mailbox.name}" + messages_in_mailbox = @imap.responses['EXISTS'][0] + if not messages_in_mailbox then + say "#{mailbox.name} does not have any messages" + next + end + + @imap.select mailbox.name #GYR: TEST + ids = @imap.search('SINCE 1-Jan-2001') + # NOT OR TO "@agilefant.org" CC "@agilefant.org"') + if ids.empty? + puts "\tFound no messages" + else + examine_message_list mailbox.name, ids + end + end + end + + def examine_message_list mailbox_name, ids + ids.each do |id| + @imap.select mailbox_name #GYR: TEST + message = imap.fetch(id, [@saved_key])[0] + examine_message message + end + rescue IOError + # re-connect and try again + connect! + retry + end + end + + class Crawler < Thor + + CONFIG_FILE = 'config/secrey.yml' + + include Thor::Actions + default_task :crawl + + + desc 'crawl', 'Crawls email to save mails' + def crawl + #saved_info = [] + parse_configuration + + ## Run application + app = CrawlerApp.new @config + + app.connect! + app.examine_all + #pp saved_info + app.disconnect! + end + + def initialize *args + @config = {} + super + end + + private + + + def parse_configuration + ## Load configuration + @config.merge! Hash.transform_keys_to_symbols( + YAML::load( File.open( EPAFI_CONFIG_FILE ) ) + ) + + ## Validate configuration structure + validations = { + crm: { + baseurl: lambda { |url| url =~ URI::regexp }, + login: 'string', + password: 'string' + }, + imap: { + server: 'string', + login: 'string', + password: 'string' + } + } + validator = HashValidator.validate(@config, validations) + raise "Configuration is not valid: #{validator.errors.inspect}" unless validator.valid? + end + end +end + +Ewoga::Crawler.start + diff --git a/config/config.yml.sample b/config/config.yml.sample new file mode 100644 index 0000000..f81948a --- /dev/null +++ b/config/config.yml.sample @@ -0,0 +1,10 @@ +--- +crm: + baseurl: http://url.of.your.fatfreecrm.server + login: johnsmith + password: **** +imap: + pattern: '.*' + server: hostname.of.you.imap.server + login: johnsmith + password: **** diff --git a/config/ignore.yml.sample b/config/ignore.yml.sample new file mode 100644 index 0000000..84c12d8 --- /dev/null +++ b/config/ignore.yml.sample @@ -0,0 +1,3 @@ +--- +- badguy@example.com +- somespammer@example.com diff --git a/ewoga.gemspec b/ewoga.gemspec new file mode 100644 index 0000000..d724ff1 --- /dev/null +++ b/ewoga.gemspec @@ -0,0 +1,40 @@ +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'ewoga/version' + +Gem::Specification.new do |spec| + spec.name = "ewoga" + spec.version = Ewoga::VERSION + spec.authors = ["@@@No user configured@@@"] + spec.email = ["@@@No user configured@@@"] + + spec.summary = %q{TODO: Write a short summary, because Rubygems requires one.} + spec.description = %q{TODO: Write a longer description or delete this line.} + spec.homepage = "TODO: Put your gem's website or public repo URL here." + spec.license = "LGPL-3" + + # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or + # delete this section to allow pushing this gem to any host. + if spec.respond_to?(:metadata) + spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'" + else + raise "RubyGems 2.0 or newer is required to protect against public gem pushes." + end + + spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } + spec.bindir = "bin" + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.require_paths = ["lib"] + + spec.add_development_dependency "bundler", "~> 1.10" + spec.add_development_dependency "rake", "~> 10.0" + spec.add_runtime_dependency "mail", "~> 2.6.3" + spec.add_runtime_dependency "json" + spec.add_runtime_dependency "thor" + spec.add_runtime_dependency "mechanize" + spec.add_runtime_dependency "colorize" + spec.add_runtime_dependency "hash_validator" + spec.add_runtime_dependency "pry" + spec.add_runtime_dependency "pry-rescue" +end diff --git a/lib/ewoga.rb b/lib/ewoga.rb new file mode 100644 index 0000000..1ca3acb --- /dev/null +++ b/lib/ewoga.rb @@ -0,0 +1,5 @@ +require "ewoga/version" + +module Ewoga + # Your code goes here... +end diff --git a/lib/ewoga/version.rb b/lib/ewoga/version.rb new file mode 100644 index 0000000..052ea86 --- /dev/null +++ b/lib/ewoga/version.rb @@ -0,0 +1,3 @@ +module Ewoga + VERSION = "0.1.0" +end