#!/usr/bin/python
#
# SOMEDAY/TODO:
#   * split into 2 workers
#     - first worker(s) push URLs into RabbitMQ
#     - second worker(s) consume the messages and make the URL requests
#

import os
import requests
from sgmllib import SGMLParser
import quopri
import argparse

class URLLister(SGMLParser):
    def reset(self):
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        href = [v for k, v in attrs if k=='href']
        if href:
            self.urls.extend(href)

def _process_message(fullname):
    """Request the matching links of one message file, then delete the file.

    The file is read, decoded with quopri when the text "quoted-printable"
    appears anywhere in it, and scanned for <a href> links.  Every link
    containing "email_type=all" is requested (with www.twitch.tv rewritten
    to www-origin.twitch.tv).  The file is removed afterwards whether or not
    any request succeeded.

    Returns the one-line log message describing what was done.
    """
    log = ["message_path: %s " % fullname]

    # Close the handle promptly instead of leaving it to the garbage
    # collector; a large maildir would otherwise pile up open descriptors.
    with open(fullname) as handle:
        message = handle.read()

    lister = URLLister()
    if "quoted-printable" in message:
        message = quopri.decodestring(message)
        log.append("quoted: yes ")
    else:
        log.append("quoted: no  ")

    try:
        lister.feed(message)
    except Exception as err:
        # Best effort: mail bodies are frequently malformed markup.  Keep
        # whatever links were collected before the failure and carry on,
        # but record the failure instead of hiding it.  Only the exception
        # type is logged to avoid encoding problems with its message.
        log.append("parse_error: %s " % type(err).__name__)

    fetched = False
    for url in lister.urls:
        if "email_type=all" not in url:
            continue
        url = url.replace("www.twitch.tv", "www-origin.twitch.tv")
        log.append("url: %s " % url)
        try:
            response = requests.get(url, timeout=3)
        except Exception as err:
            # One unreachable or malformed link must not abort the whole
            # run; unlike a bare except this lets KeyboardInterrupt and
            # SystemExit propagate.
            log.append("error: %s " % type(err).__name__)
        else:
            log.append("status: %s " % response.status_code)
            fetched = True

    if not fetched:
        log.append("status: skipping")

    # Remove the message exactly once, after all links were tried.  Removing
    # it inside the loop raised OSError on the second successful request of
    # a message that contained more than one matching link.
    if os.path.isfile(fullname):
        os.remove(fullname)

    return "".join(log)


def main():
    """Walk a maildir, request the matching links of every message, delete it.

    Command line options:
      -m/--maildir  directory tree to process (default: /dev/shm/maildir)
      -v/--verbose  log one line per processed message to syslog (mail
                    facility, with PID)

    Exits with status 1 when the maildir path does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--maildir', action='store', default='/dev/shm/maildir')
    parser.add_argument('-v', '--verbose', action="store_true")
    args = parser.parse_args()

    if args.verbose:
        # Imported lazily: syslog is only needed when logging was requested.
        import syslog
        syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_MAIL)

    maildir_path = args.maildir

    if not os.path.exists(maildir_path):
        print("%s: maildir does not exist" % maildir_path)
        # exit() is a helper injected by the site module and may be absent;
        # raising SystemExit directly always works.
        raise SystemExit(1)

    for dirname, _subdirs, files in os.walk(maildir_path):
        for name in files:
            log_line = _process_message(os.path.join(dirname, name))
            if args.verbose:
                syslog.syslog(log_line)

# Call main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

