#!/usr/bin/env python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, re
from collections import defaultdict

# Matches, anchored at the end of the string, one dot-free label followed by
# one or more ".<suffix>" groups, where a suffix is either ANY two characters
# ("..", not restricted to letters) or one of the listed generic TLDs.
# Matching is case-insensitive. Written as a raw string so that "\." reaches
# the regex engine unchanged and is not treated as a string escape.
re_fromdomain = re.compile(
    r"[^\.]+(?:\.(?:..|com|net|info|aero|arpa|asia|coop|jobs|mobi|museum|name|travel|biz|cat|edu|gov|int|mil|org|pro|tel|club))+$",
    re.I,
)

# Report threshold: output_stat leaves out every row whose foo count AND
# antifoo count are both below this value.
MINIMUM_FOO_COUNT = 10

def output_stat( filename, statfrom, min_count = None ):
    """Write a fixed-width statistics table to *filename*.

    The file is created or truncated and receives one header line followed by
    one line per reported entry, ordered by descending ``antifoo`` count
    (entries with equal counts keep the mapping's iteration order).

    Parameters:
        filename  -- path of the text file to write.
        statfrom  -- mapping of key (printed in the "From" column) to an
                     object exposing the integer attributes ``antifoo``,
                     ``antifoo_move``, ``foo``, ``foo_move`` and the sized
                     collections ``uniq_antifoo`` and ``uniq_foo``.
        min_count -- entries whose ``foo`` AND ``antifoo`` are both below
                     this value are omitted; defaults to MINIMUM_FOO_COUNT.

    Raises whatever ``open`` raises if the file cannot be written.
    """
    if min_count is None:
        min_count = MINIMUM_FOO_COUNT

    # The with-block guarantees the file is flushed and closed even if
    # formatting one of the rows raises.
    with open( filename, "wt" ) as f:
        f.write( "%-50s %15s %15s %15s %15s %20s %20s" % ( "From", "Antifoo", "AntiFoo_Move", "Foo", "Foo_Move", "Uniq_Compl_AntiFoo", "Uniq_Compl_Foo" ) + "\n" )

        # items() and an index-based key function work on both Python 2 and
        # Python 3 (tuple-unpacking lambdas are a syntax error on the latter).
        ordered = sorted( statfrom.items(), key = lambda item: item[ 1 ].antifoo, reverse = True )
        for key, stat in ordered:
            if stat.foo < min_count and stat.antifoo < min_count:
                continue
            f.write( "%-50s %15d %15d %15d %15d %20d %20d" % ( key, stat.antifoo, stat.antifoo_move, stat.foo, stat.foo_move, len( stat.uniq_antifoo ), len( stat.uniq_foo ) ) + "\n" )

class Info( object ):
    """Accumulated counters for one report key.

    Every instance starts with all counters at zero and with its own, empty
    sets, so updating one instance can never leak into another.
    """

    def __init__( self ):
        # Number of records counted as "antifoo", and how many of those were
        # additionally flagged as a move.
        self.antifoo = 0
        self.antifoo_move = 0
        # Number of records counted as "foo", and how many of those were
        # additionally flagged as a move.
        self.foo = 0
        self.foo_move = 0
        # Distinct identifiers seen on antifoo / foo records; the report
        # prints only the sizes of these sets.
        self.uniq_antifoo = set()
        self.uniq_foo = set()

# Accumulators: `stat` is keyed by the full From address, `stathost` by the
# domain that re_fromdomain extracts from it.
stat, stathost = defaultdict( Info ), defaultdict( Info )

# Fail fast when the two output paths are missing, instead of consuming all
# of stdin first and only then dying on sys.argv[ 1 ].
if len( sys.argv ) < 3:
    sys.exit( "usage: %s FROM_STAT_FILE HOST_STAT_FILE < tab-separated-input" % sys.argv[ 0 ] )

for line in sys.stdin:
    parts = line.split( "\t" )
    try:
        fromaddr = parts[ 1 ].strip( "'" )  # From address, surrounding quotes removed
        spam = parts[ 2 ]                   # "S" is counted as foo, "H" as antifoo
        move = "_MV" in parts[ 5 ]          # field 5 mentioning "_MV" marks a move
        suid = parts[ 7 ]                   # identifier collected in the uniq_* sets
    except IndexError:
        # Line with fewer than 8 tab-separated fields: skip it. Only this
        # expected failure is caught, so real bugs and Ctrl-C are not hidden.
        continue

    if fromaddr == "-":
        continue

    # Only the part after the last "@" is searched for a domain; addresses
    # whose host part does not match are counted per address only.
    match = re_fromdomain.search( fromaddr.split( "@" )[ -1 ] )
    host = match.group( 0 ) if match else ""

    targets = [ stat[ fromaddr ] ]
    if host:
        targets.append( stathost[ host ] )

    for info in targets:
        if spam == "S":
            info.foo += 1
            if move:
                info.foo_move += 1
            info.uniq_foo.add( suid )
        elif spam == "H":
            info.antifoo += 1
            if move:
                info.antifoo_move += 1
            info.uniq_antifoo.add( suid )

output_stat( sys.argv[ 1 ], stat )
output_stat( sys.argv[ 2 ], stathost )
