#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys
import re
import codecs
import string

def get_root_domains( filename ):
    """Load the list of root domains from a UTF-8 encoded text file.

    Every line is stripped of leading/trailing whitespace (including the
    line terminator); lines that are empty after stripping are dropped.

    :param filename: path of the file to read.
    :return: list of unicode strings, in file order.
    :raises IOError: (OSError on Python 3) if the file cannot be opened.
    :raises UnicodeDecodeError: if the file content is not valid UTF-8.
    """
    # codecs.open() always appends "b" to the mode when an encoding is given,
    # so the former "rt" turned into the contradictory "rtb".  Plain "r"
    # yields the same binary-read-then-decode behaviour and is accepted
    # everywhere.  The context manager closes the handle deterministically
    # instead of leaving it to the garbage collector.
    with codecs.open( filename, "r", "utf-8" ) as source:
        raw_lines = source.readlines()

    stripped = [ raw.strip() for raw in raw_lines ]
    return [ entry for entry in stripped if entry ]

def get_2ndlevel_regexp( domains ):
    """Compile the regular expression used to cut a host name down to its
    registrable part.

    The pattern is anchored at the end of the subject.  Its single capture
    group holds one dot-free label followed by one or more of the given
    root-domain entries; the label must start either at the beginning of the
    subject or right after a dot.

    Entries are regex-escaped and inserted verbatim, so any separating dot
    has to be part of the entry itself.
    NOTE(review): presumably the root-domain file stores entries with a
    leading dot (".com", ".co.uk") -- confirm against the data file.

    :param domains: iterable of root-domain strings.
    :return: compiled pattern object; group 1 is the shortened domain.
    """
    alternatives = u"|".join( [ re.escape( suffix ) for suffix in domains ] )

    # Raw string: the backslashes are meant for the regex engine, not for the
    # Python string-literal parser (a non-raw "\." is an invalid escape).
    return re.compile( r"(?:^|\.)([^\.]+(?:%s)+)$" % alternatives )


def get_2ndlevel_domain( url ):
    """Return the part of *url* captured by the root-domain pattern.

    The compiled pattern is built on the first call from
    /etc/cron.yandex/rootdomains.txt and then kept on the function object
    (attribute ``re_domain2nd``) for all later calls.

    :param url: string to search; the pattern is anchored at its end.
    :return: capture group 1 of the match, or *url* unchanged when the
             pattern does not match.
    """
    try:
        pattern = get_2ndlevel_domain.re_domain2nd
    except AttributeError:
        # First call: nothing cached yet, so load the list and compile.
        roots = get_root_domains( "/etc/cron.yandex/rootdomains.txt" )
        pattern = get_2ndlevel_regexp( roots )
        get_2ndlevel_domain.re_domain2nd = pattern

    found = pattern.search( url )
    return found.group( 1 ) if found else url


domain = "domain" in sys.argv[ 1: ]
suid = "suid" in sys.argv[ 1: ]

for line in sys.stdin:
    parts = line.split( "\t" )
    from_addr = parts[ 1 ]
    if domain:
        from_addr = from_addr.split( "@" )[ -1 ]
    rcpts = parts[ 12 ]
    flags = parts[ 5 ]
    rdns = get_2ndlevel_domain( parts[ 7 ] )

    if not suid:
        print from_addr, rdns, "_" +  "_".join( set( parts[ 5 ].split( "_" ) ) & set( [ "ES", "PS" ] ) )
        continue
    if rcpts.find( "," ) >= 0:
        continue
    for rcpt in rcpts.split( "," ):
        print from_addr, rcpt.split( "_" )[ -1 ], rdns, "_" +  "_".join( set( parts[ 5 ].split( "_" ) ) & set( [ "ES", "PS" ] ) )

