import sys
import urllib
import urlparse
import random

from regexps_and_stuff import *

# Host-name suffixes that are accepted, paired with the value stored in
# record['tld'].  They are tried in order, so '.com.ua' must come before
# '.ua' (otherwise 'yandex.com.ua' would be cut down to 'yandex.com').
_SPY_TLD_SUFFIXES = (
    ('.ru', 'ru'),
    ('.com.ua', 'ua'),
    ('.ua', 'ua'),
    ('.by', 'by'),
    ('.kz', 'kz'),
    ('.com.tr', 'com.tr'),
    ('.com', 'com'),
)

# Timing codes found in the 't' parameter, paired with the record key the
# value is stored under.  Codes that are not listed here are ignored.
_SPY_TIMING_KEYS = {
    '1037': '.dns',
    '1038': '.tcp_handshake',
    '1039': '.ttfb',
    '1040': '.html',
    '1040.906': '.html_total',
    '1041': '.ttfp',
    '1041.906': '.ttfp_total',
}


def _parse_spy_params(query, raw_record):
    """Split an '&'-separated 'key=value' string into a dict.

    Values are URL-decoded with urllib.unquote_plus.  A field without '='
    is stored with the value None; unless that field is 'url', a warning
    that quotes raw_record is written to stderr.
    """
    params = {}
    for field in query.split('&'):
        field = field.strip()
        if not field:
            continue

        # Split on the first '=' only: the value may itself contain '='.
        key, sep, value = field.partition('=')
        if sep:
            params[key] = urllib.unquote_plus(value)
            continue

        params[key] = None
        if key != 'url':
            sys.stderr.write("Strange field: '" + field + "'\nin record:"
                             + raw_record + "\n")
    return params


def _classify_spy_host(netloc):
    """Reduce a host name to (name, tld, rejection).

    On success name is 'yandex' or 'google', tld is one of the values in
    _SPY_TLD_SUFFIXES and rejection is None.  Otherwise name and tld are
    None and rejection is the string the handler has to return.
    """
    if 'yandex.' not in netloc and 'google.' not in netloc:
        return None, None, "NotYandexGoogle"

    if netloc.startswith(('www.yandex.', 'www.google.')):
        netloc = netloc[len('www.'):]
    elif not netloc.startswith(('yandex.', 'google.')):
        return None, None, "UnknownDomain:"

    for suffix, tld in _SPY_TLD_SUFFIXES:
        if netloc.endswith(suffix):
            name = netloc[:-len(suffix)]
            break
    else:
        return None, None, "StrangeTLD:" + netloc

    # Anything left between the brand name and the suffix is reported with
    # the same "StrangeTLD:" label as an unknown suffix.
    if name not in ('yandex', 'google'):
        return None, None, "StrangeTLD:" + name

    return name, tld, None


def spy_log_record_handler(rec):
    """Turn one spy-log record into a flat dict, or a rejection string.

    rec.value is split on single spaces: field 0 is parsed as an integer
    timestamp, field 1 is passed to is_turbo() (it is named `ip` in this
    code) and field 3 holds '&'-separated 'key=value' parameters.  Only the
    plain 'url' parameter is used to locate the page; the 't' parameter
    holds '_'-separated 'code-value' timings.

    Returns:
        A dict with the keys 'type', 'reqid', 'ts', 'service', 'tld',
        'https', 'turbo', 'yasoft', 'yasoft_version' plus one key per
        recognised timing code (see _SPY_TIMING_KEYS), or a short string
        naming the reason the record was rejected ("no url", "no t",
        "NotYandexGoogle", "UnknownDomain:", "StrangeTLD:...",
        "UnknownService:...", "http<status>", "triplet time").

    Raises:
        IndexError: rec.value has fewer than four space-separated fields.
        AssertionError: the URL scheme is neither 'http' nor 'https'.
        ValueError: the timestamp or a recognised timing value is not an
            integer, or a timing item is neither a pair nor a triplet.
    """
    data = rec.value.split(' ')
    params = _parse_spy_params(data[3], rec.value)

    # A bare 'url' / 't' field (no '=') is stored as None and counts as
    # missing.
    if params.get('url') is None:
        return "no url"
    if params.get('t') is None:
        return "no t"

    url = urlparse.urlparse(params['url'])
    path = url.path
    fragment = url.fragment
    scheme = url.scheme

    name, tld, rejection = _classify_spy_host(url.netloc)
    if rejection is not None:
        return rejection

    if path == '/' and fragment == '':
        service = name
    elif path in ('/search', '/yandsearch'):
        service = name + path
    elif path == '/':
        # Root page with a non-empty fragment.
        service = name + '/#'
    else:
        return "UnknownService:" + name

    if scheme == 'http':
        https = False
    elif scheme == 'https':
        https = True
    else:
        # Raised explicitly (not via `assert`) so that it survives -O; the
        # exception type is the one the original assert was meant to raise.
        raise AssertionError("wtf scheme is " + scheme)

    ts = int(data[0])
    ip = data[1]

    # A key that is present without a value is treated like an absent key.
    yasoft = params.get('yasoft')
    version = params.get('ver')

    record = {
        'type': 'spylog',
        'reqid': None,
        'ts': ts,
        'service': service,
        'tld': tld,
        'https': https,
        'turbo': is_turbo(ip),
        'yasoft': yasoft,
        'yasoft_version': (('Unknown' if yasoft is None else yasoft) + "/"
                           + ('None' if version is None else version)),
    }

    # Parameter values are strings, so the status is compared with '200'.
    httpstatus = params.get('httpstatus')
    if httpstatus is not None and httpstatus != '200':
        return "http" + str(httpstatus)

    for item in params['t'].split('_'):
        parts = item.split('-')
        if len(parts) == 3:
            return "triplet time"

        code, value = parts
        key = _SPY_TIMING_KEYS.get(code)
        if key is not None:
            record[key] = int(value)

    return record







