import sys
from collections import defaultdict

try:
    from urlparse import urlparse, parse_qs
except ImportError:
    # Python 3 moved these helpers into urllib.parse.
    from urllib.parse import urlparse, parse_qs

# reqans_log

# Request fields that get a further, nested split after the top-level parse.
# Each entry is a list of (record separator, name separator) pairs; the first
# pair is applied to the field value, the next pair to the pieces it yields.
req_subsplit = {
    'reqwizard': [(';', ':')],
    'search_props': [(';', ':'), (',', '=')],
    'rearr': [(';', '=')],
    'reqrelev': [(';', '=')],
}

# Answer fields that get a further, nested split after the top-level parse.
# Same (record separator, name separator) layout as for request fields; a
# name separator of None means the value is split into a plain list.
ans_subsplit = {
    'metahosts': [(',', None)],
    'extralinks': [(';', '=')],
    'markers': [(',', '=')],
}

def parseString(l, rec_sep, name_sep):
    """Split `l` into records and, optionally, into name/value pairs.

    Without a `name_sep` the result is the list of non-empty pieces of `l`
    separated by `rec_sep`.

    With a `name_sep` the result is a dict. A piece that contains `name_sep`
    starts a new record; following pieces that do not contain it are treated
    as a continuation of that record's value and are glued back on with
    `rec_sep` (empty pieces are dropped). Each record is split on the first
    `name_sep` into name and value. A name that occurs once maps to its
    value; a name that occurs several times maps to the list of its values
    in order of appearance. Pieces that precede the first named record carry
    no name and are discarded.
    """
    if not name_sep:
        return [s for s in l.split(rec_sep) if s]

    grouped = defaultdict(list)

    def flush(pieces):
        # Store one accumulated record; nothing to do if it is empty.
        if not any(pieces):
            return
        record = rec_sep.join([p for p in pieces if p])
        name, found, val = record.partition(name_sep)
        # `found` is empty only for leading pieces that never had a name;
        # those are skipped on purpose (best-effort parsing).
        if found:
            grouped[name].append(val)

    pending = []
    for piece in l.split(rec_sep):
        if name_sep in piece:
            flush(pending)
            pending = [piece]
        else:
            pending.append(piece)
    flush(pending)

    res = dict(grouped)
    # Iterate over a snapshot because values are replaced inside the loop;
    # items() (unlike iteritems()) exists on both Python 2 and Python 3.
    for name, vals in list(res.items()):
        if len(vals) == 1:
            res[name] = vals[0]
    return res

def parseDeep(l, seps):
    """Recursively split `l` with a sequence of separator pairs.

    `seps` is a sequence of (record separator, name separator) pairs. The
    first pair is applied to `l` via parseString; the remaining pairs are
    applied, in the same way, to every value that comes out of that split.
    With an empty `seps` the input is returned unchanged.

    Returns a dict when the first pair has a name separator, otherwise a
    list; nested values have the same shape one level down.
    """
    if not seps:
        return l
    rest = seps[1:]
    res = parseString(l, *seps[0])
    if isinstance(res, dict):
        # Snapshot the items: values are replaced while looping, and items()
        # (unlike iteritems()) is available on both Python 2 and Python 3.
        for k, v in list(res.items()):
            if isinstance(v, list):
                # Repeated name: parse each occurrence separately.
                res[k] = [parseDeep(i, rest) for i in v]
            else:
                res[k] = parseDeep(v, rest)
        return res
    return [parseDeep(v, rest) for v in res]

def parseRARecord(l):
    """Parse one multi-line reqans record into a single dict.

    The record is parsed by parseReqans (first line is the request, each
    further line is one answer). The result is returned as
    {'request': <request dict>, 'results': [<answer dict>, ...]}.
    """
    # parseReqans performs exactly the same parsing; only the packaging of
    # the result differs, so delegate instead of duplicating the logic.
    reqdata, results = parseReqans(l)
    return {'request': reqdata, 'results': results}

def parseReqans(l):
    """Parse one multi-line reqans record.

    After stripping surrounding whitespace, the first line is parsed as the
    request and every following line as one answer. Each line is split on
    '@@' into 'name=value' fields; fields listed in req_subsplit (request)
    or ans_subsplit (answers) are split further with parseDeep.

    Returns a tuple (request dict, list of answer dicts).
    """
    def parse_line(line, subsplit):
        # Top-level split of one line, then nested split of known fields.
        data = parseString(line, '@@', '=')
        # Snapshot the items: values are replaced while looping, and items()
        # (unlike iteritems()) is available on both Python 2 and Python 3.
        for key, val in list(data.items()):
            if key not in subsplit:
                continue
            if isinstance(val, list):
                # A field that occurs several times comes back as a list of
                # strings; parse every occurrence instead of handing the
                # list itself to parseDeep, which expects a string.
                data[key] = [parseDeep(item, subsplit[key]) for item in val]
            else:
                data[key] = parseDeep(val, subsplit[key])
        return data

    lines = l.strip().split('\n')
    reqdata = parse_line(lines[0], req_subsplit)
    res = [parse_line(ansline, ans_subsplit) for ansline in lines[1:]]
    return (reqdata, res)


# blockstat_log

def getBlocks(blocks):
    """Yield one dict per block from a flat list of tokens.

    The expected token layout, repeated until the list is exhausted, is:
    block name, number of fields, then that many 'key=value' tokens. Each
    yielded dict holds the block name under 'NAME' plus one entry per field
    (split on the first '=').

    `blocks` is consumed in place: tokens are popped from its front as the
    generator advances.

    Raises IndexError if the list ends in the middle of a block, and
    ValueError if the field count is not an integer or a field has no '='.
    """
    while blocks:
        b = {'NAME': blocks.pop(0)}
        n = int(blocks.pop(0))
        # range() rather than xrange(): works on both Python 2 and Python 3.
        for _ in range(n):
            k, v = blocks.pop(0).split('=', 1)
            b[k] = v
        yield b

def parseBlockstat(l):
    """Parse one blockstat log line into (header dict, list of block dicts).

    The line is split on '\\xFF'; the part before it is a tab-separated
    header and the part after it is a tab-separated token stream decoded by
    getBlocks. The header dict carries 'host' (header field 9) and 'uri'
    (header field 8).

    Raises ValueError when the '\\xFF' separator is missing or the header
    has fewer than 11 fields.
    """
    pieces = l.split('\xFF')
    if len(pieces) < 2:
        raise ValueError

    head_fields = pieces[0].split('\t')
    if len(head_fields) < 11:
        raise ValueError

    # Empty tokens (e.g. from doubled tabs) are ignored.
    tokens = [tok for tok in pieces[1].split('\t') if tok]
    hdr = {'host': head_fields[9], 'uri': head_fields[8]}
    bls = list(getBlocks(tokens))
    return (hdr, bls)

def getReq(url):
    """Extract the search request text from a URL.

    Returns None when `url` contains neither 'yandsearch' nor 'search/'.
    Otherwise returns the first value of the 'text' query parameter (as
    produced by parse_qs, with no further decoding), or, when there is no
    such parameter, the string form of the whole parsed query dict.

    Best effort: if the URL cannot be inspected or parsed, `url` itself is
    returned unchanged.
    """
    try:
        if 'yandsearch' not in url and 'search/' not in url:
            return None
        qs = parse_qs(urlparse(url).query)
    except Exception:
        # Deliberate fallback for unparsable or non-string input; unlike a
        # bare except this lets KeyboardInterrupt/SystemExit propagate.
        return url
    txt = qs.get('text')
    if txt:
        return txt[0]
    return str(qs)

