# -*- coding: UTF-8 -*-

from datetime import datetime
import libra
import urllib, re,random


def Reduce(key, recs):
    """Build tab-separated statistics rows for the video snippet in one user's session.

    Only keys starting with 'y' are processed.  The records are parsed with
    ``libra.ParseSession``; every ``TYandexWebRequest`` whose
    ``ServiceDomRegion`` is 'tr' and that showed a block with 'snippet/video'
    in its path yields one output line with the columns::

        uid, date, reqid, snippet path, isWin, isLoss, pos,
        snippet height, height of the next result, all heights

    * isWin  -- 1 if some click on a 'snippet/video' path has DwellTime > 15.
    * isLoss -- 1 if the non-direct main block at index ``pos + 1`` has a
      click with DwellTime > 15.
    * heights come from the '-res-heights' variable of the first matching
      'serp.page.loaded' / '71.143.1007' tech event; '-' / '' are written
      when they are unavailable.

    :param key: user id shared by all records of the session.
    :param recs: session records, passed unchanged to ``libra.ParseSession``.
    :return: the concatenated output lines (each ending in a newline); an
        empty string when the key is skipped or nothing matched.
    """
    uid = key
    # startswith() also copes with an empty key, where uid[0] would raise.
    # An empty string (not None) is returned so callers can concatenate it.
    if not uid.startswith('y'):
        return ''

    s = libra.ParseSession(recs, '/home/ensuetina/data/blockstat.dict')

    long_dwell = 15  # dwell-time threshold that marks a click as "long"
    rows = []

    for r in s:
        if not r.IsA('TYandexWebRequest'):
            continue

        if r.ServiceDomRegion != 'tr':
            continue

        # Calendar date (local time) of the request, YYYY-MM-DD.
        ts = datetime.fromtimestamp(r.Timestamp).isoformat().split('T')[0]
        reqid = r.ReqID

        # Check whether snippet/video was shown and at which position.
        isV = 0
        wiz = ''
        pos = -1        # numeric position, -1 while unknown
        pos_key = '-1'  # position exactly as written in the block vars
        for bl in r.GetBSBlocks():
            p = bl.Path
            if 'snippet/video' not in p:
                continue
            for v in bl.GetVars():
                if v[0] != 'pos':
                    continue
                raw = v[1].replace('p', '')  # extract the position
                # Parse right away: comparing the raw string with -1 only
                # "worked" through Python 2's cross-type ordering.
                try:
                    pos = int(raw)
                except ValueError:
                    continue  # unparsable value: treat as no position
                pos_key = raw
            isV = 1
            wiz = p
            if pos > -1:
                break

        if isV == 0:
            continue

        # Determine whether there was a WIN (a long click on the snippet).
        isWin = 0
        for cl in r.GetClicks():
            p = cl.ConvertedPath
            dw = cl.DwellTime
            if 'snippet/video' in p:
                print('CLICK ' + p + ' ' + str(dw))
                if int(dw) > long_dwell:
                    print('LONG ' + reqid + ' ' + str(dw))
                    isWin = 1

        if pos == -1:
            print('no position for -- ' + reqid)
            continue

        # Determine whether there was a LOSS: a long click on the non-direct
        # main block at index pos + 1.
        isLoss = 0
        i = 0
        for bl in r.GetMainBlocks():
            if bl.GetMainResult().IsA('TDirectResult'):
                continue  # direct results are not counted

            if i == pos + 1:
                if any(int(cl.DwellTime) > long_dwell for cl in bl.GetClicks()):
                    isLoss = 1
                break

            i += 1

        # Compute the height of the wizard and of the element below it; this
        # is written to the serp.page.loaded counter in vars: -res-heights.
        heights = '-'
        wiz_h = ''
        loss_h = ''
        for tech in r.GetYandexTechEvents():
            if not tech.IsA('TYandexTechEvent'):
                continue
            p = tech.Path
            if '71.143.1007' not in p and 'serp.page.loaded' not in p:
                continue

            # Vars is a comma-separated list of name=value pairs.
            dd = dict(d.split('=', 1) for d in tech.Vars.split(',') if '=' in d)
            if '-res-heights' not in dd:
                continue

            # -res-heights is a ';'-separated list of position:height pairs.
            heights = dict(d.split(':', 1)
                           for d in dd['-res-heights'].split(';') if ':' in d)
            if pos_key in heights:
                wiz_h = heights[pos_key]
                # NOTE(review): a missing next position is written as the
                # literal text 'None'; kept as is for output compatibility.
                loss_h = str(heights.get(str(pos + 1)))
            else:
                wiz_h = '-'
                loss_h = '-'
            break

        rows.append('\t'.join([uid, ts, reqid, wiz, str(isWin), str(isLoss),
                               pos_key, str(wiz_h), loss_h, str(heights)]) + '\n')

    return ''.join(rows)

class get_rec:
    """Plain holder for one input record split into key, subkey and value."""

    def __init__(self, k, sk, val):
        # Store the three fields of the record unchanged.
        self.key, self.subkey, self.value = k, sk, val


def main(in_path='new_sess.txt', out_path='out.txt'):
    """Group session records by key, run Reduce on each group, save the result.

    Every line of the input file is split on tabs into key, subkey and value
    (the value keeps any further tabs).  Lines with fewer than three fields
    are printed and skipped.  The strings returned by ``Reduce`` for each key
    are concatenated and written to the output file.

    :param in_path: path of the tab-separated input file.
    :param out_path: path of the file the result is written to.
    """
    with open(in_path) as f:
        recs = f.read().split('\n')

    print(len(recs))

    # key -> list of get_rec objects sharing that key
    f_recs = {}

    for rec in recs:
        l = rec.split('\t', 2)
        if len(l) < 3:
            # Malformed (or empty) line: report it and move on.
            print(l)
            continue
        k, sk, val = l
        f_recs.setdefault(k, []).append(get_rec(k, sk, val))

    chunks = []
    for k in f_recs:
        print(k)
        print(len(f_recs[k]))
        tmp = Reduce(k, f_recs[k])
        # Reduce yields nothing for skipped keys; never concatenate None.
        if tmp:
            chunks.append(tmp)

    with open(out_path, 'w') as f:
        f.write(''.join(chunks))


# Run the conversion only when the file is executed as a script.
if __name__ == "__main__":
    main()
