# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime, timedelta, date
import libra
import urllib, re, random, cgi



def Reduce(key, recs):
    """Emit per-request experiment records for one user's session.

    Args:
        key: The reduce key, used as the user id. Keys that are empty or do
            not start with 'y' are skipped.
        recs: The records for this key, handed to ``libra.ParseSession``
            together with the './blockstat.dict' dictionary.

    Yields:
        ``Record(uid, '', value, tableIndex=N)`` where ``value`` is a
        tab-separated string. Only requests that are 'TYandexWebRequest',
        have ``ServiceDomRegion == 'ru'``, carry test id 21465 or 21466 and
        have a query string in ``FullRequest`` produce output:

        * tableIndex 1 -- one record per click of a request that has a
          block whose path contains 'head/advanced-search':
          slot, date, query, '1', click path, dwell time, click url.
        * tableIndex 2 -- one record per such request:
          slot, date, query, '1', repr of the detected parameter list.
        * tableIndex 0 -- the first click whose converted path contains
          'soo/': slot, date, query, click path, pager flag, click url,
          user agent, full request.
    """
    uid = key
    # An empty key has no first character; skip it like any other non-'y' key
    # instead of failing on uid[0].
    if not uid or uid[0] != 'y':
        return

    try:
        s = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        return

    # Experiment test ids, checked in this order.
    test_ids = ('21465', '21466')

    # CGI parameter names looked for (as substrings) among the request's
    # query-string keys; the order fixes the order of the emitted list.
    param_names = ('within', 'rstr', 'site', 'wordforms', 'lang', 'mime',
                   'from_date_full', 'to_date_full')

    for r in s:
        if not r.IsA('TYandexWebRequest'):
            continue

        if r.ServiceDomRegion != 'ru':
            continue

        # Calendar date of the request as 'YYYY-MM-DD' (local time).
        ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()

        q = r.Query

        slot = None
        for test_id in test_ids:
            if r.HasTestID(test_id):
                slot = 'exp ' + test_id
                break
        if slot is None:
            continue

        ua = str(r.UserAgent)
        req = str(r.FullRequest)

        # Requests without a query string carry no parameters to inspect.
        request_parts = req.split('?', 1)
        if len(request_parts) < 2:
            continue

        data = cgi.parse_qs(request_parts[1])

        # For every tracked name remember the (last seen) query-string key
        # that contains it; '-' marks a name that no key contains. Note that
        # the key itself is recorded, not its value.
        matched = dict.fromkeys(param_names, '-')
        for k in data:
            for name in param_names:
                if name in k:
                    matched[name] = k
        params = [matched[name] for name in param_names]

        # 1 when any tech event path mentions the pager, else 0.
        isP = 0
        for tech in r.GetYandexTechEvents():
            if not tech.IsA('TYandexTechEvent'):
                continue
            p = tech.Path
            if '690.405.487' in p or 'tech.pager.show' in p:
                isP = 1
                break

        # Only the first block with an advanced-search head path matters:
        # it triggers the click records and a single serp record.
        for bl in r.GetBSBlocks():
            if 'head/advanced-search' not in bl.Path:
                continue

            isHead = 1
            for cl in r.GetClicks():
                yield Record(uid, '', '\t'.join([
                    slot, ts, q, str(isHead), cl.ConvertedPath,
                    str(cl.DwellTimeOnService), str(cl.Url),
                ]), tableIndex = 1) # clicks on serps

            yield Record(uid, '', '\t'.join([
                slot, ts, q, str(isHead), str(params),
            ]), tableIndex = 2) # serps
            break

        # Report at most one 'soo/' click per request.
        for cl in r.GetClicks():
            p = cl.ConvertedPath
            if 'soo/' in p:
                yield Record(uid, '', '\t'.join([
                    slot, ts, q, p, str(isP), str(cl.Url), ua, req,
                ]), tableIndex = 0) # soo clicks
                break


def main():
    """Run the Reduce job over one day of user sessions at a time.

    Configures MapReduce defaults, then for every day from 20160220 up to
    (but not including) 20160301 prints the day stamp and runs ``Reduce``
    on 'user_sessions/<YYYYMMDD>', writing to the three destination tables
    with appendMode and sortMode enabled.
    """
    MapReduce.useDefaults(
        server   = 'sakura.search.yandex.net:8013',
        username = 'userstats',
        mrExec   = '/Berkanavt/bin/mapreduce-dev',
        verbose  = True,
        #testMode = True,
    )

    # Destination tables; their positions match the tableIndex values
    # used by Reduce (0, 1, 2).
    destinations = [
        'ensuetina/SOO_EXP/1/soo_clicks',
        'ensuetina/SOO_EXP/1/clicks_on_soo_serps',
        'ensuetina/SOO_EXP/1/soo_serps',
    ]

    first_day = datetime.strptime('20160220', '%Y%m%d').date()
    end_day = datetime.strptime('20160301', '%Y%m%d').date()  # exclusive

    for offset in range((end_day - first_day).days):
        stamp = (first_day + timedelta(days=offset)).strftime('%Y%m%d')
        print(stamp)

        MapReduce.runReduce(
            Reduce,
            srcTable = 'user_sessions/' + stamp,
            dstTables = destinations,
            files = ['/home/ensuetina/data/blockstat.dict'],
            appendMode = True,
            sortMode = True
        )


# Start the job only when this file is executed as a script.
if __name__ == '__main__':
    main()
