# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random

def _is_text_type_var(var):
    """Return True if a blockstat variable contains both '-type' and 'text'."""
    return '-type' in var and 'text' in var


def Reduce(key, recs):
    """Emit per-request and per-click statistics for experiment 18500-18504.

    Keys that do not start with 'y' are ignored, as are sessions that
    libra fails to parse.  Within a session only TYandexWebRequest records
    on the 'ru' service domain are considered, and only when they carry one
    of the experiment test ids and their 'UPPER.EntitySearch.Log' search
    property mentions 'musician_media_card'.

    For every such request the generator yields:

    * one Record per click with tableIndex=1, value columns:
      slot, date, reqid, converted path, click vars, dwell time, url;
    * one summary Record with tableIndex=0, value columns:
      slot, date, reqid, query, entity-search log,
      images/video/music block flags,
      text-type images/video/music block flags,
      text-type images/video/music click-var counts,
      parallel images/video/music block flags,
      images/video/music traffic click counts,
      images/video/music traffic clicks with dwell time > 30,
      all clicks, clicks with dwell time > 15, clicks with dwell time > 30.

    All columns are tab-separated.

    Args:
        key: session key (user id); must start with 'y' to be processed.
        recs: raw session records handed to libra.ParseSession.
    """
    uid = key
    # Slicing (instead of uid[0]) makes an empty key fall through to the
    # early return rather than raising IndexError.
    if uid[:1] != 'y':
        return

    try:
        s = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped.  Catching
        # Exception (not a bare except) lets SystemExit/KeyboardInterrupt
        # propagate.
        return

    # (test id, label) pairs; the first test id present on a request wins.
    experiment_slots = (
        ('18500', 'control'),
        ('18501', 'static gallery'),
        ('18502', 'static gallery + play on music'),
        ('18503', 'images wizard'),
        ('18504', 'images wizard + play on music'),
    )

    for r in s:
        if not r.IsA('TYandexWebRequest'):
            continue

        if r.ServiceDomRegion != 'ru':
            continue

        # Calendar date of the request, formatted as YYYY-MM-DD.
        ts = datetime.fromtimestamp(r.Timestamp).date().isoformat()
        q = str(r.Query)

        slot = None
        for test_id, label in experiment_slots:
            if r.HasTestID(test_id):
                slot = label + ' ' + test_id
                break
        if slot is None:
            # The request does not belong to this experiment.
            continue

        reqid = r.ReqID
        spF = r.SearchPropsValues.get('UPPER.EntitySearch.Log')
        if 'musician_media_card' not in str(spF):
            continue

        # Block presence flags (0/1).
        images = video = music = 0
        text_images = text_video = text_music = 0
        parall_img = parall_vid = parall_mus = 0

        # Click counters.
        text_images_cl = text_video_cl = text_music_cl = 0
        img_traffic = vid_traffic = mus_traffic = 0
        img_long_traffic = vid_long_traffic = mus_long_traffic = 0
        all_clicks = all_long_clicks15 = all_long_clicks30 = 0

        for bl in r.GetBSBlocks():
            p = bl.Path
            # The 'parallel/...' paths contain the plain 'snippet/...'
            # substrings, so they have to be tested first.
            if 'parallel/result/snippet/images' in p:
                parall_img = 1
            elif 'parallel/result/snippet/video' in p:
                parall_vid = 1
            elif 'parallel/result/snippet/musicplayer' in p:
                parall_mus = 1
            elif 'snippet/video' in p:
                video = 1
                if any(_is_text_type_var(v) for v in bl.GetVars()):
                    text_video = 1
            elif 'snippet/images' in p:
                images = 1
                if any(_is_text_type_var(v) for v in bl.GetVars()):
                    text_images = 1
            elif 'snippet/musicplayer' in p:
                music = 1
                if any(_is_text_type_var(v) for v in bl.GetVars()):
                    text_music = 1

        for cl in r.GetClicks():
            p = cl.ConvertedPath
            vv = cl.GetVars()
            dw = int(cl.DwellTimeOnService)
            url = str(cl.Url)

            # Traffic to the vertical services, detected by click URL.
            # The checks are independent: one URL may match several.
            if 'yandex.ru/images' in url or 'images.yandex.ru' in url:
                img_traffic += 1
                if dw > 30:
                    img_long_traffic += 1
            if 'yandex.ru/video' in url or 'video.yandex.ru' in url:
                vid_traffic += 1
                if dw > 30:
                    vid_long_traffic += 1
            if 'yandex.ru/music' in url or 'music.yandex.ru' in url:
                mus_traffic += 1
                if dw > 30:
                    mus_long_traffic += 1

            # Count text-type vars on clicks, per snippet family.
            if 'snippet/video' in p:
                text_video_cl += sum(1 for v in vv if _is_text_type_var(v))
            if 'snippet/images' in p:
                text_images_cl += sum(1 for v in vv if _is_text_type_var(v))
            if 'snippet/musicplayer' in p:
                text_music_cl += sum(1 for v in vv if _is_text_type_var(v))

            all_clicks += 1
            if dw > 15:
                all_long_clicks15 += 1
            if dw > 30:
                all_long_clicks30 += 1

            # Emit one row per click into the second destination table.
            click_row = '\t'.join([slot, ts, reqid, p, str(vv), str(dw), url])
            yield Record(uid, '', click_row, tableIndex=1)

        # Column order below is the output format; do not reorder.
        numbers = [
            images, video, music,
            text_images, text_video, text_music,
            text_images_cl, text_video_cl, text_music_cl,
            parall_img, parall_vid, parall_mus,
            img_traffic, vid_traffic, mus_traffic,
            img_long_traffic, vid_long_traffic, mus_long_traffic,
            all_clicks, all_long_clicks15, all_long_clicks30,
        ]
        st = '\t'.join([slot, ts, reqid, q, str(spF)] + [str(n) for n in numbers])

        yield Record(uid, '', st, tableIndex=0)

def ReduceVideo(key, recs):
    """Emit the unquoted request URL of every experiment video request.

    Keys that do not start with 'y' are ignored, as are sessions that
    libra fails to parse.  For each TYandexVideoRequest in the session that
    carries one of the experiment test ids (18500-18504) the generator
    yields one Record whose value is '<slot>\\t<url>', where url is
    r.FullRequest passed through urllib.unquote.

    Args:
        key: session key (user id); must start with 'y' to be processed.
        recs: raw session records handed to libra.ParseSession.
    """
    uid = key
    # Slicing (instead of uid[0]) makes an empty key fall through to the
    # early return rather than raising IndexError.
    if uid[:1] != 'y':
        return

    try:
        s = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best effort: a session that cannot be parsed is skipped.  Catching
        # Exception (not a bare except) lets SystemExit/KeyboardInterrupt
        # propagate.
        return

    # (test id, label) pairs; the first test id present on a request wins.
    experiment_slots = (
        ('18500', 'control'),
        ('18501', 'static gallery'),
        ('18502', 'static gallery + play on music'),
        ('18503', 'images wizard'),
        ('18504', 'images wizard + play on music'),
    )

    for r in s:
        if not r.IsA('TYandexVideoRequest'):
            continue

        slot = None
        for test_id, label in experiment_slots:
            if r.HasTestID(test_id):
                slot = label + ' ' + test_id
                break
        if slot is None:
            # The request does not belong to this experiment.
            continue

        url = urllib.unquote(r.FullRequest)
        yield Record(uid, '', slot + '\t' + url)



def main():
    """Configure the MapReduce client and run Reduce over the daily tables.

    Source tables are user_sessions/20151118 through user_sessions/20151130.
    Every run appends to the same two destination tables: the first receives
    records emitted with tableIndex=0, the second those with tableIndex=1.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode = True,
    )

    # Destination tables are the same for every day, so build the list once.
    destinations = [
        'ensuetina/OA_WIZARD_EXP/new_dw/data',
        'ensuetina/OA_WIZARD_EXP/new_dw/clicks',
    ]

    # Day-of-month suffixes '18' .. '30'.
    # For a single-day run use e.g. days = ['20'].
    days = [str(day) for day in range(18, 31)]

    for day in days:
        MapReduce.runReduce(
            Reduce,
            srcTable='user_sessions/201511' + day,
            dstTables=destinations,
            files=['/home/ensuetina/data/blockstat.dict'],
            appendMode=True,
            sortMode=True,
        )


# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
