#!/usr/bin/env python
# -*- coding: utf-8 -*-

# import libra
import yt.wrapper as yt
import sys
import argparse


def HandleOption():
    """Build the command-line parser for this script.

    Registered options (both optional):
        --server  stored as ``server``; help text "yt server";
                  defaults to 'hahn.yt.yandex.net'.
        --bs      stored as ``blockstat``; help text "path to blockstat.dict";
                  defaults to '/home/galamaj/blockstat.dict'.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is expected
        to invoke ``parse_args()`` on it.
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "yt server", 'hahn.yt.yandex.net'),
        ("--bs", "blockstat", "path to blockstat.dict", '/home/galamaj/blockstat.dict'),
    )
    cli = argparse.ArgumentParser()
    for flag, dest, help_text, default in option_specs:
        cli.add_argument(flag, dest=dest, help=help_text, default=default, required=False)
    return cli

def findyuid(rec):
    """Extract the event type and yandexuid from one tab-separated log record.

    ``rec['value']`` is split on tabs. Fields equal to one of the known
    ``path=...`` markers select the event type (when several markers occur,
    the last one wins); a field starting with ``yandexuid=`` supplies the uid
    (again, the last one wins).

    Yields:
        A single ``{'uid': ..., 'type': ...}`` dict when a known path marker
        was found; nothing otherwise. ``uid`` is the empty string when the
        record carries no ``yandexuid=`` field.
    """
    path_to_type = {
        'path=183.2155.2154': 'start',
        'path=183.2155.2368': 'data',
        'path=183.2155.1309': 'init',
        'path=183.2155.529': 'stop',
        'path=183.2155.1030': 'error',
        'path=183.2155.2040': 'continue',
        'path=183.2155.1827.529': 'paranja_stop',
        'path=183.2155.177': 'help',
        'path=183.2155.1827.2040': 'paranja_continue',
    }
    uid_prefix = 'yandexuid='

    event = False
    yandexuid = ''
    for field in rec['value'].split('\t'):
        # Unknown fields leave the previously detected event untouched.
        event = path_to_type.get(field, event)
        if field.startswith(uid_prefix):
            yandexuid = field[len(uid_prefix):]

    if event:
        yield {'uid': yandexuid, 'type': event}


class Filter(object):
    """Callable that emits the browser of every touch web request of a user.

    The instance is called with a key and that key's records (the disabled
    ``yt.run_reduce`` call in ``main`` uses it as a reducer with
    ``reduce_by='key'``). The records are parsed with ``libra.ParseSession``
    and one row is yielded per request for which
    ``IsA("TTouchYandexWebRequest")`` is true.

    Args:
        list: collection of user ids of interest; each output row carries a
            ``match`` flag telling whether its user is in this collection.
        blockstat: path to the blockstat dictionary handed to
            ``libra.ParseSession``. Defaults to ``'./blockstat.dict'``.
    """

    def __init__(self, list, blockstat='./blockstat.dict'):
        # __call__ does one membership test per key; a frozenset makes that
        # test constant-time instead of a scan over the whole list.
        self._list = frozenset(list)
        self._blockstat = blockstat

    def __call__(self, key, recs):
        """Yield ``{'user', 'browser', 'match'}`` rows for one user's records.

        Args:
            key: mapping whose ``'key'`` entry is the user id.
            recs: the records for that key, passed through to libra.

        Yields nothing when the session cannot be parsed; the failure and its
        cause are reported on stderr instead.
        """
        clean = key['key']
        match = clean in self._list
        sys.stderr.write('%s %s\n' % (clean, match))

        try:
            # The module-level import of libra is commented out in this file,
            # so the module is imported here, where it is actually needed. A
            # missing module is reported like any other parse failure.
            import libra
            session = libra.ParseSession(recs, self._blockstat)
        except Exception as exc:
            # Best effort: skip this user, but say why. KeyboardInterrupt and
            # SystemExit are deliberately not caught.
            sys.stderr.write('libra error! %s\n' % (exc,))
            return

        for request in session:
            if not request.IsA("TTouchYandexWebRequest"):
                continue
            yield {'user': clean, 'browser': request.GetBrowser(), 'match': match}

def clear(key, recs):
    """Collapse one user's rows into a single row with a matched browser.

    Rows whose ``match`` value stringifies to ``'False'`` are ignored. Of the
    remaining rows, the browser of the last one is kept.

    Args:
        key: mapping whose ``'user'`` entry identifies the user.
        recs: iterable of rows with ``'match'`` and ``'browser'`` entries.

    Yields:
        One ``{'user': ..., 'browser': ...}`` dict when a non-empty browser
        was kept; nothing otherwise.
    """
    user = key['user']
    kept_browser = ''
    for row in recs:
        # Compare the string form so both False and 'False' are rejected.
        if str(row['match']) != 'False':
            kept_browser = row['browser']
    if kept_browser == '':
        return
    yield {'user': user, 'browser': kept_browser}


# Event types produced by findyuid; every per-day bucket in main is keyed by
# them, and the report is printed in this order.
_EVENT_TYPES = ('start', 'data', 'init', 'stop', 'help', 'continue',
                'paranja_stop', 'paranja_continue', 'error')

# Table mapping uids to their 'exprt' value. The original code had a disabled
# per-day variant of this path (... + day + 'fin'); as written it is the same
# table for every day.
_EXPERIMENT_TABLE = '//home/suggest-dev/galamaj/analytics/web_voice/from_redir_2017-03-28'


def _browser_family(label):
    """Return the leading browser name of a stringified browser value.

    Strips list/quote decoration, so "['YandexBrowser', '17.3']" gives
    "YandexBrowser", while a plain "GoogleChrome" is returned unchanged.
    """
    return label.lstrip("['").rstrip("']").split(",")[0].rstrip("'")


def _new_buckets(factory):
    """Return a dict with one fresh ``factory()`` value per event type."""
    return dict((event_type, factory()) for event_type in _EVENT_TYPES)


def main():
    """Print, per day and event type, browser counts for the '41632' uids.

    For every day the function reads the ``<output>_uids`` and
    ``<output>_browsers`` tables, splits the uids by their ``exprt`` value
    ('41632', '41633', other) and prints:

      * the number of 'start' events for the '41632' and '41633' groups,
      * the day,
      * each event type (while tallying),
      * for each event type, the YandexBrowser / GoogleChrome totals of the
        '41632' group,
      * the number of '41632' uids with no known browser.

    The user -> browser map accumulates across days; the first browser seen
    for a user is kept.
    """
    args = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': args.server}})
    days = [
        "2017-03-28", "2017-03-29", "2017-03-30", "2017-03-31",
        "2017-04-01", "2017-04-02", "2017-04-03", "2017-04-04",
        "2017-04-05", "2017-04-06", "2017-04-07", "2017-04-08",
        "2017-04-09", "2017-04-10",
    ]
    # Dates that were commented out after the list in the original:
    # '2016-12-09', '2016-12-10', '2016-12-11', '2016-12-12', '2016-12-13',
    # '2016-12-14', '2016-12-15', '2016-12-27', '2016-12-28', '2016-12-29',
    # '2016-12-30', '2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
    # '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
    # '2017-02-10', '2017-02-11', '2017-02-12', '2017-02-13', '2017-02-14',
    # '2017-02-15',
    uids = {}

    # The experiment table does not depend on the day, so read it once.
    # Sets keep the per-row membership tests below constant-time.
    exprt_32 = set()
    exprt_33 = set()
    for row in yt.read_table(_EXPERIMENT_TABLE):
        if row['exprt'] == '41632':
            exprt_32.add(row['uid'])
        if row['exprt'] == '41633':
            exprt_33.add(row['uid'])

    for day in days:
        output = '//home/suggest-dev/galamaj/analytics/web_voice/def_browser' + day

        # Disabled pipeline steps, kept for reference. Their destination
        # tables are the '_uids' and '_browsers' tables read below.
        #
        # redir = '//home/logfeller/logs/redir-log/1d/' + day
        # usersessions = '//user_sessions/pub/search/daily/' + day + '/clean'
        # if not yt.exists(output):
        #     yt.create_table(path=output, recursive=True)
        # yt.run_map(findyuid,
        #            source_table = redir,
        #            destination_table = output + '_uids',
        #            spec = {'data_size_per_job': 16000000000} #~16GB
        #            )
        # yt.run_reduce(Filter(ulist['start']),
        #               source_table = usersessions,
        #               destination_table = output + '_browsers',
        #               local_files = [args.blockstat],
        #               reduce_by = 'key',
        #               spec = {'data_size_per_job': 16000000000} #~16GB
        #               )
        # yt.run_sort(source_table = output + '_browsers',
        #             destination_table = output + '_browsers',
        #             sort_by = 'user'
        #             )
        # yt.run_reduce(clear,
        #               source_table = output + '_browsers',
        #               destination_table = output + '_browsers',
        #               reduce_by = 'user',
        #               spec = {'data_size_per_job': 16000000000} #~16GB
        #               )

        # uids per event type, split by experiment value.
        ulist_32 = _new_buckets(list)
        ulist_33 = _new_buckets(list)
        ulist = _new_buckets(list)  # neither '41632' nor '41633'
        for row in yt.read_table(output + '_uids'):
            uid = 'y' + row['uid']
            if uid in exprt_32:
                group = ulist_32
            elif uid in exprt_33:
                group = ulist_33
            else:
                group = ulist
            group[row['type']].append(uid)
        print(len(ulist_32['start']))
        print(len(ulist_33['start']))

        print(day)
        for row in yt.read_table(output + '_browsers'):
            if row['user'] not in uids:
                uids[row['user']] = row['browser']

        # Count stringified browser values per event type for the '41632' group.
        browsers = _new_buckets(dict)
        notfound = 0
        for event_type in _EVENT_TYPES:
            print(event_type)
            counts = browsers[event_type]
            for u in ulist_32[event_type]:
                if u not in uids:
                    notfound += 1
                    continue
                bro = str(uids[u])
                counts[bro] = counts.get(bro, 0) + 1

        for event_type in _EVENT_TYPES:
            yandex_total = 0
            chrome_total = 0
            for label, count in browsers[event_type].items():
                family = _browser_family(label)
                if family == "YandexBrowser":
                    yandex_total += count
                elif family == "GoogleChrome":
                    chrome_total += count
            print(event_type)
            print("YandexBrowser" + "\t" + str(yandex_total))
            print("GoogleChrome" + "\t" + str(chrome_total))
        print(notfound)


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
