#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-


import os
import sys
import datetime
import uatraits
import urllib
import json
import urllib2
from datetime import date
from collections import defaultdict

from mapreducelib import MapReduceClient, Record, MapReduce
from parselib import SessionRecord, DatesRange


# Module-level user-agent detector loaded from the uatraits browser
# definitions file; get_ys() calls detector.detect() on every record.
detector = uatraits.detector('/usr/share/uatraits/browser.xml')


# Lower-cased uatraits 'BrowserName' -> browser label used in report keys.
# Any name not listed here is reported as 'Other'.
_BROWSER_LABELS = {
    'chrome': 'Chrome',
    'msie': 'MSIE',
    'yandexbrowser': 'YandexBrowser',
    'opera': 'Opera',
    'firefox': 'Firefox',
    'edge': 'Edge',
}

# Extension markers searched for in the ys cookie (each as '<marker>.'),
# listed in the order their records are emitted.
# NOTE(review): 'searchextchrome.' is a substring of 'homesearchextchrome.',
# so a cookie carrying only the latter is also counted as the former.
_YS_EXTENSIONS = (
    'vbch',
    'homeextchrome',
    'homesearchextchrome',
    'searchextchrome',
    'startextchrome',
    'altsearchchrome',
)


def get_ys(rec):
    """Map step: tag a log record with its browser and ys-cookie extensions.

    The record value is parsed with SessionRecord.  Records lacking any of
    the 'cookieys', 'user_agent' or 'yandexuid' attributes, records whose
    yandexuid is shorter than 5 characters, and records for which the
    detector reports no 'BrowserName' produce nothing.

    Every emitted key has the form 'y<yandexuid><TAB><browser><TAB><tag>':
      * exactly one record tagged 'YsIn' (tableIndex=1) when at least one
        extension marker is present in the cookie, otherwise one tagged
        'YsOut' (tableIndex=2);
      * one record per marker present, tagged with the marker name
        (tableIndex=0).
    """
    srec = SessionRecord(rec.value)
    for required in ('cookieys', 'user_agent', 'yandexuid'):
        if not hasattr(srec, required):
            return

    ys = str(srec.cookieys)
    uid = str(srec.yandexuid)
    if len(uid) < 5:
        return

    agent = detector.detect(str(srec.user_agent))
    if 'BrowserName' not in agent.keys():
        return

    normalized = agent['BrowserName'].strip().lower()
    browser = _BROWSER_LABELS.get(normalized, 'Other')
    prefix = 'y' + str(uid) + '\t' + str(browser) + '\t'

    found = [marker for marker in _YS_EXTENSIONS if marker + '.' in ys]

    if found:
        yield Record(prefix + 'YsIn', '', '', tableIndex=1)
    else:
        yield Record(prefix + 'YsOut', '', '', tableIndex=2)

    for marker in found:
        yield Record(prefix + str(marker), '', '', tableIndex=0)


def get_uid(rec):
    """Map step: emit the key of 'REQUEST' session records keyed by 'y...'.

    The record value is parsed with SessionRecord.  Nothing is emitted when
    the parsed record has no 'type' attribute, when the record key does not
    start with 'y', or when the type is not 'REQUEST'.  Otherwise a single
    Record carrying only the original key is yielded.
    """
    srec = SessionRecord(rec.value)
    # Short-circuit order matters: the key is only inspected once the
    # parsed record is known to have a 'type' attribute.
    if (not hasattr(srec, 'type')
            or rec.key[0] != 'y'
            or srec.type != 'REQUEST'):
        return
    yield Record(str(rec.key), '', '')


def split_key(rec):
    """Map step: move the 2nd and 3rd tab-separated key fields into the value.

    The stripped key is split on tabs; the first field becomes the new key
    and '<field 2><TAB><field 3>' becomes the value.
    """
    fields = rec.key.strip().split('\t')
    new_key = str(fields[0])
    new_value = '\t'.join([str(fields[1]), str(fields[2])])
    yield Record(new_key, '', new_value)


def split_key_all_ext(rec):
    """Map step: re-key a record as '<field 2><TAB><field 3>_All'.

    Fields are taken from the stripped, tab-split key; the first field is
    dropped and the emitted record has an empty value.
    """
    fields = rec.key.strip().split('\t')
    new_key = '\t'.join([str(fields[1]), str(fields[2]) + '_All'])
    yield Record(new_key, '', '')


def split_for_sum(rec):
    """Map step: emit key field 2 as the key and key field 3 as the value.

    Fields are taken from the stripped, tab-split key; the first field is
    dropped.
    """
    fields = rec.key.strip().split('\t')
    group = str(fields[1])
    amount = str(fields[2])
    yield Record(group, '', amount)


def used_yasearch(key, records):
    """Reduce step: for keys with several records, emit their longer values.

    All record values for the key are collected first.  Nothing is emitted
    when the key has fewer than two records.  Otherwise every value longer
    than two characters is yielded as the key of a new Record.  In main()
    this is fed a split table together with the table of all uids.
    """
    values = [rec.value for rec in records]
    if len(values) <= 1:
        return
    for value in values:
        if len(value) > 2:
            yield Record(str(value), '', '')


def data_for_stat(rec):
    """Map step: fold the record value into the key, separated by a tab."""
    merged = '\t'.join((rec.key, rec.value))
    yield Record(merged, '', '')


def Reduce(key, records):
    """Reduce step: emit one value-less record per key.

    The grouped records are not read, so each distinct key appears exactly
    once in the output.
    """
    unique = Record(key, '', '')
    yield unique


def reduce_res(key, records):
    """Reduce step: emit the number of records seen for the key as value."""
    count = 0
    for _ in records:
        count += 1
    yield Record(key, '', str(count))


def reduce_get_sum(key, records):
    """Reduce step: sum the integer values of a key under 'AllBrowsers'.

    Emits a single record whose key is 'AllBrowsers<TAB><key><TAB><sum>'
    and whose value is empty.
    """
    total = sum(int(rec.value) for rec in records)
    out_key = '\t'.join(['AllBrowsers', str(key), str(total)])
    yield Record(out_key, '', '')


def update_statface_report(rows):
    """Upload report rows to the Statface data API.

    rows: JSON-serialisable list of row dicts; it is sent as the 'values'
        member of the JSON document in the 'data' form field, with daily
        scale ('d').

    The robot credentials are read from the STAT_ROBOT_USER and
    STAT_ROBOT_PASSWORD environment variables when they are set; otherwise
    the historical built-in values are used so existing runs keep working.

    Raises urllib2.HTTPError when the server answers with an error status;
    the response body is printed before the exception is re-raised.
    """
    url = 'https://stat.yandex-team.ru/_api/report/data'
    # SECURITY: the fallback credentials below are committed to source
    # control.  TODO: provide them via the environment and remove (and
    # rotate) the literals.
    headers = {
        'StatRobotUser': os.environ.get('STAT_ROBOT_USER', 'robot_robot-spok'),
        'StatRobotPassword': os.environ.get('STAT_ROBOT_PASSWORD', 'xeev0Tah3g'),
    }
    values = {
        'name': 'https://stat.yandex-team.ru/Distribution/Adhoc/YaSoft.Share_of_uids',
        'scale': 'd',
        'data': json.dumps({'values': rows}),
    }
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data, headers)
    try:
        response = urllib2.urlopen(req)
    except urllib2.HTTPError as e:
        # Surface the server's explanation before propagating the failure.
        error_message = e.read()
        print(error_message)
        raise
    # The body is not needed; close the handle so the connection is
    # released instead of being left to the garbage collector.
    response.close()


def update_all(field_date, data):
    """Aggregate result lines per browser and upload them to Statface.

    field_date: date string; only lines whose first field equals it are used.
    data: iterable of tab-separated lines whose first four fields are
        '<date>', '<browser>', '<metric>' and '<count>'.

    Counts are summed per browser and metric.  One row per browser is then
    built (the browser is sent as '<TAB><browser><TAB>'), with 0 for any
    metric that was not seen, and the rows are passed to
    update_statface_report(), whose result is returned.
    """
    totals = {}
    for line in data:
        fields = line.split('\t')
        if fields[0] != field_date:
            continue
        browser, ext, cnt = fields[1], fields[2], fields[3]
        head = u'\t{}\t'.format(browser)
        if head not in totals:
            totals[head] = defaultdict(int)
        totals[head][ext] += int(cnt)

    rows = []
    for head, counts in totals.items():
        rows.append({
            'fielddate': field_date,
            'browser': head,
            'vbch': counts['vbch'],
            'startextchrome': counts['startextchrome'],
            'altsearchchrome': counts['altsearchchrome'],
            'searchextchrome': counts['searchextchrome'],
            'homeextchrome': counts['homeextchrome'],
            'homesearchextchrome': counts['homesearchextchrome'],
            'cookied_visits': counts['YsIn_All'],
            'without_cookied_visits': counts['YsOut_All'],
            'used_yasearch_ys_in': counts['YsIn'],
            'used_yasearch_ys_out': counts['YsOut'],
        })
    return update_statface_report(rows)

# def tableExists(t):
#     for table in MapReduce.getTablesInfo(prefix=t):
#         if table.name == t:
#             return True
#     return False
def main():
    """Run the whole extension-share pipeline and upload the result.

    Steps, in order:
      1. Map the watch log of the report day (three days before today)
         through get_ys into three tables and de-duplicate each of them.
      2. Collect the uids of 'REQUEST' records from user_sessions for the
         date range that starts six days before the report day
         (presumably inclusive of both ends -- depends on DatesRange).
      3. Split the keys, keep uids that also show up in the uid table
         (used_yasearch), count records per browser/tag and add the
         'AllBrowsers' sums.
      4. Read the final table, upload it via update_all() and drop every
         intermediate table.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net',
        # mrExec='/Berkanavt/bin/mapreduce-dev',
        # username='userstats',
        verbose=True,
    )

    def table(name):
        # All working tables live under one report-specific prefix.
        return 'kosotis/ANSEARCH_540_EXTENSION_REPORT/' + name

    # Every MapReduce operation below appends to a sorted destination.
    mode = dict(appendMode=True, sortMode=True)

    # The report day is three days before today; the uid window starts six
    # days before the report day.
    report_date = date.today() - datetime.timedelta(3)
    report_day = report_date.strftime('%Y%m%d')
    window_start_day = (report_date - datetime.timedelta(6)).strftime('%Y%m%d')

    # Raw get_ys outputs (indexes match the tableIndex used in get_ys).
    type_ext0 = table('type_ext')
    total_ext1 = table('total_ext')
    out_ext2 = table('out_ext')

    # De-duplicated get_ys outputs.
    type_ext_uniq0 = table('type_ext_uniq')
    total_ext_uniq1 = table('total_uniq')
    out_ext_uniq2 = table('out_uniq')

    # Uids collected from user_sessions.
    all_uid = table('all_uid')
    all_uid_uniq = table('all_uid_uniq')
    all_uid_uniq_stable = table('all_uid_uniq_stable')

    # Tables re-keyed by uid.
    split_type_ext = table('split_type_ext')
    split_total_ext = table('split_total')
    split_out_ext = table('split_out')

    # Outputs of the used_yasearch reduce.
    type_ext_used_search = table('type_ext_used_search')
    total_ext_used_search = table('total_ext_used_search')
    out_ext_used_search = table('out_ext_used_search')

    ysIn_ysOut_split = table('ysIn_ysOut_split')

    result = table('result')
    final_result = table('final_result')
    final_result_split_sum = table('final_result_split_sum')

    watch_log_table = os.path.join('watch_log_tskv', report_day)

    print('RUN_GET_YS_MAP' + report_day)
    print(watch_log_table)

    print('point1')

    MapReduce.runMap(get_ys, srcTable=watch_log_table,
                     dstTables=[type_ext0, total_ext1, out_ext2], **mode)

    print('point')

    print('RUN_GET_YS_REDUCE' + report_day)
    raw_to_uniq = (
        (type_ext0, type_ext_uniq0),
        (total_ext1, total_ext_uniq1),
        (out_ext2, out_ext_uniq2),
    )
    for raw, uniq in raw_to_uniq:
        MapReduce.runReduce(Reduce, srcTable=raw, dstTable=uniq, **mode)
    for raw, _ in raw_to_uniq:
        MapReduce.dropTable(raw)

    # Get all uids from user_sessions during the week.
    for day in DatesRange(window_start_day, report_day):
        sessions_table = os.path.join('user_sessions', day)

        print('RUN_GET_UID_MAP' + str(day))
        MapReduce.runMap(get_uid, srcTable=sessions_table, dstTable=all_uid, **mode)

        print('RUN_REDUCE_EVERY_DAY' + str(day))
        MapReduce.runReduce(Reduce, srcTable=all_uid, dstTable=all_uid_uniq, **mode)
    MapReduce.dropTable(all_uid)

    print('RUN_REDUCE_ALL_DAYS' + report_day)
    MapReduce.runReduce(Reduce, srcTable=all_uid_uniq, dstTable=all_uid_uniq_stable, **mode)
    MapReduce.dropTable(all_uid_uniq)

    # Split key.
    print('RUN_SPLIT_KEY' + report_day)
    uniq_to_split = (
        (type_ext_uniq0, split_type_ext),
        (total_ext_uniq1, split_total_ext),
        (out_ext_uniq2, split_out_ext),
    )
    for uniq, split in uniq_to_split:
        MapReduce.runMap(split_key, srcTable=uniq, dstTable=split, **mode)

    MapReduce.runMap(split_key_all_ext, srcTables=[total_ext_uniq1, out_ext_uniq2],
                     dstTable=ysIn_ysOut_split, **mode)

    # Get used search uids.
    print('RUN_GET_USED_SEARCH_UIDS' + report_day)
    split_to_used = (
        (split_type_ext, type_ext_used_search),
        (split_total_ext, total_ext_used_search),
        (split_out_ext, out_ext_used_search),
    )
    for split, used in split_to_used:
        MapReduce.runReduce(used_yasearch, srcTables=[split, all_uid_uniq_stable],
                            dstTable=used, **mode)

    # Get uniq browser/extension.
    print('RUN_GET_RESULTS' + report_day)
    MapReduce.runReduce(
        reduce_res,
        srcTables=[type_ext_used_search, total_ext_used_search,
                   out_ext_used_search, ysIn_ysOut_split],
        dstTable=result, **mode)
    MapReduce.runMap(data_for_stat, srcTable=result, dstTable=final_result, **mode)

    # The per-browser rows are summed and the 'AllBrowsers' rows are
    # appended back into the same final table.
    MapReduce.runMap(split_for_sum, srcTable=final_result,
                     dstTable=final_result_split_sum, **mode)
    MapReduce.runReduce(reduce_get_sum, srcTable=final_result_split_sum,
                        dstTable=final_result, **mode)

    field_date = '{}-{}-{}'.format(report_day[:4], report_day[4:6], report_day[6:])

    print('RUN_GET_STAT_RESULT' + report_day)
    data = [field_date + '\t' + rec.key + '\t' + rec.value
            for rec in MapReduce.getSample(final_result)]

    update_all(field_date, data)

    # Final cleanup; several of these tables were already dropped above.
    for leftover in (
            all_uid,
            all_uid_uniq,
            all_uid_uniq_stable,
            final_result,
            final_result_split_sum,
            out_ext_uniq2,
            out_ext_used_search,
            out_ext2,
            result,
            split_out_ext,
            split_total_ext,
            split_type_ext,
            total_ext_uniq1,
            total_ext_used_search,
            total_ext1,
            type_ext_uniq0,
            type_ext_used_search,
            type_ext0,
            ysIn_ysOut_split,
    ):
        MapReduce.dropTable(leftover)

# Run the pipeline only when the file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
