import os
from datetime import date

import uatraits
from parselib import SessionRecord, DatesRange


# Shared user-agent detector, built once at import time from the uatraits
# browser definitions file; get_ys() calls detector.detect() on every record.
detector = uatraits.detector('/usr/share/uatraits/browser.xml')


def get_ys(rec):
    """Map step: split Chrome visitors by the markers found in their ys cookie.

    ``rec.value`` is parsed with ``SessionRecord``. The record is skipped
    unless the parsed record has ``cookieys``, ``user_agent`` and
    ``yandexuid`` attributes, the user agent is detected with
    ``BrowserName == 'Chrome'``, and the uid is at least 12 characters long.

    Yields exactly one ``Record`` keyed by ``'y' + yandexuid`` (the two other
    positional fields are empty strings): with ``tableIndex=0`` when the
    cookie contains any of the known markers, otherwise with ``tableIndex=1``.
    """
    # Substrings of the ys cookie that route a visitor to output table 0.
    markers = (
        'vbch.',
        'homeextchrome.',
        'homesearchextchrome.',
        'searchextchrome.',
        'startextchrome.',
        'altsearchchrome.',
    )

    srec = SessionRecord(rec.value)
    for required in ('cookieys', 'user_agent', 'yandexuid'):
        if not hasattr(srec, required):
            return

    traits = detector.detect(str(srec.user_agent))
    # Compare the BrowserName trait itself: scanning every trait value for
    # 'Chrome' would also accept a record where some unrelated trait has it.
    if traits.get('BrowserName') != 'Chrome':
        return

    uid = str(srec.yandexuid)
    if len(uid) < 12:
        return

    ys = str(srec.cookieys)
    table_index = 0 if any(marker in ys for marker in markers) else 1
    yield Record('y' + uid, '', '', tableIndex=table_index)


def get_uid(rec):
    """Map step: emit the key of every 'y'-prefixed record of type REQUEST.

    Records whose key does not start with ``'y'`` are skipped, as are records
    whose parsed ``SessionRecord`` has no ``type`` attribute or a ``type``
    other than ``'REQUEST'``.

    Yields one ``Record`` whose key is ``str(rec.key)`` and whose two other
    positional fields are empty strings.
    """
    uid = rec.key
    # Slice rather than index so an empty key is skipped instead of raising
    # IndexError. The check runs first so non-matching records are not parsed.
    if uid[:1] != 'y':
        return

    srec = SessionRecord(rec.value)
    if not hasattr(srec, 'type') or srec.type != 'REQUEST':
        return

    yield Record(str(uid), '', '')


def Reduce(key, records):
    """Reduce step: emit one record per key carrying the number of inputs.

    Yields a single ``Record(key, '', <count as string>)`` where the count is
    how many items ``records`` produced.
    """
    count = 0
    for _ in records:
        count += 1
    yield Record(key, '', str(count))


def ys_in_Reduce(key, records):
    """Reduce step: keep only keys that have exactly two input records.

    Yields ``Record(key, '', '2')`` when ``records`` produces exactly two
    items and nothing otherwise. main() feeds it two source tables, so a
    count of two presumably means the key occurs once in each table; confirm
    against the table contents.
    """
    count = 0
    for _ in records:
        count += 1
    if count != 2:
        return
    yield Record(key, '', str(count))


def ys_out_Reduce(key, records):
    """Reduce step: pass through a key only if it has exactly two records.

    Emits ``Record(key, '', '2')`` for a key whose ``records`` iterable holds
    exactly two items; any other count produces no output.
    """
    total = 0
    # enumerate(..., 1) leaves the running count in `total` after the loop.
    for total, _ in enumerate(records, 1):
        pass
    if total == 2:
        yield Record(key, '', str(total))


def main(startday='20160301', endday='20160307'):
    """Run the full MapReduce pipeline for one date window.

    Steps:
      1. Map ``watch_log_tskv/<endday>`` with ``get_ys`` into the ysIn / ysOut
         tables, then reduce each to one counted record per key.
      2. Map every ``user_sessions/<day>`` table in the
         ``DatesRange(startday, endday)`` range with ``get_uid`` into the
         allUid table, then reduce it to one counted record per key.
      3. Reduce each per-key ys table together with the per-key allUid table,
         keeping only keys that have exactly two records.

    Args:
        startday: start of the ``user_sessions`` range, as a ``YYYYMMDD``
            string.
        endday: end of the ``user_sessions`` range, as a ``YYYYMMDD`` string.
            It also selects the ``watch_log_tskv`` table and is embedded in
            every output table name.

    NOTE(review): ``MapReduceClient`` (and ``Record``, used by the map and
    reduce functions) are not among the imports visible at the top of this
    file. Confirm they are brought into scope.
    NOTE(review): every step uses ``appendMode=True`` and nothing drops the
    output tables first. Presumably a re-run adds to existing data; confirm.
    """
    client = MapReduceClient(server='sakura.search.yandex.net', verbose=True)

    # All output tables share one prefix that embeds the end day.
    prefix = 'kosotis/ANSEARCH_540' + endday

    ysInTable0 = prefix + '/ysIn'
    ysOutTable1 = prefix + '/ysOut'

    uniq_ysInTable0 = prefix + '/ysIn_uniq'
    uniq_ysOutTable1 = prefix + '/ysOut_uniq'

    allUidTable = prefix + '/allUid'
    uniq_allUidTable = prefix + '/allUid_uniq'

    ysInResult = prefix + '/ysIn_result'
    ysOutResult = prefix + '/ysOut_result'

    # Options shared by every map and reduce call below.
    mode = dict(appendMode=True, sortMode=True)

    # Step 1: split Chrome visitors by ys-cookie markers, then count per key.
    getYsTable = os.path.join('watch_log_tskv', endday)
    client.runMap(get_ys, srcTable=getYsTable,
                  dstTables=[ysInTable0, ysOutTable1], **mode)
    client.sortTable(ysInTable0)
    client.sortTable(ysOutTable1)

    client.runReduce(Reduce, srcTable=ysInTable0, dstTable=uniq_ysInTable0,
                     **mode)
    client.runReduce(Reduce, srcTable=ysOutTable1, dstTable=uniq_ysOutTable1,
                     **mode)
    client.sortTable(uniq_ysInTable0)
    client.sortTable(uniq_ysOutTable1)

    # Step 2: collect uids with REQUEST records over the whole date range.
    # The range must end at `endday`; passing the `date` class here was a bug.
    for day in DatesRange(startday, endday):
        getUidTable = os.path.join('user_sessions', day)
        client.runMap(get_uid, srcTable=getUidTable, dstTable=allUidTable,
                      **mode)
    # Sort before reducing, matching the sort-then-reduce order of step 1.
    client.sortTable(allUidTable)
    client.runReduce(Reduce, srcTable=allUidTable, dstTable=uniq_allUidTable,
                     **mode)
    client.sortTable(uniq_allUidTable)

    # Step 3: keep only keys that have exactly two records across both inputs.
    client.runReduce(ys_in_Reduce,
                     srcTables=[uniq_ysInTable0, uniq_allUidTable],
                     dstTable=ysInResult, **mode)
    client.runReduce(ys_out_Reduce,
                     srcTables=[uniq_ysOutTable1, uniq_allUidTable],
                     dstTable=ysOutResult, **mode)

    client.sortTable(ysInResult)
    client.sortTable(ysOutResult)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()




