from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os


def parse_us(groups):
    """Reducer: emit video showcase thumbnail shows found in user sessions.

    Each ``(key, recs)`` group is parsed with ``libra.ParseSession`` using
    ``./blockstat.dict``; a group that fails to parse is skipped.  Only
    requests that are ``TYandexWebRequest``, ``TTouchYandexWebRequest`` or
    ``TPadYandexWebRequest`` and whose ``ServiceDomRegion`` equals ``'ru'``
    are inspected.

    For every blockstat block whose path contains
    ``snippet/video/showcase/item/thumb`` and for which a film id other than
    ``'-'`` was found, two records are yielded: one with the request's own
    ``ui`` label and one with the ``'_total_'`` label.

    Yielded fields: ``uid``, ``ui``, ``filmid``, ``thumb`` (``-1`` when no
    ``-item`` var is present), ``reqid`` and ``date`` (``YYYY-MM-DD`` derived
    from the request timestamp via ``datetime.fromtimestamp``, i.e. in the
    local time zone of the worker).
    """
    import libra

    thumb_path = 'snippet/video/showcase/item/thumb'
    # Checked in this order; the first matching request type wins.
    ui_by_request_type = (
        ('TYandexWebRequest', 'desktop web'),
        ('TTouchYandexWebRequest', 'touch web'),
        ('TPadYandexWebRequest', 'pad web'),
    )

    for key, recs in groups:
        uid = key.key

        try:
            session = libra.ParseSession(recs, './blockstat.dict')
        except Exception:
            # Best effort: a session that cannot be parsed is dropped.
            continue

        for request in session:
            ui = next(
                (label for type_name, label in ui_by_request_type
                 if request.IsA(type_name)),
                None,
            )
            # Not a web request of interest, or not served on the 'ru' domain.
            if ui is None or request.ServiceDomRegion != 'ru':
                continue

            moment = datetime.datetime.fromtimestamp(request.Timestamp)
            date = moment.date().isoformat()

            for block in request.GetBSBlocks():
                path = block.Path
                block_vars = block.GetVars()
                if thumb_path not in path:
                    continue

                filmid = '-'
                position = -1
                # NOTE(review): each var is presumably a (name, value) pair;
                # the last matching var wins for both fields.
                for var in block_vars:
                    if '-item' in var:
                        position = int(var[1])
                    if '-filmId' in var:
                        filmid = var[1]

                if filmid != '-':
                    # One row for the concrete platform, one for the total.
                    for ui_label in (ui, '_total_'):
                        yield Record(
                            uid=uid,
                            ui=ui_label,
                            filmid=filmid,
                            thumb=position,
                            reqid=request.ReqID,
                            date=date,
                        )


def process_data_for_stat(date):
    """Compute the per-day "wizard doubles" statistics on the Hahn cluster.

    Two jobs are run one after another, both rooted at
    ``home/videolog/vika-pavlova/2205-wizard_doubles/<date>``:

    1. Reduce the ``clean`` user sessions table for ``date`` with
       ``parse_us`` into ``reqs``, then count rows per
       ``(date, ui, reqid, filmid)`` into ``aggr_by_reqid``.
    2. From ``aggr_by_reqid`` count, per ``(date, reqid, ui)``, how many film
       ids occurred more than once (``doubles_count``) and exactly once
       (``unique_count``); then, per ``(date, ui)``, count the requests with
       at least one repeated film id (``bad_reqids``) and all requests
       (``all_reqids``).  The result, with ``date`` exposed as ``fielddate``,
       is written to ``final_for_stat``.

    :param date: day to process, as the string used in the table paths.
    """
    hahn = clusters.yt.Hahn()
    cluster = hahn.env(
        templates={
            'job_root': 'home/videolog/vika-pavlova/2205-wizard_doubles/' + date,
        },
        yt_spec_defaults={
            'pool_trees': ["physical"],
            'tentative_pool_trees': ["cloud"],
        },
        parallel_operations_limit=10,
    )

    # --- Job 1: extract thumbnail shows and count them per film id. ---
    extract_job = cluster.job()

    sessions = extract_job.table('user_sessions/pub/search/daily/' + date + '/clean')
    ordered_sessions = sessions.groupby('key').sort('subkey')

    # Resources parse_us needs on the worker to run libra.
    reducer_files = [
        nile.files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
        nile.files.RemoteFile('statbox/resources/libra.so'),
    ]
    shows = ordered_sessions.reduce(
        parse_us,
        files=reducer_files,
        memory_limit=4000,
    ).put('$job_root/reqs')

    shows_by_film = shows.groupby('date', 'ui', 'reqid', 'filmid')
    shows_by_film.aggregate(filmids_count=na.count()).put('$job_root/aggr_by_reqid')

    extract_job.run()

    # --- Job 2: turn per-film counts into per-platform request counts. ---
    report_job = cluster.job()

    per_film = report_job.table('$job_root/aggr_by_reqid')

    by_request = per_film.groupby('date', 'reqid', 'ui')
    per_request = by_request.aggregate(
        doubles_count=na.count(
            predicate=nf.custom(lambda x: x > 1, 'filmids_count'),
        ),
        unique_count=na.count(
            predicate=nf.custom(lambda x: x == 1, 'filmids_count'),
        ),
    )

    by_platform = per_request.groupby('date', 'ui')
    per_platform = by_platform.aggregate(
        bad_reqids=na.count(
            predicate=nf.custom(lambda x: x > 0, 'doubles_count'),
        ),
        all_reqids=na.count(),
    )

    # The output table carries the date under the name ``fielddate``.
    per_platform.project(
        'ui', 'bad_reqids', 'all_reqids',
        fielddate=ne.custom(lambda a: a, 'date'),
    ).put('$job_root/final_for_stat')

    report_job.run()


def put_data_to_stat(date):
    """Publish the day's ``final_for_stat`` table to Statface.

    The table
    ``//home/videolog/vika-pavlova/2205-wizard_doubles/<date>/final_for_stat``
    on ``hahn`` is uploaded to the daily report
    ``Video.All/wizard_doubles_reqids_share``.  The Statface token is read
    from the ``STAT_TOKEN`` environment variable; a missing variable raises
    ``KeyError``.

    :param date: day whose table should be published, as used in the path.
    """
    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/wizard_doubles_reqids_share')
    report = report.scale('daily')
    report = report.client(client)

    report.remote_publish(
        proxy='hahn',
        table_path='//home/videolog/vika-pavlova/2205-wizard_doubles/' + date + '/final_for_stat',
        async_mode=False,
        upload_config=False,
    )


def main():
    """Process and publish every day in the requested date range.

    Reads the required ``--start_date`` and ``--end_date`` command-line
    arguments, then for each day produced by ``pandas.date_range`` between
    them runs ``process_data_for_stat`` followed by ``put_data_to_stat`` with
    the day formatted as ``YYYY-MM-DD``.
    """
    parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        parser.add_argument(flag, type=str, required=True)
    args = parser.parse_args()

    days = pd.date_range(start=args.start_date, end=args.end_date)
    for day in days:
        # Keep only the calendar date part of the timestamp.
        date_str = day.date().isoformat()
        process_data_for_stat(date_str)
        put_data_to_stat(date_str)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
