# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

# Module-level YT cluster handle (Hahn) from which the job in this script is built.
# It runs in the 'vika-pavlova' pool and allows up to 10 operations in parallel.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_fragments/assessments_count',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)

def find_video_frag(assessment_result):
    """Map a video-fragment assessment to a verdict string.

    Args:
        assessment_result: mapping with a ``get`` method (e.g. a dict) that
            may contain the keys ``'start'`` and ``'no_answer'``.

    Returns:
        ``'yes'`` if ``'start'`` holds a truthy value, otherwise ``'no'`` if
        ``'no_answer'`` holds a truthy value, otherwise ``"no_video"``.
    """
    # Order matters: 'start' takes precedence over 'no_answer'.
    # NOTE(review): the check is truthiness-based, so a falsy 'start'
    # (e.g. 0) is treated the same as a missing one -- confirm that a
    # fragment can never legitimately start at 0.
    for key, verdict in (('start', 'yes'), ('no_answer', 'no')):
        if assessment_result.get(key):
            return verdict
    return "no_video"

# Directory that holds both the per-markup input tables and the resulting `stat` table.
_ASSESSMENTS_ROOT = '//home/videolog/vika-pavlova/video_fragments/assessments_count'


def _verdict_counts(job, markup_type, fielddate, res_extractor, with_404=False):
    """Build the per-markup aggregation stream for one input table.

    Reads ``<_ASSESSMENTS_ROOT>/<markup_type>``, derives a ``res`` column from
    ``assessment_result`` and aggregates by (``fielddate``, ``markup_type``).

    Args:
        job: nile job the stream is attached to.
        markup_type: name of the input table; also written to the
            ``markup_type`` column.
        fielddate: date string written to the ``fielddate`` column.
        res_extractor: callable applied to ``assessment_result`` to produce
            the ``res`` verdict.
        with_404: when True, additionally emit ``assessments_count_404``
            (distinct ``hash`` counted where ``res == "no_video"``).

    Returns:
        The aggregated stream with ``assessments_count``,
        ``assessments_count_irrel``, ``assessments_count_rel`` and, if
        requested, ``assessments_count_404``.
    """
    aggregators = dict(
        assessments_count=na.count(),
        assessments_count_irrel=na.count_distinct(
            'hash', predicate=nf.custom(lambda x: x == "no", 'res')
        ),
        assessments_count_rel=na.count_distinct(
            'hash', predicate=nf.custom(lambda x: x == "yes", 'res')
        ),
    )
    if with_404:
        aggregators['assessments_count_404'] = na.count_distinct(
            'hash', predicate=nf.custom(lambda x: x == "no_video", 'res')
        )

    return job.table(
        _ASSESSMENTS_ROOT + '/' + markup_type
    ).project(
        ne.all(),
        fielddate=ne.const(fielddate),
        markup_type=ne.const(markup_type),
        res=ne.custom(res_extractor, 'assessment_result')
    ).groupby(
        'fielddate', 'markup_type'
    ).aggregate(**aggregators)


def _as_total(stream):
    """Return a copy of ``stream`` with ``markup_type`` replaced by ``"_total_"``."""
    return stream.project(
        ne.all(exclude='markup_type'),
        markup_type=ne.const("_total_")
    )


def process_data_for_stat():
    """Aggregate assessment counts for every markup type and write the `stat` table.

    For each of the four input tables the function counts all rows and the
    distinct ``hash`` values per verdict. Every per-markup stream is then
    duplicated under ``markup_type == "_total_"``. All streams are
    concatenated, summed by (``fielddate``, ``markup_type``), tagged with
    ``tag == "_total_"`` and stored at ``<_ASSESSMENTS_ROOT>/stat``.
    The job is executed before returning.
    """
    job = cluster.job()

    # Evaluate "today" exactly once so every branch gets the same fielddate
    # even if graph construction happens to straddle midnight.
    fielddate = datetime.date.today().isoformat()

    # (input table / markup_type, verdict extractor, emit the 404 column?)
    sources = (
        ('speech2text_fragments', lambda x: x["verdict"], False),
        ('speech2text_fragments_v2', lambda x: x["verdict"], False),
        ('video_fragments', find_video_frag, True),
        ('video_fragments_relevance', lambda x: x["result"], True),
    )

    streams = []
    for markup_type, res_extractor, with_404 in sources:
        per_markup = _verdict_counts(
            job, markup_type, fielddate, res_extractor, with_404=with_404
        )
        streams.append(per_markup)
        streams.append(_as_total(per_markup))

    # NOTE(review): the speech2text streams carry no `assessments_count_404`
    # column; this relies on na.sum coping with absent values -- confirm.
    job.concat(*streams).groupby(
        'fielddate', 'markup_type'
    ).aggregate(
        assessments_count=na.sum('assessments_count'),
        assessments_count_irrel=na.sum('assessments_count_irrel'),
        assessments_count_rel=na.sum('assessments_count_rel'),
        assessments_count_404=na.sum('assessments_count_404')
    ).project(
        ne.all(),
        tag=ne.const("_total_")
    ).put(_ASSESSMENTS_ROOT + '/stat')

    job.run()

def put_to_stat():
    """Publish the `stat` table to the Statface report.

    Uploads ``//home/videolog/vika-pavlova/video_fragments/assessments_count/stat``
    from the ``hahn`` proxy to the daily report
    ``Video.All/fragments_assessments_count``.

    Raises:
        KeyError: if the ``STAT_TOKEN`` environment variable is not set.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport().path('Video.All/fragments_assessments_count')
    report = report.scale('daily')
    report = report.client(stat_client)

    # async_mode=False and upload_config=False are passed through unchanged.
    report.remote_publish(
        proxy='hahn',
        table_path='//home/videolog/vika-pavlova/video_fragments/assessments_count/stat',
        async_mode=False,
        upload_config=False,
    )

def main():
    """Parse the command line, rebuild the `stat` table and publish it.

    ``--start_date`` and ``--end_date`` are both required string options.
    They are only validated here: neither value is passed on to the
    processing or publishing step.
    """
    cli = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        cli.add_argument(flag, type=str, required=True)
    # Parsing is kept for its side effect: exit with a usage error when a
    # required option is missing.
    cli.parse_args()

    process_data_for_stat()
    put_to_stat()


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
