# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

# Module-level cluster handle (YT Hahn) from which the job below is created.
# Operations run in the 'vika-pavlova' pool; the spec defaults name the
# "physical" pool tree and list "cloud" as a tentative pool tree.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': 'home/videolog/vika-pavlova/2406-learn_vs_pool',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)

def _res_equals(label):
    """Return a nile filter that is true when the ``res`` field equals *label*."""
    return nf.custom(lambda res: res == label, 'res')


def _markup_totals(job, table_path, markup_type, error_label, fielddate):
    """Build the per-markup aggregate stream for one assessments table.

    Args:
        job: nile job the stream is attached to.
        table_path: path of the assessments table to read.
        markup_type: constant written to the ``markup_type`` column.
        error_label: value of ``res`` counted in ``assessments_count_error``.
        fielddate: constant written to the ``fielddate`` column.

    Returns:
        A stream grouped by (``fielddate``, ``markup_type``) carrying the total
        record count and the distinct-``hash`` counts per result label.
    """
    assessments = job.table(table_path).project(
        ne.all(),
        fielddate=ne.const(fielddate),
        markup_type=ne.const(markup_type),
        # Pull the 'result' item out of the 'assessment_result' column.
        res=ne.custom(
            lambda assessment_result: assessment_result['result'],
            'assessment_result',
        ),
    )
    return assessments.groupby('fielddate', 'markup_type').aggregate(
        assessments_count=na.count(),
        assessments_count_irrel=na.count_distinct(
            'hash', predicate=_res_equals("not_rel")),
        assessments_count_rel=na.count_distinct(
            'hash', predicate=_res_equals("rel")),
        assessments_count_error=na.count_distinct(
            'hash', predicate=_res_equals(error_label)),
    )


def process_data_for_stat():
    """Aggregate video and title assessments and write them to the stat table.

    Reads the brand-safety ``video`` and ``title`` assessment tables, computes
    per-markup totals for each, concatenates the two results and puts them
    into ``//home/videolog/vika-pavlova/brand_safety/assessments/stat``.
    """
    job = cluster.job()

    # Compute the date once so both markup rows always carry the same
    # fielddate, even if the script happens to run across midnight.
    fielddate = datetime.date.today().isoformat()

    # The "error" bucket uses a different result label per markup type:
    # "404" for video assessments and "junk" for title assessments.
    video_total = _markup_totals(
        job,
        '//home/videolog/vika-pavlova/brand_safety/assessments/video',
        "bs_video",
        "404",
        fielddate,
    )
    title_total = _markup_totals(
        job,
        '//home/videolog/vika-pavlova/brand_safety/assessments/title',
        "bs_title",
        "junk",
        fielddate,
    )

    job.concat(video_total, title_total).put(
        '//home/videolog/vika-pavlova/brand_safety/assessments/stat')

    job.run()


def main():
    """Validate the command line and run the assessment statistics job.

    Both ``--start_date`` and ``--end_date`` are required string options.
    Their parsed values are not used here: ``process_data_for_stat`` is
    called without arguments.
    """
    cli = argparse.ArgumentParser()
    for option in ('--start_date', '--end_date'):
        cli.add_argument(option, type=str, required=True)

    # Parsing is kept so that a missing option still aborts the script.
    cli.parse_args()

    process_data_for_stat()


# Run the job only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
