# -*- coding: utf-8 -*-
import os
from itertools import combinations
from copy import copy
from functools import partial

from nile.api.v1 import (
    extractors as ne,
    aggregators as na,
    filters as nf,
    statface as ns,
    Record,
    cli
)

from nile.files import LocalFile

from qb2.api.v1 import (
    extractors as se,
    filters as sf,
    resources as sr
)

# common.py lives in the directory named by the JWD environment variable, so
# that directory has to be put on the import path before `common` is imported.
# A missing JWD raises KeyError here, which fails fast at start-up.
import sys  # imported explicitly: `os.sys` only works by accident of os internals

directory = os.environ['JWD']
sys.path.append(directory)
from common import DEFAULT_DIR, add_totals, norm_rubric, get_hotel_wizards, safe_div, is_one_org


# Statface report settings consumed by make_job().
# Title shown for the report (Russian for "Cache hit and cache miss").
REPORT_TITLE = u'Кэш-хит и кэш-мисс'
# Path the report is published under.
REPORT_PATH = "Adhoc/Hotels/Cache-hit-miss"
# YAML config handed to StatfaceReport.from_yaml_config().
REPORT_YAML_CONFIG = "cache_hit/Cache_hit_miss.yaml"

# Block "path" value that make_job() counts as a result.
RESULT = "result"
# Click "path" suffix that make_job() treats as a search-form submission.
SUBMIT = "/form/submit"
# Dimensions used both for the add_totals mapper and as group-by keys.
FIELDS = ("device", "region", "request_after_carousel_click", "rubric")

# add_totals with the dimension list pre-bound so it can be passed to .map().
# NOTE(review): the meaning of special_cases={"rubric": 0} is defined in
# common.add_totals, which is not visible here -- confirm before changing it.
add_totals_curried = partial(add_totals, fields=FIELDS, special_cases={"rubric":0})

def _extract_fielddate(time_isoformatted):
    """Return the part of an ISO timestamp string that precedes the "T"."""
    return time_isoformatted.split("T")[0]


def _count_results(blocks):
    """Count hotel-wizard blocks with path RESULT and subtype "hotel"/"1org"."""
    return sum(
        elem["path"] == RESULT
        for elem in get_hotel_wizards(blocks)
        if elem.get("subtype") in ("hotel", "1org")
    )


def _count_results_after_search(blocks):
    """Count hotel-wizard blocks with path RESULT that carry no subtype."""
    return sum(
        elem["path"] == RESULT
        for elem in get_hotel_wizards(blocks)
        if elem.get("subtype") is None
    )


def _was_search(clicks):
    """Return True if any click path ends with the SUBMIT suffix."""
    return any(click["path"].endswith(SUBMIT) for click in clicks)


def _extract_rubric(search_props):
    """Return the normalised value of search_props["FirstRubricId"]."""
    return norm_rubric(search_props.get("FirstRubricId"))


def _zero_if_empty(value):
    """Replace a falsy aggregate value (e.g. None) with 0."""
    return value or 0


# Buckets over the per-permalink cache_hit_ratio; together they cover every
# ratio exactly once: (.., 0.1], (0.1, 0.9], (0.9, ..).
def _is_cache_miss_ratio(ratio):
    """Return True for ratios of at most 0.1."""
    return ratio <= 0.1


def _is_partial_hit_ratio(ratio):
    """Return True for ratios above 0.1 and up to 0.9 inclusive."""
    return ratio > 0.1 and ratio <= 0.9


def _is_cache_hit_ratio(ratio):
    """Return True for ratios above 0.9."""
    return ratio > 0.9


@cli.statinfra_job
def make_job(job, options, nirvana, statface_client):
    """Standard entry point according to Statistics conventions.

    See https://clubs.at.yandex-team.ru/statistics/1143

    Builds the cache hit/miss pipeline: reads the input table, derives
    per-row result counters, aggregates them per permalink and then per
    (fielddate, FIELDS) slice, writes the output table and publishes it to
    the Statface report.

    :param job: Nile job to attach the pipeline to.
    :param options: CLI options; ``options.dates`` supplies the date range.
    :param nirvana: Nirvana context; its first directory, input table and
        output table override the defaults when present.
    :param statface_client: client used to publish the report.
    :return: the configured job.
    """
    dates = options.dates
    if len(dates) > 1:
        suffix = "{first}_{last}".format(first=dates[0], last=dates[-1])
    else:
        suffix = dates[0]

    job_root = nirvana.directories[0] if nirvana.directories else DEFAULT_DIR

    # One env() call declares everything: both templates used by the default
    # table paths below, and common.py, which the helper functions need.
    job = job.env(
        files=[LocalFile(os.path.join(directory, 'common.py'))],
        templates=dict(job_root=job_root, suffix=suffix),
    )

    if nirvana.input_tables:
        input_table = nirvana.input_tables[0]
    else:
        input_table = '$job_root/user_sessions/@dates'

    if nirvana.output_tables:
        output_table = nirvana.output_tables[0]
    else:
        output_table = '$job_root/cache_hit_miss/$suffix'

    report = (
        ns.StatfaceReport()
        .from_yaml_config(REPORT_YAML_CONFIG)
        .path(REPORT_PATH)
        .title(REPORT_TITLE.encode('utf8'))
        .scale("daily")
        .client(statface_client)
    )

    # Stage 1: keep rows accepted by is_one_org and derive per-row counters.
    requests = (
        job.table(input_table)
        .filter(nf.custom(is_one_org, "blocks"))
        .project(
            "region",
            "device",
            "permalink",
            request_after_carousel_click=ne.const("0"),
            fielddate=ne.custom(_extract_fielddate, "time_isoformatted"),
            num_of_results=ne.custom(_count_results, "blocks"),
            num_of_results_after_search=ne.custom(
                _count_results_after_search, "blocks"),
            was_search=ne.custom(_was_search, "clicks"),
            rubric=ne.custom(_extract_rubric, "search_props"),
        )
    )

    # Stage 2: apply the add_totals mapper, then aggregate per permalink
    # within each (fielddate, FIELDS) slice.
    per_permalink = (
        requests
        .map(add_totals_curried)
        .groupby("fielddate", "permalink", *FIELDS)
        .aggregate(
            cnt=na.count(),
            num_of_searches=na.sum("was_search"),
            cache_more_than_3_hits=na.count(
                predicate=nf.custom(lambda x: x > 3, "num_of_results")),
            cache_more_than_2_hits=na.count(
                predicate=nf.custom(lambda x: x > 2, "num_of_results")),
            cache_more_than_1_hit=na.count(
                predicate=nf.custom(lambda x: x > 1, "num_of_results")),
            cache_hit=na.count(
                predicate=nf.custom(lambda x: x > 0, "num_of_results")),
            results_after_search=na.count(
                predicate=nf.custom(
                    lambda x: x > 0, "num_of_results_after_search")),
        )
        # The per-permalink ratio drives the bucket predicates in stage 3.
        .project(
            ne.all(),
            cache_hit_ratio=ne.custom(safe_div, "cache_hit", "cnt"),
        )
    )

    # Predicates shared by the *_count and *_num aggregates of each bucket.
    partial_hit = nf.custom(_is_partial_hit_ratio, "cache_hit_ratio")
    always_hit = nf.custom(_is_cache_hit_ratio, "cache_hit_ratio")
    always_miss = nf.custom(_is_cache_miss_ratio, "cache_hit_ratio")

    # Stage 3: collapse permalinks into one row per (fielddate, FIELDS) slice.
    per_slice = (
        per_permalink
        .groupby("fielddate", *FIELDS)
        .aggregate(
            cnt=na.sum("cnt"),
            cache_more_than_3_hits=na.sum("cache_more_than_3_hits"),
            cache_more_than_2_hits=na.sum("cache_more_than_2_hits"),
            cache_more_than_1_hit=na.sum("cache_more_than_1_hit"),
            cache_hit=na.sum("cache_hit"),
            num_of_searches=na.sum("num_of_searches"),
            results_after_search=na.sum("results_after_search"),
            permalink_cnt=na.count(),
            # *_count is the number of rows in the bucket; *_num is the mean
            # per-permalink "cnt" over the same rows.
            sometimes_cache_hit_permalinks_count=na.count(
                predicate=partial_hit),
            sometimes_cache_hit_permalinks_num=na.mean(
                "cnt", predicate=partial_hit),
            always_cache_hit_permalinks_count=na.count(
                predicate=always_hit),
            always_cache_hit_permalinks_num=na.mean(
                "cnt", predicate=always_hit),
            always_cache_miss_permalinks_count=na.count(
                predicate=always_miss),
            always_cache_miss_permalinks_num=na.mean(
                "cnt", predicate=always_miss),
        )
    )

    # Stage 4: default empty counters to 0, recompute slice-level ratios,
    # then store and publish.
    (
        per_slice
        .project(
            ne.all(),
            cache_more_than_3_hits=ne.custom(
                _zero_if_empty, "cache_more_than_3_hits"),
            cache_more_than_2_hits=ne.custom(
                _zero_if_empty, "cache_more_than_2_hits"),
            cache_more_than_1_hit=ne.custom(
                _zero_if_empty, "cache_more_than_1_hit"),
            cache_hit=ne.custom(_zero_if_empty, "cache_hit"),
            results_after_search_ratio=ne.custom(
                safe_div, "results_after_search", "num_of_searches"),
            cache_hit_ratio=ne.custom(safe_div, "cache_hit", "cnt"),
        )
        .put(output_table)
        .publish(report)
    )

    return job


# Script entry point: hand control to the Nile CLI runner only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    cli.run()
