# -*- coding: utf-8 -*-
"""The script aggregates the company snapshot for travel providers
by a number of parameters: region ("ru"/"world"), provider, main rubric
and publishing status, and publishes company counts and popularity sums
See HOTELS-2748, HOTELS-2958"""

from itertools import combinations
from functools import partial
from copy import copy
import datetime as dt

from nile.api.v1 import (
    extractors as ne,
    aggregators as na,
    filters as nf,
    statface as ns,
    Record,
    cli
)

from qb2.api.v1 import (
    extractors as se,
    filters as sf,
    resources as sr
)

# Statface report with distinct-company counts per slice.
REPORT_TITLE = u'Метрики базы: рубрикация'
REPORT_PATH = "Adhoc/Hotels/Hotels_rubrics"
REPORT_YAML_CONFIG = "rubrics.yaml"
# Statface report with popularity sums over the same slices.
POP_REPORT_TITLE = u'Метрики базы: рубрикация (popularity)'
POP_REPORT_PATH = "Adhoc/Hotels/Hotels_rubrics_popularity"
POP_REPORT_YAML_CONFIG = "pop_rubrics.yaml"

# Job root used when Nirvana does not supply a working directory.
DEFAULT_DIR = "home/travel/analytics/hotels_rubrics"

# Input tables.
COMPANY = '//home/altay/db/export/current-state/snapshot/company'
COMPANY_TO_RUBRIC = '//home/altay/db/export/current-state/snapshot/company_to_rubric'
PROVIDERS = '//home/altay/db/export/current-state/snapshot/provider'
POPULARITY = '//home/sprav/assay/common/Popularity'

# Rubric permalinks (as strings) that are treated as travel rubrics, mapped
# to a human-readable name. All names are unicode literals so the values
# have one consistent type.
TRAVEL_RUBRICS = {
    "184106414": u'Гостиница',
    "184106404": u'Санаторий',
    "184106400": u'Дом отдыха',
    "20699506347": u'Хостел',
    "184106426": u'Турбаза',
    "197061821387": u'Жилье посуточно',
    "184106420": u'Кемпинг',
    "255921949": u'Отдых на ферме',
}


# Dimensions that get roll-up ("total") rows and by which reports are grouped.
FIELDS = ("ru", "provider", "rubric", "status")

def norm_rubric(rubric_id):
    """Map a rubric id to the value used in the "rubric" report dimension.

    Ids present in TRAVEL_RUBRICS are converted to ``int``; every other
    value collapses to 1.
    """
    if rubric_id not in TRAVEL_RUBRICS:
        return 1
    return int(rubric_id)

def add_totals(records, fields, special_cases):
    """Expand every record with roll-up rows over ``fields``.

    For each input record and each subset of ``fields`` (the empty subset
    included, which reproduces the record unchanged) one Record is yielded
    in which the columns of that subset are replaced by a roll-up marker:
    ``special_cases[column]`` when the column is listed there, otherwise
    the string "total". Columns of the subset that the record does not
    have are left out.

    The mapper should be curried (``fields`` and ``special_cases`` bound)
    before it is handed to ``.map``.
    """
    for record in records:
        base = record.to_dict()
        for size in range(len(fields) + 1):
            for masked in combinations(fields, size):
                row = copy(base)
                for name in masked:
                    if name in row:
                        row[name] = special_cases.get(name, "total")
                yield Record(**row)


# Mapper bound to the report dimensions; the "rubric" column uses 0 rather
# than "total" as its roll-up marker.
add_totals_curried = partial(
    add_totals,
    fields=FIELDS,
    special_cases={"rubric": 0},
)

def mapper(records):
    """Emit one row per (company, provider) pair.

    For every input record a Record is yielded for each entry of
    ``rec.providers`` with the fields:

    * ``provider_id`` - the entry's "provider_id";
    * ``permalink`` - the record's permalink;
    * ``status`` - the record's ``publishing_status``;
    * ``ru`` - "ru" when the address "region_code" equals "RU", otherwise
      "world" (this includes a missing address or region code).
    """
    for rec in records:
        address = rec.address or {}
        # The region depends only on the record, so compute it once.
        region = "ru" if address.get("region_code") == "RU" else "world"
        # A record with no providers simply produces no rows.
        for prov in rec.providers or ():
            yield Record(
                provider_id=prov["provider_id"],
                permalink=rec.permalink,
                status=rec.publishing_status,
                ru=region,
            )

@cli.statinfra_job
def make_job(job, options, nirvana, statface_client):
    """Build the rubrics job: company counts and popularity sums per slice.

    Standard function according to Statistics conventions,
    see https://clubs.at.yandex-team.ru/statistics/1143

    The company snapshot (statuses 'publish' and 'closed') is expanded to
    one row per provider, joined with the travel providers, the per-company
    rubrics and the popularity table, stored in an intermediate table and
    then expanded with roll-up rows over FIELDS. Two daily reports are
    published from the result: distinct-company counts and popularity sums.

    Output tables: the count result is written to the first Nirvana output
    table and the popularity result to the second one; whichever is not
    supplied falls back to a path under the job root.
    """

    count_report = ns.StatfaceReport() \
        .from_yaml_config(REPORT_YAML_CONFIG)\
        .path(REPORT_PATH)\
        .title(REPORT_TITLE.encode('utf8'))\
        .scale("daily")\
        .client(statface_client)

    popularity_report = ns.StatfaceReport() \
        .from_yaml_config(POP_REPORT_YAML_CONFIG)\
        .path(POP_REPORT_PATH)\
        .title(POP_REPORT_TITLE.encode('utf8'))\
        .scale("daily")\
        .client(statface_client)

    # Table-name suffix: a single date, or "<first>_<last>" for a range.
    dates = options.dates
    if len(dates) > 1:
        suffix = "{first}_{last}".format(first=dates[0], last=dates[-1])
    else:
        suffix = dates[0]

    job_root = nirvana.directories[0] if nirvana.directories else DEFAULT_DIR

    job = job.env(
        templates=dict(job_root=job_root,
                       suffix=suffix)
    )

    # Rows are stamped with the current local date, not with options.dates.
    today = dt.datetime.now().date().isoformat()

    # Each result needs its own output table: writing both to
    # output_tables[0] would let the popularity result overwrite the counts.
    output_tables = nirvana.output_tables or []
    count_output_table = (
        output_tables[0] if output_tables else '$job_root/count/$suffix')
    count_intermediate_table = '$job_root/intermediate/$suffix'
    popularity_output_table = (
        output_tables[1] if len(output_tables) > 1
        else '$job_root/popularity/$suffix')

    # Per company: the main rubric (the one with the greatest "is_main"),
    # all distinct rubrics, and the rubrics other than the main one.
    rubrics = job.table(COMPANY_TO_RUBRIC)\
        .project("is_main",
                 "rubric_permalink",
                 permalink="company_permalink")\
        .groupby("permalink")\
        .aggregate(main_rubric=na.last("rubric_permalink", by="is_main"),
                   rubrics=na.distinct("rubric_permalink"))\
        .project(ne.all(),
                 other_rubrics=ne.custom(lambda main, other: [elem for elem in other if elem != main], "main_rubric", "rubrics"))

    # Only providers whose permalink is "yandex_travel" or starts with "ytravel".
    travel_providers = job.table(PROVIDERS)\
        .filter(nf.custom(lambda x: (x == "yandex_travel") or x.startswith("ytravel"), "permalink"))\
        .project(provider_id="id",
                 provider_internal_name="permalink")

    popularity = job.table(POPULARITY)

    mapped = job.table(COMPANY)\
        .filter(nf.custom(lambda status: status in ('publish', 'closed'), "publishing_status"))\
        .map(mapper)\
        .join(travel_providers, by="provider_id")\
        .join(rubrics, by="permalink")\
        .join(popularity, by="permalink")\
        .project("permalink",
                 "ru",
                 "main_rubric",
                 "popularity",
                 "status",
                 "other_rubrics",
                 provider="provider_internal_name",
                 fielddate=ne.const(today),
                 rubric=ne.custom(norm_rubric, "main_rubric"),
                 has_hotel_rubrics=ne.custom(lambda rubrics: bool(set(rubrics).intersection(TRAVEL_RUBRICS)), "rubrics"),
                 main_is_hotel=ne.custom(lambda main: main in TRAVEL_RUBRICS, "main_rubric"),
                 the_sole_is_hotel=ne.custom(lambda rubrics: len(rubrics) == 1 and rubrics[0] in TRAVEL_RUBRICS, "rubrics"),
                 other_hotel=ne.custom(lambda other: bool(set(other).intersection(TRAVEL_RUBRICS)), "other_rubrics"),
                 other_non_hotel=ne.custom(lambda other: bool(set(other).difference(TRAVEL_RUBRICS)), "other_rubrics"))\
        .put(count_intermediate_table)\
        .map(add_totals_curried)

    # Report 1: number of distinct companies per slice and per flag.
    mapped\
        .groupby("fielddate", *FIELDS)\
        .aggregate(
            cnt=na.count_distinct('permalink'),
            has_hotel_rubrics=na.count_distinct('permalink', predicate=nf.equals("has_hotel_rubrics", True)),
            the_sole_is_hotel=na.count_distinct('permalink', predicate=nf.equals("the_sole_is_hotel", True)),
            main_is_hotel=na.count_distinct('permalink', predicate=nf.equals("main_is_hotel", True)),
            other_hotel=na.count_distinct('permalink', predicate=nf.equals("other_hotel", True)),
            other_non_hotel=na.count_distinct('permalink', predicate=nf.equals("other_non_hotel", True)))\
        .put(count_output_table)\
        .publish(count_report, remote_mode=True)

    # Report 2: popularity sums. Rows are first collapsed to one per
    # (permalink, slice) so that a company's popularity is added once per
    # slice, then summed per slice and per flag.
    mapped\
        .groupby("permalink", "fielddate", *FIELDS)\
        .aggregate(
            popularity=na.any("popularity"),
            has_hotel_rubrics=na.max("has_hotel_rubrics"),
            the_sole_is_hotel=na.max("the_sole_is_hotel"),
            main_is_hotel=na.max("main_is_hotel"),
            other_hotel=na.max("other_hotel"),
            other_non_hotel=na.max("other_non_hotel"))\
        .groupby("fielddate", *FIELDS)\
        .aggregate(
            the_sum=na.sum("popularity"),
            has_hotel_rubrics=na.sum('popularity', predicate=nf.custom(lambda x: bool(x), "has_hotel_rubrics")),
            the_sole_is_hotel=na.sum('popularity', predicate=nf.custom(lambda x: bool(x), "the_sole_is_hotel")),
            main_is_hotel=na.sum('popularity', predicate=nf.custom(lambda x: bool(x), "main_is_hotel")),
            other_hotel=na.sum('popularity', predicate=nf.custom(lambda x: bool(x), "other_hotel")),
            other_non_hotel=na.sum('popularity', predicate=nf.custom(lambda x: bool(x), "other_non_hotel")))\
        .put(popularity_output_table)\
        .publish(popularity_report, remote_mode=True)

    return job


# When executed as a script, hand control to nile's ``cli.run()``.
if __name__ == "__main__":
    cli.run()
