#!/usr/bin/env python
#  -*- coding: utf-8 -*-
'''Process to compute showcase usage metrics'''

import json
import argparse

from qb2.api.v1 import (
    typing as qt,
    filters as qf,
    resources as qr,
    extractors as qe,
)
from nile.api.v1 import (
    Template,
    files as nile_files,
    clusters,
    statface as ns,
    extractors as ne,
    aggregators as na,
)

# Values of the 'source' field accepted by filter_places_by_source.
SOURCES = {'showcase'}


def make_job(**templates):
    '''Configure the Hahn cluster environment and return a new job.

    Keyword arguments are additional path templates. They are merged over
    the built-in defaults, so a caller-supplied key replaces the default
    with the same name.

    Returns the object produced by ``job()`` on the configured cluster.
    '''
    path_templates = dict(
        mobmetrika='logs/metrika-mobile-log/1d',
        mobmetrika6p='statbox/metrika-mobile-log.6p',
        checkpoints_root='home/geo-analytics/tm-42/checkpoints',
        tm='home/geo-analytics/tm-42',
        testing='$tm/testing',
        tmp_files='$tm/_tmp',
    )
    path_templates.update(templates)

    # Default YT operation spec: pool tree settings.
    spec_defaults = {
        'pool_trees': ['physical'],
        'tentative_pool_trees': ['cloud'],
    }

    hahn = clusters.Hahn().env(
        yt_spec_defaults=spec_defaults,
        parallel_operations_limit=10,
        templates=path_templates,
    )
    return hahn.job()


def filter_places_by_source(event_value):
    '''Tell whether a JSON-encoded event value comes from a known source.

    Args:
        event_value: JSON text expected to decode to an object that
            carries a ``source`` key.

    Returns:
        True when the decoded object's ``source`` is in SOURCES. False in
        every other case: missing value, malformed JSON, JSON that is not
        an object, or a ``source`` that cannot be a set member.
    '''
    try:
        parsed = json.loads(event_value)
    except (TypeError, ValueError):
        # TypeError: event_value is None or otherwise not text.
        # ValueError: the text is not valid JSON.
        return False

    if not isinstance(parsed, dict):
        # Valid JSON scalars and arrays have no 'source' field to inspect.
        return False

    try:
        return parsed.get('source') in SOURCES
    except TypeError:
        # A list/object 'source' is unhashable, so it cannot match.
        return False


def make_report(client, scale):
    '''Describe the statface report that receives the showcase metrics.

    client -- statface client the report is bound to
    scale -- report scale, passed through to ``scale()`` unchanged

    Returns the configured report object with three dimensions (date,
    user region tree, event category tree) and two measures (users and
    sessions).
    '''
    report = ns.StatfaceReport()
    report = report.scale(scale)
    report = report.client(client)
    report = report.path('Maps/Adhoc/showcase_discovery_report')
    report = report.title('Discovery на домашке')

    date_dim = ns.Date('fielddate').title('Дата')

    # NOTE: 'showcase_region' and 'op_system' string selectors used to be
    # sketched here as commented-out drafts; they are not part of the
    # report.

    # User region tree, with an extra label for the '_other_' bucket.
    geo_dim = ns.TreeSelector('geo_path').title('Регион пользователя')
    geo_dim = geo_dim.dictionaries(
        'vcfs::geobase',
        {'_other_': 'Остальные регионы'},
    )
    geo_dim = geo_dim.default('\t10000\t')

    # Event category tree; the discovery branch is the default selection.
    event_dim = ns.TreeSelector('event_category_tree').title('Тип события')
    event_dim = event_dim.default('\tR\tdiscovery\t')

    report = report.dimensions(date_dim, geo_dim, event_dim)

    users_measure = ns.Number('users_count').title('Пользователи')
    sessions_measure = ns.Number('sessions_count').title('Сессии')

    return report.measures(users_measure, sessions_measure)


# Region types kept by map_geo when it builds a geo tree; regions of any
# other type are dropped from the path.
# NOTE(review): the numeric codes are presumably Geobase region-type ids --
# confirm against the Geobase reference.
GEO_LVLS = {
    3,  # country
    5,  # in-country region
    6,  # city
}


def map_geo(geo_id):
    '''Build the region tree for a geo id as a list of id strings.

    The list always starts with '10000'. A falsy ``geo_id`` yields just
    that root. Otherwise the ids of the regions on the Geobase path whose
    type is in GEO_LVLS follow, or the single entry '_other_' when no
    region on the path has such a type.
    '''
    root = ['10000']
    if not geo_id:
        return root

    geobase = qr.get('Geobase')
    ancestors = geobase.region_by_id(int(geo_id)).path
    kept = [str(node.id) for node in ancestors if node.type in GEO_LVLS]

    # Nothing of a kept level on the path: file it under '_other_'.
    if not kept:
        kept = ['_other_']

    return root + kept


# Full event path -> short label used in the report's event tree.
# Several paths may share one label.
EVENTS_RENAMED = {
    # --- discovery searches ---
    'maps_www.showcase.content.header.catalog_dialog.catalog.category.category_item':
        'search_rubric',
    'maps_www.showcase.content.search_tips.carousel.search_tip':
        'search_nearby',
    'maps_www.showcase.content.category.search_tips.search_tip':
        'search_suggest',

    # --- discovery collections ---
    'maps_www.showcase.content.collection_carousel.carousel.discovery_card':
        'collection',
    'maps_www.showcase.content.category.collection_carousel.carousel.discovery_card':
        'collection',

    # --- organizations ---
    'maps_www.showcase.content.organizations.carousel.organization_card':
        'org',
    'maps_www.showcase.content.category.organizations.carousel.organization_card':
        'org',
}

# Every path that has a short label counts as a discovery event.
DISCOVERY_EVENTS = set(EVENTS_RENAMED)


# @with_hints(
#     output_schema={
#         'path': qt.Optional[qt.String],
#         'vars': qt.Optional[qt.Yson],
#         'yandexuid': qt.Optional[qt.String],
#         'session_id': qt.Optional[qt.String],
#         'geo_path': qt.Optional[qt.String],
#         'fielddate': qt.Optional[qt.String],
#         'event_category': qt.Optional[qt.List[qt.String]],
#     }
# )
def map_event_category(path):
    '''Turn an event path into its category list for the event tree.

    The result is ``['R', <group>, <label parts...>]`` where ``<group>``
    is 'discovery' for paths in DISCOVERY_EVENTS and 'not discovery'
    otherwise. The label is the short name from EVENTS_RENAMED, or the
    raw path when there is none.

    Returns None for an empty or missing path.
    '''
    if not path:
        return None

    # Known events get their short label; unknown ones keep the raw path.
    label = EVENTS_RENAMED.get(path)
    if label is None:
        label = path

    group = 'discovery' if path in DISCOVERY_EVENTS else 'not discovery'

    # Labels starting with 'search' are split on '_' so that each part
    # becomes its own tree level, e.g. 'search_rubric' -> 'search', 'rubric'.
    tail = label.split('_') if label.startswith('search') else [label]

    return ['R', group] + tail


def main(dates):
    '''Assemble the nile job that computes and publishes showcase metrics.

    dates -- date template string. It is passed to make_job as the
        ``dates`` path template for the input tables and, unfolded via
        Template.unfold, gives the set of accepted ``session_date`` values.

    Returns the job without running it; the ``__main__`` section of this
    script calls ``run()`` on the returned object.
    '''

    set_of_dates = set(Template.unfold(Template(dates), 'string'))

    job = make_job(
        dates=dates,
        cooked_bebr_log='//home/maps/analytics/logs/cooked-bebr-log',
    )
    # Desktop tables of the cooked bebr log, addressed via the dates template.
    tables = job.table('$cooked_bebr_log/clean/@dates/desktop')

    # Keep only click events whose path starts with 'maps_www.showcase' and
    # whose session_date is one of the requested dates.
    stream = tables.filter(
        qf.one_of('session_date', set_of_dates),
        qf.startswith('path', 'maps_www.showcase'),
        qf.equals('event_type', 'click'),
    ).project(
        'vars',
        'path',
        'yandexuid',
        'session_id',
        # Category list for the event, see map_event_category.
        qe.custom('event_category', map_event_category,
                  'path').add_hints(type=qt.List[qt.String]),
        # NOTE(review): unfold_prefixes presumably emits one record per
        # prefix of the source list, so every level of the tree is counted
        # -- confirm against the qb2 documentation.
        qe.unfold_prefixes(
            'event_category_unfold',
            'event_category',
        ).add_hints(type=qt.List[qt.String]),
        qe.unfold_prefixes('geo_path_unfold',
                           'geo_tree').add_hints(type=qt.List[qt.String]),
        # Region id list for the record, see map_geo.
        geo_tree=ne.custom(map_geo,
                           'geo_id').add_hints(type=qt.List[qt.String]),
        # Render the unfolded lists as tab-delimited paths with a leading
        # and a trailing tab, matching the format of the report defaults
        # such as '\t10000\t'.
        geo_path=ne
        .custom(lambda x: '\t%s\t' % '\t'.join(x),
                'geo_path_unfold').add_hints(type=qt.String),
        event_category_tree=ne.custom(
            lambda x: '\t%s\t' % '\t'.join(x),
            'event_category_unfold',
        ).add_hints(type=qt.String),
        fielddate='session_date',
        # NOTE(review): presumably the data file behind qr.get('Geobase')
        # used in map_geo -- confirm.
        files=[nile_files.StatboxDict('Geobasev6.bin', use_latest=True)],
    )

    statface_client = ns.StatfaceProductionClient()
    report = make_report(statface_client, 'daily')

    # Distinct users and sessions per date, region path and event
    # category path.
    report_data = stream.groupby(
        'fielddate',
        'geo_path',
        'event_category_tree',
    ).aggregate(
        users_count=na.count_distinct('yandexuid'),
        sessions_count=na.count_distinct('session_id'),
    )

    report_data.publish(report)

    return job


# Script entry point: parse the dates argument, build the job and run it.
if __name__ == '__main__':
    import sys

    # NOTE(review): the description text looks copied from another script
    # (this one computes showcase usage metrics) -- confirm and reword.
    COMMAND_ARGUMENTS = argparse.ArgumentParser(
        description='''
        prepare and run a nile job on squized bebr to get a cooked log'''
    )
    COMMAND_ARGUMENTS.add_argument(
        'dates',
        help='dates to process in the format "YYYY-MM-DD"',
        type=str,
    )

    ARGS = vars(COMMAND_ARGUMENTS.parse_args())
    # Call form: prints the same text as the Python 2 print statement for a
    # single argument and also parses under Python 3.
    print(ARGS)

    JOB = main(**ARGS)

    JOB.run()

    # sys.exit rather than the bare exit(), which is only injected by the
    # site module and is meant for interactive use.
    sys.exit()
