#!/usr/bin/env python
#  -*- coding: utf-8 -*-
'''Process to compute showcase usage metrics'''

import json
import argparse

from qb2.api.v1 import (
    typing as qt,
    filters as qf,
    resources as qr,
    extractors as qe,
)
from nile.api.v1 import (
    Record,
    Template,
    files as nile_files,
    clusters,
    statface as ns,
    extractors as ne,
    with_hints,
    aggregators as na,
)


def make_job(yql=False, **templates):
    '''Configure a Hahn cluster environment and return a new job from it.

    yql -- when true the cluster object is ``clusters.Hahn()``,
        otherwise ``clusters.yt.Hahn()``.
    templates -- extra path templates; on a key collision they override
        the defaults listed below.
    '''
    path_templates = dict(
        mobmetrika='logs/metrika-mobile-log/1d',
        mobmetrika6p='statbox/metrika-mobile-log.6p',
        checkpoints_root='home/geo-analytics/tm-42/checkpoints',
        tm='home/geo-analytics/tm-42',
        testing='$tm/testing',
        tmp_files='$tm/_tmp',
    )
    # caller-supplied templates win over the defaults
    path_templates.update(templates)

    backend = clusters.Hahn() if yql else clusters.yt.Hahn()

    spec_defaults = {
        'pool_trees': ['physical'],
        'tentative_pool_trees': ['cloud'],
    }
    environment = backend.env(
        yt_spec_defaults=spec_defaults,
        parallel_operations_limit=10,
        templates=path_templates
    )

    return environment.job()


# values of the "source" field that mark an event as coming from showcase
SOURCES = {'showcase'}


def filter_places_by_source(event_value):
    '''Tell whether a JSON event value has a "source" listed in SOURCES.

    event_value -- serialized JSON (normally a string); anything that
        cannot be parsed, is not a JSON object, or carries an unusable
        "source" is rejected instead of raising.

    Returns True when the parsed object's "source" is in SOURCES,
    False otherwise.
    '''
    try:
        parsed = json.loads(event_value)
    except (ValueError, TypeError):
        # ValueError: malformed JSON; TypeError: not a string (e.g. None)
        return False

    # only JSON objects can carry a "source" key
    if not isinstance(parsed, dict):
        return False

    try:
        return parsed.get('source') in SOURCES
    except TypeError:
        # unhashable "source" (a list or an object) can never match
        return False


# Column name -> qb2 type mapping.
# NOTE(review): nothing in the visible part of this file reads SCHEME;
# confirm whether it is still needed.
SCHEME = dict(
    fielddate=qt.String,
    geo_path=qt.String,
    DeviceIDHash=qt.String,
    DeviceIDSessionIDHash=qt.String,
    events_by_type=qt.Json,
    searches_by_type=qt.Json,
    discovery_events_count=qt.Integer,
)


def make_report():
    '''Describe the daily statface report the job publishes to.

    The report lives at 'Maps.Normal/Others/showcase_report_diff_reg',
    uses the beta statface client, has five dimensions (date, showcase
    region, platform, user region tree, event category tree) and two
    numeric measures (users and sessions).

    Returns the configured report object.
    '''
    beta_client = ns.StatfaceBetaClient()

    base_report = ns.StatfaceReport().path(
        'Maps.Normal/Others/showcase_report_diff_reg'
    )
    base_report = base_report.scale('daily').client(beta_client)
    base_report = base_report.title('Showcase report dr')

    # dimensions, in the order they are registered on the report
    date_dimension = ns.Date('fielddate').title('Дата')

    showcase_region_dimension = ns.StringSelector('showcase_region')
    showcase_region_dimension = showcase_region_dimension.title(
        'Регион шторы'
    ).dictionaries({
        '_other_': 'Без региона',
    }).default('_total_')

    platform_dimension = ns.StringSelector('op_system').title(
        'Платформа'
    ).default('_total_')

    user_region_dimension = ns.TreeSelector('geo_path').title(
        'Регион пользователя'
    ).dictionaries(
        'vcfs::geobase', {
            '_other_': 'Остальные регионы'
        }
    ).default('\t10000\t')

    event_dimension = ns.TreeSelector('event_category_tree').title(
        'Тип события'
    ).default('_total_')

    with_dimensions = base_report.dimensions(
        date_dimension,
        showcase_region_dimension,
        platform_dimension,
        user_region_dimension,
        event_dimension,
    )

    users_measure = ns.Number('users_count').title('Пользователи')
    sessions_measure = ns.Number('sessions_count').title('Сессии')

    return with_dimensions.measures(users_measure, sessions_measure)


# region types that are kept when a region path is built:
#   3 - country, 5 - in-country region, 6 - city
GEO_LVLS = {3, 5, 6}


def map_geo(geo_id):
    '''Build the list of region ids describing where ``geo_id`` sits.

    geo_id -- region id; it is converted with ``int()`` before lookup.

    Returns a list of strings that always starts with '10000':
    * just ['10000'] when ``geo_id`` is empty/falsy;
    * ['10000', '_other_'] when no region on the path has a type
      from GEO_LVLS;
    * otherwise '10000' followed by the ids of the path regions whose
      type is in GEO_LVLS, in path order.
    '''
    root = '10000'

    if not geo_id:
        return [root]

    geobase = qr.get('Geobase')
    path_regions = geobase.region_by_id(int(geo_id)).path
    kept_ids = [
        str(node.id) for node in path_regions if node.type in GEO_LVLS
    ]

    if not kept_ids:
        # nothing of a known level on the path
        return [root, '_other_']

    return [root] + kept_ids


# Event name -> category.
# String values are report categories; integer values mark events that
# map_events drops from the stream.
EVENTS_SPLIT = {
    # places cards need to be specifically processed
    'search.open-place-view': 'place',
    # discovery searches
    'showcase.search': 'search',
    # discovery collections
    'showcase.open-discovery': 'collection',
    # route building
    'showcase.make-route': 4,
    'showcase.open-routes': 4,
    # shows / hides
    'showcase.hide': -1,
    'showcase.appear': 1,
    # actual expand
    'showcase.expand': 2,
    # other
    'showcase.change-tag': 5,
    'showcase.scroll-pager': 5,
    # technical
    'showcase.block-appear': 9,
    'showcase.select-category': 9,
    'showcase.show-pager-items': 9,
    # showcase info
    'showcase.get-showcase-info': 'info',
}


@with_hints(
    output_schema={
        'EventName': qt.Optional[qt.String],
        'EventValue': qt.Optional[qt.Yson],
        'EventNumber': qt.Optional[qt.String],
        'DeviceIDHash': qt.Optional[qt.String],
        'AppPlatform': qt.Optional[qt.String],
        'DeviceIDSessionIDHash': qt.Optional[qt.String],
        'geo_path': qt.Optional[qt.String],
        'fielddate': qt.Optional[qt.String],
        'event_category': qt.Optional[qt.List[qt.String]],
    }
)
def map_events(records):
    '''Attach an ``event_category`` path to every reportable event.

    records -- iterable of records with ``EventName`` and ``EventValue``.

    Yields a copy of the record with ``event_category`` set to
    ['R', <category>] or, for search events,
    ['R', 'search', <search type>]. The search type is the "type" key
    of the JSON event value: 'no_type' when the key is absent,
    'Unknown' when the value cannot be read as a JSON object.

    Events whose EVENTS_SPLIT entry is an integer are dropped. Events
    that are not listed in EVENTS_SPLIT at all are dropped as well:
    they have no category, and a ``None`` inside ``event_category``
    would contradict the declared list-of-strings type and cannot be
    joined into a tree path.
    '''

    for record in records:
        event_type = EVENTS_SPLIT.get(record.EventName)

        if event_type is None or isinstance(event_type, int):
            continue

        event_category = ['R', event_type]

        if event_type == 'search':
            try:
                search_type = (
                    json.loads(record.EventValue).get('type', 'no_type')
                )
            except (ValueError, TypeError, AttributeError):
                # malformed JSON, missing value, or a non-object payload
                search_type = 'Unknown'
            event_category.append(search_type)

        yield Record(record, event_category=event_category)


@with_hints(
    output_schema={
        'fielddate': qt.Optional[qt.String],
        'geo_path': qt.Optional[qt.String],
        'AppPlatform': qt.Optional[qt.String],
        'DeviceIDHash': qt.Optional[qt.String],
        'DeviceIDSessionIDHash': qt.Optional[qt.String],
        'event_category': qt.Optional[qt.List[qt.String]],
        # 'showcase_regions': qt.Optional[qt.List[qt.String]],
        'showcase_regions': qt.Optional[qt.String],
    }
)
def reduce_events_into_sessions(groups):
    '''Tag every non-info event of a group with the current showcase region.

    groups -- iterable of (group key, records) pairs; each record has
        ``event_category`` and ``EventValue``.

    Records are consumed in the order given. An 'info' event is not
    emitted; it only updates the current region from the "region" key
    of its JSON value. Every other event is emitted as a record made of
    the group key, its ``event_category`` and ``showcase_regions`` set
    to the current region. The region is '_other_' until an info event
    provides one, and whenever the provided one is empty.

    An info event whose value cannot be read as a JSON object leaves
    the current region unchanged.
    '''

    for group, records in groups:
        showcase_region = '_other_'

        for record in records:
            event_category = record.event_category

            if 'info' in event_category:
                try:
                    showcase_region = json.loads(record.EventValue).get(
                        'region', '_other_'
                    )
                except (ValueError, TypeError, AttributeError):
                    # malformed JSON, missing value, or non-object payload
                    pass
                continue

            # an info event may carry an empty/null region
            showcase_region = showcase_region or '_other_'

            yield Record(
                group,
                showcase_regions=showcase_region,
                event_category=event_category,
            )


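# NOTE: disabled alternative version of reduce_events_into_sessions, kept
# for reference; it emits a list of regions per group instead of one.
# def reduce_events_into_sessions(groups):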
#     '''
#     reducer that collects session info
#     and makes an overall-session info-object'''
#
#     for group, records in groups:
#         events = []
#         regions = {'_total_'}
#         showcase_region = None
#
#         for record in records:
#             event_category = record.event_category
#
#             if 'info' in event_category:
#                 try:
#                     event_value = json.loads(record.EventValue)
#                     showcase_region = event_value.get('region')
#                 except ValueError:
#                     pass
#
#                 if showcase_region:
#                     regions.add(showcase_region)
#             else:
#                 if event_category not in events:
#                     events.append(event_category)
#
#         if not events:
#             continue
#
#         if len(regions) == 1:
#             regions.add('_other_')
#
#         for event in events:
#             yield Record(
#                 group,
#                 showcase_regions=list(regions),
#                 event_category=event,
#             )
#
#
def main(dates, yql, full_log, path):
    '''Assemble the whole job: read the log, categorize, publish, store.

    dates -- date template string; passed to the job as the ``dates``
        template and unfolded into the set of accepted StartDate values.
    yql -- forwarded to make_job.
    full_log -- read '$mobmetrika/@dates' when true,
        '$mobmetrika6p/@dates' otherwise.
    path -- passed to the job as the ``path`` template; the resulting
        stream is stored to '$testing/$path'.

    Returns the job object; running it is left to the caller.
    '''

    job = make_job(yql=yql, dates=dates, path=path)
    set_of_dates = set(Template.unfold(Template(dates), 'string'))

    source_path = '$mobmetrika/@dates' if full_log else '$mobmetrika6p/@dates'
    tables = job.table(source_path)

    # step 1: keep foreground-session showcase events of the app
    place_view_from_showcase = qf.and_(
        qf.equals('EventName', 'search.open-place-view'),
        qf.custom(filter_places_by_source, 'EventValue'),
    )
    showcase_events = tables.filter(
        qf.equals('APIKey', '4'),
        qf.equals('SessionType', 'SESSION_FOREGROUND'),
        qf.one_of('StartDate', set_of_dates),
        qf.or_(
            place_view_from_showcase,
            qf.startswith('EventName', 'showcase'),
        ),
    )

    # step 2: keep the needed columns and unfold the user region tree
    with_geo = showcase_events.project(
        'EventValue',
        'EventName',
        'EventNumber',
        'DeviceIDHash',
        'AppPlatform',
        'DeviceIDSessionIDHash',
        qe.unfold_prefixes(
            'geo_path_unfold', 'geo_tree'
        ).add_hints(type=qt.List[qt.String]),
        geo_tree=ne.custom(
            map_geo, 'RegionID'
        ).add_hints(type=qt.List[qt.String]),
        geo_path=ne.custom(
            lambda x: '\t%s\t' % '\t'.join(x), 'geo_path_unfold'
        ).add_hints(type=qt.String),
        fielddate='StartDate',
        files=[nile_files.StatboxDict('Geobasev6.bin', use_latest=True)],
    )

    # step 3: categorize events
    mapped_events = with_geo.map(
        map_events, memory_limit=128
    ).checkpoint('mapped_events')

    # step 4: attach the showcase region, walking each group by EventNumber
    grouped = mapped_events.groupby(
        'geo_path',
        'fielddate',
        'DeviceIDHash',
        'AppPlatform',
        'DeviceIDSessionIDHash',
    )
    session_events = grouped.sort('EventNumber').reduce(
        reduce_events_into_sessions, memory_limit=1024
    )
    without_info = session_events.filter(
        qf.not_(qf.contains('event_category', 'info'))
    )

    # step 5: unfold report dimensions
    # (disabled variant for a list-valued showcase_regions column:
    #  qe.unfold('showcase_region', 'showcase_regions').add_hints(
    #      type=qt.String
    #  ))
    unfolded = without_info.project(
        ne.all(),
        qe.unfold_with_total(
            'showcase_region', 'showcase_regions'
        ).add_hints(type=qt.String),
        qe.unfold_with_total(
            'op_system', 'AppPlatform'
        ).add_hints(type=qt.String),
        qe.unfold_prefixes(
            'event_category_unfold', 'event_category'
        ).add_hints(type=qt.List[qt.String]),
        event_category_tree=ne.custom(
            lambda x: '\t%s\t' % '\t'.join(x), 'event_category_unfold'
        ).add_hints(type=qt.String),
    )
    stream = unfolded.checkpoint('reduced_events')

    report = make_report()

    # step 6: distinct users / sessions per report slice
    report_slices = stream.groupby(
        'fielddate',
        'geo_path',
        'op_system',
        'event_category_tree',
        'showcase_region',
    )
    report_data = report_slices.aggregate(
        users_count=na.count_distinct('DeviceIDHash'),
        sessions_count=na.count_distinct('DeviceIDSessionIDHash'),
    )

    report_data.publish(report)

    stream.put('$testing/$path')

    return job


# Script entry point: parse the command line, build the job and run it,
# optionally reusing the checkpoints named in --checkpoints.
if __name__ == '__main__':
    import sys

    COMMAND_ARGUMENTS = argparse.ArgumentParser(
        description='''
        prepare and run a nile job on squized bebr to get a cooked log'''
    )
    COMMAND_ARGUMENTS.add_argument(
        '--path',
        '-p',
        help='prefix to the path to put data to',
        type=str,
        default='248-showcase/session-wise'
    )
    COMMAND_ARGUMENTS.add_argument(
        '--dates',
        '-d',
        help='dates to process in the format "YYYY-MM-DD"',
        type=str,
        default='{2019-02-11..2019-02-17}',
    )
    COMMAND_ARGUMENTS.add_argument(
        '-yql',
        help='yql or yt',
        dest='yql',
        action='store_true',
        default=False,
    )
    # passing -6p switches full_log off, i.e. selects the 6p log
    COMMAND_ARGUMENTS.add_argument(
        '-6p',
        help='6p log or full log',
        dest='full_log',
        action='store_false',
        default=True,
    )
    COMMAND_ARGUMENTS.add_argument(
        '--checkpoints',
        '-ch',
        help='checkpoints to use. can be "mapped_events,reduced_events"',
        type=str,
    )

    ARGS = vars(COMMAND_ARGUMENTS.parse_args())
    # call form with one argument prints the same on Python 2 and 3
    print(ARGS)
    # main() does not accept checkpoints; they are handed to run() instead
    CHECKPOINTS = ARGS.pop('checkpoints')

    JOB = main(**ARGS)

    if CHECKPOINTS:
        JOB.run(checkpoints=CHECKPOINTS.split(','))
    else:
        JOB.run()

    # sys.exit() rather than the interactive-shell helper exit()
    sys.exit()
