#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    clusters,
    filters as nf,
    aggregators as na,
    extractors as ne,
    files as nile_files,
    with_hints,
    Record
)

from qb2.api.v1 import (
    extractors as qe,
    resources as qr,
    typing as qt
)

import json
import argparse

# start_date = '2019-04-01'
# end_date = '2019-04-24'
# job_root = '//home/geo-analytics/pruzhinkina/GEOHELL-124'


def make_job(start_date, end_date, job_root):
    """Create a Nile job on the Hahn YT cluster for the given date range.

    ``start_date`` and ``end_date`` are substituted into the ``dates``
    template as ``{start..end}``; ``job_root`` becomes the ``job_root``
    template, with checkpoints placed under ``$job_root/checkpoints``.
    Returns the job object produced by the configured cluster.
    """
    path_templates = {
        'dates': '{%s..%s}' % (start_date, end_date),
        'job_root': job_root,
        'checkpoints_root': '$job_root/checkpoints'
    }
    # Default YT operation spec: main pool tree plus a tentative one.
    spec_defaults = {
        'pool_trees': ['physical'],
        'tentative_pool_trees': ['cloud']
    }

    configured_cluster = clusters.yt.Hahn().env(
        templates=path_templates,
        yt_spec_defaults=spec_defaults,
        parallel_operations_limit=10
    )
    return configured_cluster.job()


def get_country_name(geo_id):
    """Return the country name for ``geo_id`` based on the geobase.

    The region is looked up in the 'Geobase' resource and its ``path`` is
    scanned for the first entry whose ``type`` equals 3 (the level this
    script treats as "country").

    Returns the name of that entry, or None when the path has no such
    entry or when the id cannot be converted / resolved.
    """
    geobase = qr.get('Geobase')
    try:
        region = geobase.region_by_id(int(geo_id))
        for parent in region.path:
            if parent.type == 3:
                return parent.name
    except Exception:
        # Best-effort lookup: a bad or unknown id simply yields no country.
        # Catch Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        return None
    return None


def get_permalink(event_value):
    """Return the integer permalink stored in ``event_value``.

    ``event_value`` is a mapping of event parameters. Returns
    ``int(event_value['permalink'])`` when the key is present and its value
    is convertible to an integer; otherwise returns None.
    """
    if 'permalink' not in event_value:
        return None
    try:
        return int(event_value['permalink'])
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no usable permalink"; anything
        # else (e.g. KeyboardInterrupt) must propagate.
        return None


def reducer(groups):
    """
    For every group, pair the most recent route destination with each
    travel-mode event that follows it.

    A record whose ``path`` contains 'build_route' sets the current
    destination to its ``permalink``; any other record contributes
    ``[destination, mode]`` as long as a destination is known.
    Groups that produce no pairs emit nothing; otherwise one Record with
    the group's ``yandexuid`` and the list of pairs (``routes``) is yielded.
    """
    for key, records in groups:
        destination = None
        pairs = []
        for rec in records:
            if 'build_route' in rec.path:
                # A new route was built: later mode events belong to it.
                destination = rec.permalink
                continue
            if destination is None:
                # Mode event without a known destination - nothing to link.
                continue
            pairs.append([destination, rec.mode])

        if pairs:
            yield Record(yandexuid=key.yandexuid,
                         routes=pairs)


def get_build_route_paths(
        description_path='/ephemeral/home/pruzhinkina/maps/common/'
                         'clicks_description.json'):
    """
    Extract route building event names from a clicks description file,
    see https://github.yandex-team.ru/geo-analytics/
    maps/blob/master/common/clicks_description.json

    ``description_path`` is the local path to that JSON file; it defaults
    to the location this script was originally written against, so callers
    that pass no argument keep the previous behaviour.

    Returns a list with the UTF-8 encoded ``name`` of every entry in
    ``[0]['clicks'][0]['events']`` of the parsed document.
    """
    with open(description_path) as f:
        events = json.load(f)

    return [
        event['name'].encode('utf-8')
        for event in events[0]['clicks'][0]['events']
    ]


def main(start_date, end_date, job_root):
    """Build and run the job linking route destinations to travel modes.

    Reads the desktop log for the ``start_date``..``end_date`` range, keeps
    travel-mode show/change events and route-building clicks whose geo id
    resolves to 'Россия', reduces them per (yandexuid, session_id) into
    (permalink, mode) pairs, left-joins organisation rubric names and
    writes the result to ``$job_root/routes_to_organisations_desktop``.
    """
    job = make_job(start_date, end_date, job_root)

    desktop_log = job.table(
        '//home/maps/analytics/logs/cooked-bebr-log/clean/@dates/desktop')

    def split_rubrics(names):
        return names.split(';')

    # Organisation reference table: permalink -> list of rubric names.
    altay_pretty = job.table('//home/sprav/assay/common/company_pretty_format')
    rubrics = altay_pretty.project(
        'permalink',
        rubric_names=ne.custom(split_rubrics, 'rubric_names_ru')
    )

    build_route_paths = get_build_route_paths()

    def is_build_route_path(path):
        return path in build_route_paths

    def bizfinder_permalink(event_vars):
        # Only 'bizfinder' events carry the destination organisation.
        if event_vars.get('type') == 'bizfinder':
            return event_vars.get('permalink')
        return None

    def travel_mode(event_vars, event_type):
        # Clicks are route-building events and have no mode of their own.
        if event_type == 'click':
            return None
        # NOTE(review): max() over possibly-None values relies on Python 2
        # ordering; presumably meant as "whichever of the two is set".
        return max(event_vars.get('mode'), event_vars.get('type'))

    # Stage 1: keep only the two kinds of events the reducer understands.
    travel_modes_seen = nf.and_(
        nf.equals('path', 'maps_www.routes_panel.form.travel_modes'),
        nf.or_(
            nf.equals('event_type', 'show'),
            nf.equals('event_type', 'change_state')
        )
    )
    route_built = nf.and_(
        nf.custom(is_build_route_path, 'path'),
        nf.equals('event_type', 'click')
    )
    relevant_events = desktop_log.filter(
        nf.or_(travel_modes_seen, route_built)
    )

    # Stage 2: derive permalink / mode / country for every kept event.
    enriched_events = relevant_events.project(
        ne.all(),
        permalink=ne.custom(bizfinder_permalink, 'vars'),
        mode=ne.custom(travel_mode, 'vars', 'event_type'),
        country=ne.custom(get_country_name, 'geo_id'),
        files=[nile_files.StatboxDict('Geobasev6.bin', use_latest=True)]
    )
    russian_events = enriched_events.filter(
        nf.equals('country', 'Россия')
    )

    # Stage 3: per session, in event order, pair destinations with modes.
    session_routes = russian_events.groupby(
        'yandexuid',
        'session_id'
    ).sort(
        'event_index'
    ).reduce(
        reducer
    )

    # Stage 4: one output row per (permalink, mode) pair.
    unfolded_routes = session_routes.project(
        'yandexuid',
        qe.unfold('route', sequence='routes')
    )
    flat_routes = unfolded_routes.project(
        'yandexuid',
        permalink=ne.custom(lambda x: int(x[0]), 'route'),
        mode=ne.custom(lambda x: x[1], 'route')
    )

    # Stage 5: attach rubric names and store the result.
    output_schema = {
        'yandexuid': qt.String,
        'permalink': qt.Int64,
        'mode': qt.String,
        'rubric_names': qt.List[qt.String]
    }
    flat_routes.join(
        rubrics,
        type='left',
        by='permalink'
    ).put(
        '$job_root/routes_to_organisations_desktop',
        schema=output_schema
    )

    job.run()


if __name__ == '__main__':
    # Command-line entry point: -s is mandatory, -e falls back to -s
    # (single-day run), -p selects where the result is written.
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '-start_date', required=True,
                        help='start date - format YYYY-MM-DD')
    parser.add_argument('-e', '-end_date', required=False,
                        help='end date - format YYYY-MM-DD')
    parser.add_argument('-p', '-path', type=str,
                        default='//home/geo-analytics/pruzhinkina/GEOHELL-124',
                        help='path to result')
    args = parser.parse_args()

    # No end date given: process just the start date.
    args.e = args.e or args.s

    main(args.s, args.e, args.p)
