#!/usr/bin/env python
#  -*- coding: utf-8 -*-
'''historical re-calc of the collections cube based on discovery collections
history and visits from mobile maps and desktop'''

import copy as cp
import time
import argparse
import datetime

from qb2.api.v1 import typing as qt
from nile.api.v1 import clusters

from extractors import extract
from processors import process

# Column name -> qb2 type for the cube table; make_run_job passes this as
# `schema=` when it writes `$collections_cube/<date_to>`.
COLLECTIONS_SCHEMA = {
    'avatar_img': qt.String,
    'description': qt.String,
    'fielddate': qt.String,
    'geoid': qt.Dict[qt.String, qt.Integer],
    'id': qt.String,
    'permalinks': qt.List[qt.Integer],
    'properties': qt.Yson,
    'title': qt.String,
    'update_ts': qt.String,
    'shows': qt.Integer,
    'user_id': qt.String,
    'service': qt.String,
}

# Columns shared by every datalens table; make_run_job deep-copies this dict
# and extends the copy with DATALENS_SCHEMAS[mode].
DATALENS_SCHEMA_BASE = {
    'service': qt.String,
    'fielddate': qt.String,
    'collection_region': qt.Unicode,
    'collection_id': qt.String,
    'partner_name': qt.Unicode,
    'rubric_text': qt.Unicode,
    'shows': qt.Integer,
}

# Extra columns per datalens mode, keyed by the mode name that also appears
# in the output table name (`<date_to>-<mode>`).
DATALENS_SCHEMAS = {
    'collections': {
        'unique_users': qt.Integer,
        'unique_permalinks': qt.Integer,
    },
    'orgs': {
        'permalink': qt.Integer,
        'unique_users': qt.Integer,
    },
    'users': {
        'user_id': qt.String,
    }
}

# Key fields used for every datalens mode; in inplace mode make_run_job
# passes COMMON_FIELDS + MODAL_FIELDS[mode] to `.unique(...)`.
COMMON_FIELDS = [
    'fielddate', 'collection_id', 'service', 'collection_region',
    'rubric_text', 'partner_name'
]

# Additional key fields per datalens mode (appended to COMMON_FIELDS).
MODAL_FIELDS = {
    'collections': [],
    'users': ['user_id'],
    'orgs': ['permalink'],
}


def define_cluster(clusters_obj, **templates):
    '''build a Hahn cluster object with the job environment applied

    clusters_obj -- object exposing ``Hahn()``; the result of ``Hahn()``
        must expose ``env(...)``
    templates -- extra path templates; they are merged over the built-in
        ones, so a caller-supplied key overrides the default of that name

    returns whatever ``clusters_obj.Hahn().env(...)`` returns
    '''
    path_templates = dict(
        redir_log='logs/redir-log/1d',
        bebr_squized='home/geo-analytics/heyroman/extracts/bebr',
        mobmetrika='logs/metrika-mobile-log/1d',
        checkpoints_root='home/geo-analytics/tm-42/checkpoints',
        discovery='home/geosearch/collections/production',
        collections_history='$discovery/history/expert_collections',
        tm='home/geo-analytics/tm-42',
        tmp_files='$tm/_tmp',
    )
    # caller-supplied templates take precedence over the defaults above
    path_templates.update(templates)

    spec_defaults = {
        'pool_trees': ['physical'],
        'tentative_pool_trees': ['cloud'],
    }

    hahn = clusters_obj.Hahn()
    return hahn.env(
        yt_spec_defaults=spec_defaults,
        parallel_operations_limit=10,
        templates=path_templates
    )


def make_dates(dates=None):
    '''turn the user-supplied dates into a brace range and its last date

    dates -- None, a single date string, or ``first..last``

    returns a ``(range, last_date)`` tuple where range looks like
    ``{first..last}``:
      * None         -> from 2018-07-04 up to yesterday (local time)
      * ``a..b``     -> ``{a..b}`` with last_date ``b``
      * single ``a`` -> ``{a..a}`` with last_date ``a``

    a string with more than one ``..`` separator raises ValueError
    '''
    if dates is None:
        now = time.localtime()
        midnight = datetime.datetime(now.tm_year, now.tm_mon, now.tm_mday)
        last_day = (midnight - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        return '{%s..%s}' % ('2018-07-04', last_day), last_day

    if '..' not in dates:
        # a single day is expressed as a range that starts and ends on it
        return '{%s..%s}' % (dates, dates), dates

    # only the right-hand boundary is needed; unpacking also enforces that
    # there is exactly one separator
    _, last_day = dates.split('..')
    return '{%s}' % dates, last_day


def make_run_job(path, datalens=False, inplace=False, dates=None):
    '''assemble and run the discovery collections history job

    path -- root of the output; registered as the ``collections_cube``
        template and used as the prefix of the datalens links
    datalens -- forwarded to ``process``; datalens tables are written only
        when ``process`` returns a non-empty collection of datalens streams
    inplace -- when true, each output table is first concatenated with the
        table already stored under the same name and passed through
        ``.unique(...)`` on its key fields before being written back
    dates -- forwarded to ``make_dates``
    '''
    dates, date_to = make_dates(dates)
    cluster = define_cluster(clusters, dates=dates, collections_cube=path)

    job = cluster.job('discovery-collections-history')
    job = job.env(files=['extractors.py', 'processors.py'])

    streams = extract(cluster, job)
    cube_stream, datalens_streams = process(job, streams, datalens)

    # the cube table is named after the last processed date
    cube_table = '$collections_cube/%s' % date_to
    if inplace:
        stored_cube = job.table(cube_table)
        merged_cube = job.concat(stored_cube, cube_stream)
        cube_stream = merged_cube.unique('id', 'fielddate', 'user_id')
    cube_stream.put(cube_table, schema=COLLECTIONS_SCHEMA)

    # datalens_streams is indexed in this order
    modes = ['collections', 'users', 'orgs']

    if datalens_streams:
        for position, mode in enumerate(modes):
            schema = cp.deepcopy(DATALENS_SCHEMA_BASE)
            schema.update(DATALENS_SCHEMAS[mode])
            stream = datalens_streams[position]

            mode_table = '$collections_cube/datalens/%s-%s' % (date_to, mode)
            if inplace:
                mode_fields = COMMON_FIELDS + MODAL_FIELDS[mode]
                stored_mode = job.table(mode_table)
                stream = job.concat(stored_mode, stream).unique(*mode_fields)
            stream.put(mode_table, schema=schema)

    job.run()

    if datalens_streams:
        # after the run, link `<path>/datalens/<mode>` to the dated table
        for mode in modes:
            dated_table = '%s/datalens/%s-%s' % (path, date_to, mode)
            mode_link = '%s/datalens/%s' % (path, mode)
            cluster.driver.client.link(dated_table, mode_link, force=True)


if __name__ == '__main__':
    # Command-line entry point: parse the options, echo them, and hand them
    # to make_run_job as keyword arguments (option names match its
    # parameter names: path, datalens, dates, inplace).
    COMMAND_ARGUMENTS = argparse.ArgumentParser(
        description='''
        prepare and run a nile job on discovery collections basic metrics
        will collect all historical data'''
    )
    COMMAND_ARGUMENTS.add_argument(
        '--path',
        '-p',
        help='prefix to the path to put data to',
        type=str,
        default='//home/geo-analytics/maps/collections-cube'
    )
    COMMAND_ARGUMENTS.add_argument(
        '--datalens',
        '-dl',
        help='do we need datalens table as well',
        dest='datalens',
        action='store_true',
        default=False
    )
    # argparse applies %-formatting to help strings, so the literal percent
    # signs of the date pattern are doubled; unescaped they make `--help`
    # fail with "unsupported format character".
    COMMAND_ARGUMENTS.add_argument(
        '--dates',
        '-d',
        help='''
        dates for process in format first_date..last_date,
        where each date is formatted like %%Y-%%m-%%d''',
        type=str,
    )
    COMMAND_ARGUMENTS.add_argument(
        '--inplace',
        '-i',
        help='inplace recalc',
        dest='inplace',
        action='store_true',
        default=False
    )
    ARGS = vars(COMMAND_ARGUMENTS.parse_args())
    # single-argument call form prints the same under Python 2 and Python 3
    print(ARGS)

    make_run_job(**ARGS)
