#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    resources as sr
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import getpass #obligatory for Statface
import datetime
import time
import re
import itertools


@with_hints(
    output_schema=dict(
        actiond=str,
        actiontype=str,
        device=str,
        service=str,
        fielddate=str,
        count=int
    )
)
def replace_ugc(recs):
    """Fold every service outside a fixed allow-list into ``"ugc"``.

    Each input record is re-emitted with all fields unchanged except
    ``service``: a value not present in the allow-list below is
    replaced by the literal ``"ugc"``.
    """
    # Services that keep their own name in the output.
    kept_services = (
        "collections", "answers", "sprav", "district", "services", "direct",
    )
    for row in recs:
        yield Record(
            actiond=row.actiond,
            actiontype=row.actiontype,
            service=row.service if row.service in kept_services else "ugc",
            device=row.device,
            fielddate=row.fielddate,
            count=row.count,
        )

@with_hints(
    output_schema=dict(
        actiond=str,
        actiontype=str,
        device=str,
        service=str,
        fielddate=str,
        count=int
    )
)
def replace_device(recs):
    """Substitute ``"desktop"`` for an empty or ``None`` device.

    All other fields of each record are passed through untouched.
    """
    for row in recs:
        # Both the empty string and None count as "no device given".
        has_no_device = row.device in ("", None)
        yield Record(
            actiond=row.actiond,
            actiontype=row.actiontype,
            service=row.service,
            device="desktop" if has_no_device else row.device,
            fielddate=row.fielddate,
            count=row.count,
        )

@with_hints(
    output_schema=dict(
        actiond=str,
        actiontype=str,
        device=str,
        service=str,
        fielddate=str,
        count=int
    )
)
def add_totals_v2(recs):
    """Fan each record out into all of its ``'_total_'`` roll-up variants.

    For the four dimensions ``actiond``, ``actiontype``, ``service`` and
    ``device`` every combination of "original value" / ``'_total_'`` is
    emitted, i.e. 16 records per input record. ``fielddate`` and
    ``count`` are copied as-is into every emitted record.
    """
    total = '_total_'
    for row in recs:
        fielddate = row.fielddate
        count = row.count
        variants = itertools.product(
            (row.actiond, total),
            (row.actiontype, total),
            (row.service, total),
            (row.device, total),
        )
        for actiond, actiontype, service, device in variants:
            yield Record(
                actiond=actiond,
                actiontype=actiontype,
                service=service,
                device=device,
                fielddate=fielddate,
                count=count,
            )

@cli.statinfra_job
def make_job(job, nirvana, statface_client, options):
    """Build the job that aggregates action logs and publishes four reports.

    Parameters:
        job: job object to configure; the configured job is returned.
        nirvana: its ``directories[0]`` is used as the ``$job_root`` template.
        statface_client: client attached to every Statface report.
        options: ``options.dates[-1]`` (``%Y-%m-%d``) is the most recent
            day of the processed window.

    The per-day inputs cover up to 30 days ending at the last requested
    date; the window is shortened to the number of days elapsed since
    2018-04-08 when that is smaller than 30.
    """
    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root=nirvana.directories[0],
            tmp_root='//home/images/tmp'
        )
    )

    def daily_report(path):
        # Daily-scale Statface report bound to the supplied client.
        return (
            ns.StatfaceReport()
            .path(path)
            .scale('daily')
            .client(statface_client)
        )

    full_report = daily_report('Adhoc/rkam/UGC_detailed_service_actiontype')
    short_report = daily_report('Adhoc/rkam/UGC_detailed_service_actiontype_v2')
    logins_report = daily_report(
        'Adhoc/rkam/UGC_detailed_service_actiontype_withLogins')
    entity_report = daily_report(
        'Adhoc/rkam/EntitySearch_detailed_service_actiontype_withLogins')

    # Date window: newest day first, at most 30 days deep.
    date_format = '%Y-%m-%d'
    last_day = datetime.datetime.strptime(options.dates[-1], date_format).date()
    first_day = datetime.datetime.strptime('2018-04-08', date_format).date()
    depth = min(30, (last_day - first_day).days)
    date_list = [
        (last_day - datetime.timedelta(days=offset)).strftime(date_format)
        for offset in range(depth)
    ]

    def daily_tables(prefix, **table_kwargs):
        # Concatenate the per-day tables found under ``prefix``.
        return job.concat(*[
            job.table(prefix + day, **table_kwargs) for day in date_list
        ])

    collections_daily = daily_tables('$job_root/collections/')
    collections_cards = job.table('$job_root/collections/cards30d')
    answers_daily = daily_tables('$job_root/answers/')
    ugc_other = job.table('$job_root/ugc/report_other_30d')
    passport_daily = daily_tables('$job_root/passport/')
    sprav = job.table('$job_root/sprav/report_30d')
    entity_ugc = job.table('$job_root/ugc/entity_30d')
    entity_cards = job.table('$job_root/collections/entity_cards30d')
    # District tables are requested with ignore_missing=True.
    district_daily = daily_tables('$job_root/district/', ignore_missing=True)
    ugc2 = job.table('$job_root/ugc/report_ugc2_30d')

    data_raw = job.concat(
        collections_daily,
        collections_cards,
        answers_daily,
        ugc_other,
        passport_daily,
        sprav,
        district_daily,
        ugc2,
    ).map(replace_device)

    group_keys = ('fielddate', 'device', 'service', 'actiontype', 'actiond')

    def summed_hits(stream):
        # Sum ``count`` into ``hits`` for every combination of dimensions.
        return stream.groupby(*group_keys).aggregate(hits=na.sum('count'))

    # Full breakdown: services keep their original names.
    (
        summed_hits(data_raw.map(add_totals_v2))
        .put('$job_root/process_data_full')
        .publish(full_report, allow_change_job=True)
    )

    # Short breakdown: services outside the allow-list are folded by replace_ugc.
    (
        summed_hits(data_raw.map(replace_ugc).map(add_totals_v2))
        .put('$job_root/process_data_short')
        .publish(short_report, allow_change_job=True)
    )

    # Same pipeline as the short breakdown, published to a third report
    # without storing an intermediate table.
    (
        summed_hits(data_raw.map(replace_ugc).map(add_totals_v2))
        .publish(logins_report, allow_change_job=True)
    )

    # Entity logs: intermediate results are stored before and after
    # the totals fan-out.
    entity_with_totals = (
        entity_ugc.concat(entity_cards)
        .map(replace_device)
        .put('$job_root/log6e2_r1')
        .map(add_totals_v2)
        .put('$job_root/log6e2_r2')
    )
    summed_hits(entity_with_totals).publish(entity_report, allow_change_job=True)

    return job


# Script entry point: when executed directly, hand control to the nile
# ``cli`` runner.
if __name__ == '__main__':
    cli.run()

