#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    statface as ns,
    Record
)
from qb2.api.v1 import filters as sf, extractors as se
from pytils import (
    yt_get_date_from_table as gdft,
    date_range,
    get_dates_from_stat
)
import datetime
import math

# YT path of the stream metadata table; process_date left-joins it to the
# redir-log aggregates on its JoinKey column.
STRM_META = '//home/videolog/strm_meta/iron_branch/concat'
# YT directory holding the redir-log tables; process_date reads
# REDIR_ROOT/<date> and main() searches this directory for new dates.
REDIR_ROOT = '//logs/redir-log/1d'
# YT directory under which this job writes <date>/daily and <date>/report.
JOB_ROOT = '//home/videoquality/vh_analytics/mma_1802'
# Credentials passed as `headers` to get_dates_from_stat. They are read at
# import time, so importing this module raises KeyError when STAT_LOGIN or
# STAT_TOKEN is missing from the environment.
STAT_HEADERS = {
    'StatRobotUser': os.environ['STAT_LOGIN'],
    'StatRobotPassword': os.environ['STAT_TOKEN']
}
# Default Statface report path; main() rebinds this global from --report.
report = 'Video/Others/Strm/CHMF_Highlights'


class DailyPathFilter(object):
    """Callable predicate for paths ending in '/daily' dated up to ``date``.

    An instance is handed to the YT client's ``search`` as ``path_filter``.
    """

    def __init__(self, date):
        # Inclusive upper bound for the date extracted from a candidate path.
        self.date = date

    def __call__(self, path):
        """Return True only for '/daily' paths whose date is <= ``self.date``.

        A falsy ``path`` is treated as an empty string and therefore
        rejected; a path from which no (truthy) date can be extracted is
        rejected as well.
        """
        candidate = path if path else ''
        if candidate.endswith('/daily'):
            parsed = gdft(candidate)
            return bool(parsed and parsed <= self.date)
        return False


def highlight_filter_chain(chain):
    """Report whether any element of ``chain`` has the hard-coded UUID.

    ``chain`` is iterated and each element is queried with
    ``.get('UUID', '')``; a falsy ``chain`` (None, empty list) yields False.
    """
    wanted_uuid = '45c96d8e4e1805faa5174cfed4b6b462'
    return any(
        item.get('UUID', '') == wanted_uuid for item in (chain or [])
    )


def count_lvt_hb(hb):
    """Return ``math.log((hb - 1) * 30)``, or 0 when ``hb`` is at most 1.

    The guard keeps the logarithm's argument strictly positive.
    """
    return 0 if hb <= 1 else math.log((hb - 1) * 30)


def _build_daily_table(cluster, date, daily_table):
    """Aggregate one day of redir-log player events into ``daily_table``."""
    job = cluster.job()

    meta = job.table(STRM_META)

    job.table(
        '{}/{}'.format(REDIR_ROOT, date)
    ).qb2(
        log='redir-log',
        fields=[
            'path',
            'yandexuid',
            se.log_field('content_id'),
            # Per-record 0/1 flags derived from the event path; they are
            # summed per (yandexuid, content_id) below.
            se.custom('is_hb', lambda x: 1 if 'heartbeat' in str(
                x) else 0, 'path'),
            se.custom('is_ads', lambda x: 1 if 'adStart' in str(
                x) else 0, 'path'),
        ],
        filters=[
            sf.defined('path', 'content_id'),
            sf.contains('path', 'player-events.')
        ],
        mode='yamr_lines', intensity='data'
    ).groupby(
        'yandexuid', 'content_id'
    ).aggregate(
        heartbeats=na.sum('is_hb'),
        ads=na.sum('is_ads'),
    ).project(
        ne.all(),
        tvt=ne.custom(lambda x: x * 30, 'heartbeats'),
        lvt=ne.custom(count_lvt_hb, 'heartbeats')
    ).join(
        meta, type='left', by_left='content_id', by_right='JoinKey'
    ).filter(
        # Keep only rows whose metadata chain contains the target UUID.
        nf.custom(
            highlight_filter_chain, 'chain'
        )
    ).project(
        # content_name is the 'Name' of the last element of the chain.
        ne.all(), content_name=ne.custom(
            lambda x: (x or [{'Name': '-'}])[-1]['Name'], 'chain'
        )
    ).sort(
        'yandexuid', 'content_name', 'content_id'
    ).put(
        daily_table
    )

    job.run()


def _build_report_table(cluster, date, daily_table, report_table):
    """Combine daily and cumulative per-content stats into ``report_table``."""
    job = cluster.job()

    # Every '<...>/daily' table under JOB_ROOT dated on or before ``date``.
    tables = [
        job.table(table) for table in cluster.driver.client.search(
            root=JOB_ROOT,
            path_filter=DailyPathFilter(date)
        )
    ]

    if tables:
        cumul = job.concat(
            *tables
        ).groupby(
            'content_name'
        ).aggregate(
            yandexuids_cumul=na.count_distinct_estimate('yandexuid'),
            tvt_cumul=na.sum('tvt'),
            lvt_cumul=na.sum('lvt'),
        )

    report_stream = job.table(
        daily_table
    ).groupby(
        'content_name'
    ).aggregate(
        tvt=na.sum('tvt'),
        lvt=na.sum('lvt'),
        yandexuids_daily=na.count_distinct_estimate('yandexuid')
    )

    if tables:
        report_stream = report_stream.join(
            cumul, type='left', by='content_name'
        )
    else:
        # NOTE(review): only yandexuids_cumul gets a default here;
        # tvt_cumul and lvt_cumul are absent in this branch, unlike the
        # joined branch above. Confirm whether that is intended.
        report_stream = report_stream.project(
            ne.all(), yandexuids_cumul=ne.const(0)
        )

    report_stream.project(
        ne.all(),
        fielddate=ne.const(str(date))
    ).sort(
        'fielddate', 'content_name'
    ).put(
        report_table
    )

    job.run()


def _publish_report(report_path, report_table, proxy):
    """Publish ``report_table`` to the Statface report at ``report_path``."""
    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=os.environ['STAT_LOGIN'],
        password=os.environ['STAT_TOKEN']
    )

    ns.StatfaceReport().path(
        report_path
    ).scale('daily').replace_mask(
        'fielddate'
    ).client(
        client
    ).remote_publish(
        proxy=proxy,
        table_path=report_table,
        async_mode=False,
        upload_config=False
    )


def process_date(cluster, date, proxy='hahn'):
    """Build the daily and report tables for ``date`` and publish the report.

    Steps, each run as its own job:
      1. Aggregate REDIR_ROOT/<date> into JOB_ROOT/<date>/daily.
      2. Join that day's per-content stats with cumulative stats computed
         over all daily tables dated on or before ``date`` and write
         JOB_ROOT/<date>/report.
      3. Publish the report table to Statface.

    Args:
        cluster: cluster object providing ``job()`` and
            ``driver.client.search``.
        date: date used in table paths and, via ``str(date)``, as the
            ``fielddate`` value.
        proxy: value passed as ``proxy`` to ``remote_publish``.

    Raises:
        KeyError: if STAT_LOGIN or STAT_TOKEN is not set in the environment.
    """
    daily_table = '{}/{}/daily'.format(JOB_ROOT, date)
    report_table = '{}/{}/report'.format(JOB_ROOT, date)

    _build_daily_table(cluster, date, daily_table)
    _build_report_table(cluster, date, daily_table, report_table)

    # Publish to the module-level ``report`` path (which main() sets from
    # --report) instead of a hard-coded path, so the report that is written
    # is the same one main() queries for the last published date.
    _publish_report(report, report_table, proxy)


def main():
    """Parse command-line options and run process_date for each date.

    With both --from and --to given, the dates come from
    ``date_range(from, to)``. Otherwise the last date of the Statface report
    is fetched and every path under REDIR_ROOT with a later date is
    processed, in ascending date order.

    Rebinds the module-level ``report`` to the --report value.
    """
    global report
    parser = argparse.ArgumentParser()
    parser.add_argument('--from')
    parser.add_argument('--to')
    parser.add_argument('--report', default=report)
    parser.add_argument('--proxy', default='hahn')
    parser.add_argument('--pool', default='loadbase')
    args = parser.parse_args()

    report = args.report
    # 'from' is a Python keyword, so args.from is not valid syntax.
    from_ = getattr(args, 'from')
    to_ = getattr(args, 'to')

    cluster = clusters.YT(
        token=os.environ['YT_TOKEN'],
        proxy=args.proxy,
        pool=args.pool
    )

    if from_ and to_:
        dates_to_process = list(date_range(from_, to_))
    else:
        last_date = get_dates_from_stat(
            headers=STAT_HEADERS,
            report=report,
            dimensions=[]
        )[-1]
        yt = cluster.driver.client

        def is_newer_than_last(path):
            # Extract the date once per path instead of twice.
            path_date = gdft(path)
            return path_date and path_date > last_date

        # Process in ascending date order: process_date aggregates every
        # daily table dated on or before the date it is handling, so the
        # earlier dates must already be written when a later one runs.
        dates_to_process = sorted(
            gdft(path) for path in yt.search(
                root=REDIR_ROOT,
                path_filter=is_newer_than_last
            )
        )

    for date in dates_to_process:
        process_date(cluster, date, proxy=args.proxy)


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
