#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import os
import argparse
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    statface as ns,
    Record
)
from yql.api.v1.client import YqlClient
from videolog_common import (
    YqlRunner,
    date_range,
    yt_get_date_from_table as get_date,
    get_dates_from_stat,
    get_stat_headers
)


def process_date(date, query, report, cluster):
    """Run the YQL query for one date and publish the result to Statface.

    Steps, in order:
      1. Format ``query`` with ``date`` and execute it through ``YqlRunner``.
      2. Read every record of ``//home/videolog/mma_1913/yql/<date>`` via
         ``cluster`` (presumably the table the query writes -- the query
         text is not visible from this function).
      3. Publish those records to the Statface report ``report`` with a
         daily scale, replacing by the ``fielddate`` mask.

    Args:
        date: Date value substituted into both the query text and the
            result table path through ``str.format``.
        query: Query template containing a ``{date}`` placeholder.
        report: Statface report path to publish to.
        cluster: Cluster object used to read the result table.

    Raises:
        KeyError: If ``YQL_TOKEN``, ``STAT_LOGIN`` or ``STAT_TOKEN`` is
            missing from the environment.
    """
    result_table = "//home/videolog/mma_1913/yql/{date}".format(date=date)

    yql_client = YqlClient(token=os.environ["YQL_TOKEN"])
    runner = YqlRunner(yql_client, title="MMA-1913 | YQL")

    print('running yql query')
    rendered_query = query.format(date=date)
    runner.run(rendered_query)

    print('publishing to stat')
    # The whole table is materialised as plain dicts before upload.
    rows = []
    for record in cluster.read(result_table):
        rows.append(record.to_dict())

    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=os.environ['STAT_LOGIN'],
        password=os.environ['STAT_TOKEN']
    )

    # Configure the report step by step; each call returns the report
    # object to continue with.
    stat_report = ns.StatfaceReport().path(report)
    stat_report = stat_report.scale('daily')
    stat_report = stat_report.replace_mask('fielddate')
    stat_report = stat_report.client(stat_client)
    stat_report = stat_report.data(rows)
    stat_report.publish()


def main():
    """Parse CLI arguments and run ``process_date`` for each pending date.

    Date selection:
      * When both ``--from`` and ``--to`` are given, the dates come from
        ``date_range(from, to)``.
      * Otherwise, every date found under ``//logs/strm-access-log/1d``
        that is greater than the last date returned by
        ``get_dates_from_stat`` for the report is processed. If only one
        of ``--from`` / ``--to`` was given, it is ignored and a warning is
        printed.

    Raises:
        KeyError: If ``YT_PROXY`` or ``YT_TOKEN`` is missing from the
            environment.
        IndexError: If ``get_dates_from_stat`` returns no dates, so there
            is no starting point for automatic date selection.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--report', default='Video/Others/Strm/imp_id_fails')
    parser.add_argument('--query', default='mma_1913_query.yql')
    # ``from`` is a Python keyword, so store the bounds under explicit
    # attribute names instead of reaching for them with getattr().
    parser.add_argument('--from', dest='from_', default=None)
    parser.add_argument('--pool', default=None)
    parser.add_argument('--to', dest='to_', default=None)
    args = parser.parse_args()

    kwargs = dict(
        proxy=os.environ['YT_PROXY'],
        token=os.environ['YT_TOKEN'],
    )
    # Only pass a pool when one was requested explicitly.
    if args.pool:
        kwargs['pool'] = args.pool

    cluster = clusters.YT(**kwargs)

    with open(args.query, 'r') as f:
        query = f.read()

    if args.from_ and args.to_:
        dates = date_range(args.from_, args.to_)
    else:
        if args.from_ or args.to_:
            # A half-specified range used to be dropped silently; keep the
            # automatic fallback but tell the user about it.
            print(
                'warning: --from and --to only take effect together; '
                'ignoring the one given and picking dates automatically'
            )

        published_dates = get_dates_from_stat(
            headers=get_stat_headers(),
            report=args.report,
            dimensions=[]
        )
        try:
            # Presumably the dates come back in ascending order, making the
            # last element the most recent one -- TODO confirm.
            last_date_from_stat = published_dates[-1]
        except IndexError:
            # Same exception type as before, but with an actionable message
            # instead of "list index out of range".
            raise IndexError(
                'get_dates_from_stat returned no dates for report {}; '
                'pass both --from and --to to choose the dates '
                'explicitly'.format(args.report)
            )

        # Presumably path_filter keeps only the paths from which get_date
        # can extract a date -- verify against the search() API.
        available_dates = sorted(
            get_date(x) for x in cluster.driver.client.search(
                root='//logs/strm-access-log/1d',
                path_filter=lambda x: get_date(x)
            )
        )

        dates = [x for x in available_dates if x > last_date_from_stat]

    for date in dates:
        print('processing {}'.format(date))
        process_date(date, query, args.report, cluster)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
