#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import os
import argparse
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    statface as ns,
    Record
)
from pytils import (
    yql_run,
    date_range,
    yt_get_date_from_table as get_date,
    get_dates_from_stat,
    get_stat_headers
)
import re


def wrap_str(x):
    """Turn an arbitrary string into a lower-case, underscore-separated slug.

    The input is lower-cased, every character outside ``a-z`` (digits
    included) becomes ``_``, and consecutive underscores are then collapsed
    into one. Leading/trailing underscores are not stripped.
    """
    lowered = x.lower()
    # Anything that is not a plain ASCII lower-case letter becomes "_".
    underscored = re.sub('[^a-z]', '_', lowered)
    # Squash runs of "_" produced by adjacent replaced characters.
    return re.sub('_+', '_', underscored)


def process_date(
    date, query, report, cluster, cleanup=False, redo=False,
    title='run yql and push', scale='hourly'
):
    """Run the YQL query for one date and publish the result to Statface.

    Args:
        date: Date being processed; substituted for ``%DATE%`` in ``query``
            (via ``str(date)``) and used as the last path component of the
            temporary table.
        query: YQL query text containing ``%DATE%`` / ``%OUT_PATH%``
            placeholders.
        report: Statface report path; its slug (see ``wrap_str``) also names
            the temporary directory.
        cluster: Cluster object providing ``driver.exists``,
            ``driver.remove`` and ``read``.
        cleanup: When true, remove the temporary table after publishing.
        redo: When true, run the query even if the temporary table exists.
        title: Title passed through to ``yql_run``.
        scale: Statface scale passed to the report builder.

    Environment variables read: ``YQL_TOKEN`` (only when the query is run),
    ``STAT_LOGIN`` and ``STAT_TOKEN``.
    """
    report_path = "//home/videolog/tmp/yql/{report}/{date}".format(
        date=date, report=wrap_str(report)
    )

    # Skip the query when a previous run already left its output behind,
    # unless the caller explicitly asked to recompute.
    must_run_query = redo or not cluster.driver.exists(report_path)
    if must_run_query:
        print('running yql query')
        rendered_query = query.replace('%DATE%', str(date))
        rendered_query = rendered_query.replace('%OUT_PATH%', report_path)
        # NOTE(review): presumably the query writes its result to
        # %OUT_PATH%, which is read back below - confirm against the queries.
        yql_run(
            rendered_query,
            os.environ["YQL_TOKEN"],
            maxtries=60,
            title=title,
        )

    print('publishing to stat')
    # Materialise the whole table as a list of plain dicts for upload.
    recs = []
    for row in cluster.read(report_path):
        recs.append(row.to_dict())

    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=os.environ['STAT_LOGIN'],
        password=os.environ['STAT_TOKEN']
    )

    stat_report = ns.StatfaceReport()
    stat_report = stat_report.path(report)
    stat_report = stat_report.scale(scale)
    stat_report = stat_report.replace_mask('fielddate')
    stat_report = stat_report.client(client)
    stat_report = stat_report.data(recs)
    stat_report.publish()

    if not cleanup:
        return
    cluster.driver.remove(report_path)


def main():
    """Command-line entry point: run a YQL report for a set of dates.

    Dates are taken from ``--from``/``--to`` when both are given. Otherwise
    they are every date found under ``--available_dates_root`` that is
    later than the last date already present in the Statface report.
    Each selected date is handed to ``process_date``.

    ``--report`` and ``--query`` are mandatory: both are used
    unconditionally below, so a missing value is reported by argparse as a
    usage error instead of failing later with an unrelated exception.

    Environment variables read here: ``YT_PROXY`` and ``YT_TOKEN``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--report', required=True)
    parser.add_argument('--query', required=True)
    parser.add_argument(
        '--available_dates_root',
        default='//logs/strm-access-log/1d'
    )
    parser.add_argument('--from', default=None)
    parser.add_argument('--pool', default=None)
    parser.add_argument('--to', default=None)
    parser.add_argument('--scale', default='hourly')
    parser.add_argument('--cleanup', action='store_true')
    parser.add_argument('--redo', action='store_true')
    parser.add_argument('--title', default='run yql and push')
    args = parser.parse_args()

    cluster = clusters.YT(
        proxy=os.environ['YT_PROXY'],
        token=os.environ['YT_TOKEN']
    )
    if args.pool:
        cluster = cluster.env(pool=args.pool)

    with open(args.query, 'r') as f:
        query = f.read()

    # `from` is a Python keyword, so `args.from` is a syntax error;
    # getattr is the only way to read it (used for `to` for symmetry).
    from_ = getattr(args, 'from')
    to_ = getattr(args, 'to')

    # NOTE(review): if only one of --from/--to is supplied it is ignored and
    # the automatic date discovery below is used instead.
    if from_ and to_:
        dates = date_range(from_, to_)
    else:
        stat_headers = get_stat_headers()

        # NOTE(review): raises IndexError if the report has no dates yet.
        last_date_from_stat = get_dates_from_stat(
            headers=stat_headers,
            report=args.report,
            dimensions=[]
        )[-1]
        # NOTE(review): presumably path_filter keeps only paths for which
        # get_date returns something truthy - confirm against the YT client.
        available_dates = sorted(
            get_date(x) for x in cluster.driver.client.search(
                root=args.available_dates_root,
                path_filter=lambda x: get_date(x)
            )
        )

        # Only dates strictly after what Statface already has.
        dates = [x for x in available_dates if x > last_date_from_stat]
        print(dates)

    for date in dates:
        print('processing {}'.format(date))
        process_date(
            date, query, args.report, cluster,
            cleanup=args.cleanup, redo=args.redo, scale=args.scale,
            title=args.title
        )


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
