import argparse
import datetime
import time
import sys

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record,
)
import nile
import pandas as pd
import yt.wrapper as yt
import logging
# Emit only ERROR-and-above records, each tagged with the time, the source
# file name and the line number.
logging.basicConfig(format='[%(asctime)s] %(filename)s[LINE:%(lineno)d] %(levelname)-8s %(message)s',
                    level=logging.ERROR)

# Prefix of the job name passed to cluster.job() in main().
JOB_NAME = 'TURBO ERRORS'
# Directory whose tables main() lists and then reads one of.
LOG_ROOT = '//logs/redir-log/30min'
# Maximum number of attempts main() makes before stopping: 96.
RETRY_NUM = 24 * 4
# Pause between attempts, handed to time.sleep(): 900 seconds (15 minutes).
SLEEP_TIME = 60 * 15


def str_to_timestamp(str_time):
    """Convert a ``YYYY-MM-DDTHH:MM:SS`` string to whole seconds.

    The result is the distance between the parsed (naive) datetime and
    ``datetime.datetime.fromtimestamp(0)``, i.e. the epoch expressed in the
    local time zone of the running process.

    Args:
        str_time: time string in the ``%Y-%m-%dT%H:%M:%S`` format.

    Returns:
        The number of seconds as an ``int`` (fractional part truncated).

    Raises:
        ValueError: if ``str_time`` does not match the expected format.
    """
    moment = datetime.datetime.strptime(str_time, '%Y-%m-%dT%H:%M:%S')
    local_epoch = datetime.datetime.fromtimestamp(0)
    elapsed = moment - local_epoch
    return int(elapsed.total_seconds())


def firstreduce(groups):
    """Reducer turning raw log rows into turbo fallback/error event records.

    Every row's ``value`` field is read as a tab-separated list of
    ``name=value`` pairs (items without ``=`` are ignored; when a name is
    repeated the last occurrence wins). A row whose ``path`` contains one of
    the known markers produces one record per matching marker.

    Args:
        groups: iterable of ``(key, records)`` pairs; each record must
            support ``record['value']`` returning a string.

    Yields:
        ``Record`` objects with the fields ``reqid``, ``yandexuid``,
        ``req_day`` (``YYYY-MM-DD``, derived from the row's ``timestamp``
        via ``datetime.fromtimestamp``, i.e. in the local time zone of the
        process) and ``event_type``.

    Raises:
        KeyError: if a *matching* row has no ``timestamp``, ``reqid`` or
            ``yandexuid`` pair.
        ValueError: if a matching row's ``timestamp`` is not a number.
    """
    # Checked in this order; a path containing both markers yields both events.
    event_types_by_marker = (
        ('690.2719.2010', 'touch|turbo_fallback'),
        ('690.2719.1030', 'touch|turbo_error'),
    )
    for key, recs in groups:
        for request in recs:
            value_dict = {}
            for item in request['value'].split('\t'):
                # Split on the first '=' only, so values that themselves
                # contain '=' are kept whole instead of being truncated.
                name, separator, value = item.partition('=')
                if separator:
                    value_dict[name] = value

            path = value_dict.get('path')
            if path is None:
                continue
            matched = [
                event_type
                for marker, event_type in event_types_by_marker
                if marker in path
            ]
            if not matched:
                # Rows for other paths are skipped before the timestamp is
                # touched, so a missing or malformed timestamp on an
                # irrelevant row can no longer abort the whole reduce.
                continue

            moment = datetime.datetime.fromtimestamp(float(value_dict['timestamp']))
            req_day = moment.date().isoformat()
            for event_type in matched:
                yield Record(
                    reqid=value_dict['reqid'],
                    yandexuid=value_dict['yandexuid'],
                    req_day=req_day,
                    event_type=event_type,
                )


def argument_parser():
    """Parse the command-line options of the script.

    Recognised options (all optional, defaulting to ``None``):
        -p   pool       (str)  stored as ``pool``
        -r   result path (str) stored as ``rpath``
        -t   timestamp  (int)  stored as ``timestamp``
        -dc  cluster    (str)  stored as ``cl``

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # (flag, destination attribute, converter, help text), in declaration order.
    option_specs = (
        ('-p', 'pool', str, 'you hahn pool'),
        ('-r', 'rpath', str, 'result path'),
        ('-t', 'timestamp', int, 'timestamp'),
        ('-dc', 'cl', str, 'cluster_type'),
    )
    cli = argparse.ArgumentParser(description='Get parameters')
    for flag, destination, converter, help_text in option_specs:
        cli.add_argument(flag, dest=destination, type=converter, help=help_text)
    return cli.parse_args()


def _find_log_table(table_names, timestamp):
    """Return the table name whose time falls in ``(timestamp - 1800, timestamp]``.

    Names are converted with ``str_to_timestamp``. When several names match,
    the last one in ``table_names`` wins; ``None`` is returned when none does.
    """
    found = None
    for table in table_names:
        table_ts = str_to_timestamp(table)  # converted once per table
        if (table_ts <= timestamp) and (table_ts > timestamp - 1800):
            found = table
    return found


def main():
    """Build and run the job that counts turbo events for one log table.

    Steps:
      1. Read the command-line options and echo the cluster name to stdout.
      2. Look under ``LOG_ROOT`` for the table covering the requested
         timestamp.
      3. Run a job that reduces the table with ``firstreduce``, groups the
         events by ``(reqid, req_day, event_type)``, then counts those groups
         per ``(event_type, req_day)`` and writes the result to
         ``<job_root>/<table timestamp>``.

    If the table is missing or any step fails, the attempt is repeated after
    ``SLEEP_TIME`` seconds, up to ``RETRY_NUM`` attempts in total.
    """
    args = argument_parser()
    timestamp = args.timestamp
    job_root = args.rpath
    cl = args.cl
    # Works on both Python 2 and 3, unlike the ``print >> stream`` statement.
    sys.stdout.write('{}\n'.format(cl))
    cluster_type = cl.title()

    # Values this large are presumably milliseconds; reduce them to seconds.
    # Floor division keeps the value an integer on every Python version.
    if timestamp > 100000000000:
        timestamp = timestamp // 1000

    for retry in range(RETRY_NUM):
        try:
            needed_table = _find_log_table(yt.list(LOG_ROOT), timestamp)
            if needed_table is None:
                logging.error(
                    'Table not found on try {}, go to sleep for {} second'.format(retry, SLEEP_TIME)
                )
                time.sleep(SLEEP_TIME)
                continue

            # Pick the cluster class named by the -dc option (e.g. "hahn" ->
            # clusters.Hahn) instead of a literal ``cluster_type`` attribute.
            cluster_factory = getattr(clusters, cluster_type)
            cluster = cluster_factory(pool=args.pool).env(
                templates=dict(
                    job_root=job_root,
                    log_root=LOG_ROOT,
                )
            )

            job = cluster.job(JOB_NAME + ': filter_logs_{}'.format(needed_table))
            all_log = job.table('$log_root/{}'.format(needed_table))
            result_logs = all_log.groupby('key').sort('subkey').reduce(firstreduce)
            # One row per (reqid, req_day, event_type) combination ...
            aggregate_logs = result_logs.groupby('reqid', 'req_day', 'event_type').aggregate(
                count=na.count(),
            )
            # ... then the number of such rows per event type and day.
            aggregate_logs.groupby('event_type', 'req_day').aggregate(
                count=na.count(),
            ).put('$job_root/{}'.format(str_to_timestamp(needed_table)))
            job.run()
            break
        except Exception:
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit stop
            # the script; logging.exception also records the traceback.
            logging.exception(
                'Error with try {}, go to sleep for {} second'.format(retry, SLEEP_TIME)
            )
            time.sleep(SLEEP_TIME)
    else:
        # Reached only when no attempt ended with ``break``.
        logging.error('Giving up after {} tries'.format(RETRY_NUM))


# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
