#!/usr/bin/env python
# -*- coding: utf-8 -*-

# In[1]:

from nile.api.v1 import (
    clusters,
    with_hints,
    Record,
    aggregators as na
)

from nile.api.v1 import filters as nf, aggregators as na
from qb2.api.v1 import filters as sf, extractors as se

from datetime import datetime as dt, timedelta

import json
import argparse
import requests
import re
import os
import urllib

from statface_client import StatfaceClient
import itertools


# In[2]:

# Where the logs will live on the cluster
job_root = '//home/videolog/strm_video'
# job_root = '//home/morda/video'

# Limit on the number of parallel operations
parallel_operations_limit = 30

# Error dimensions
error_dimensions = ['browser', 'browser_version', 'error_id', 'player_version']


# In[3]:

# Cluster handle used by the functions below; it stays None until the script
# is run as __main__, which assigns the configured cluster to it.
cluster = None


# In[4]:

def get_yesterday_date():
    """Return the day before the current local date as a 'YYYY-MM-DD' string."""
    one_day = timedelta(days=1)
    return (dt.now() - one_day).strftime('%Y-%m-%d')


def check_table(table):
    """Report whether ``table`` can be read through the global ``cluster``.

    The check attempts to read the table via a fresh job; any ordinary
    error raised by the read is treated as "table not available".

    :param table: table path passed to ``job.table``.
    :return: True if the read succeeded, False otherwise.
    """
    job = cluster.job()

    try:
        job.table(table).read()
    except Exception:
        # Only ordinary errors mean "missing or unreadable". A bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit,
        # making the script impossible to interrupt at this point.
        return False

    return True


# In[5]:

# Errors
def get_error_id(parsed_parameters):
    """Extract the error identifier from parsed request parameters.

    :param parsed_parameters: mapping whose values are indexable sequences
        (only the first element is used); may be None.
    :return: the first ``error_id`` value when that entry is present and
        non-empty; otherwise the first ``event_id`` value when it equals
        'Buffer.Empty'; otherwise None. Malformed input also yields None.
    """
    try:
        error_ids = parsed_parameters.get('error_id')
        if error_ids:
            return error_ids[0]

        first_event = parsed_parameters.get('event_id')[0]
        if first_event == 'Buffer.Empty':
            return first_event
    except Exception:
        # Best effort: a missing or malformed entry means "no error id".
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return None

    # Parameters were well-formed but describe no error.
    return None


def get_player_version(parsed_parameters):
    """Return the first ``player_version`` value from parsed parameters.

    :param parsed_parameters: mapping whose ``player_version`` entry is an
        indexable sequence; may be None.
    :return: the first element of the ``player_version`` entry, or None when
        the entry is missing, empty, or the input is malformed.
    """
    try:
        return parsed_parameters.get('player_version')[0]
    except Exception:
        # Best effort: any malformed input means "version unknown".
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return None


def get_video_errors(date):
    """Build the per-day table of player error records unless it is readable.

    Reads ``//statbox/strm-access-log/<date>`` through qb2, derives
    ``error_id`` and ``player_version`` from ``parsed_parameters`` and stores
    the filtered records in ``$job_root/<date>/errors_for_report``.

    :param date: day to process; interpolated into the table paths.
    """
    target_table = '$job_root/%s/errors_for_report' % date

    # Nothing to do when the result table can already be read.
    if check_table(target_table):
        return

    job = cluster.job()
    access_log = job.table('//statbox/strm-access-log/%s' % date)

    extracted_fields = [
        ['date', 'browser', 'browser_version', 'parsed_parameters'],
        se.custom(
            'error_id',
            lambda parsed_parameters: get_error_id(parsed_parameters)
        ),
        se.custom(
            'player_version',
            lambda parsed_parameters: get_player_version(parsed_parameters)
        ),
    ]

    record_filters = [
        # Keep only requests that do not carry 'for-regional-cache=1'.
        nf.custom(
            lambda x: x.find('for-regional-cache=1') == -1,
            'request'
        ),
        sf.defined('browser'),
        sf.defined('browser_version'),
        sf.defined('error_id'),
        sf.defined('player_version'),
        # Keep only records whose parameters mention an error or an event.
        nf.custom(
            lambda x: x is not None and (
                x.get('error_id') is not None or
                x.get('event_id') is not None
            ),
            'parsed_parameters'
        ),
    ]

    access_log.qb2(
        log='strm-access-log',
        fields=extracted_fields,
        filters=record_filters
    ).put(target_table)

    job.run()


def run_video_errors(date):
    """Count error records for every combination of error dimensions.

    For each subset of ``error_dimensions`` (the empty subset included) the
    records of ``$job_root/<date>/errors_for_report`` are grouped by that
    subset plus ``date`` and counted. Each result is written to its own
    table under ``$job_root/<date>/errors_report``, named by joining the
    group keys with '_'. Nothing happens if the report path is readable.

    :param date: day to process; interpolated into the table paths.
    """
    source_table = '$job_root/%s/errors_for_report' % date
    report_root = '$job_root/%s/errors_report' % date

    if check_table(report_root):
        return

    job = cluster.job()

    # All subsets of the dimensions, smallest first.
    dimension_subsets = itertools.chain.from_iterable(
        itertools.combinations(error_dimensions, size)
        for size in range(0, len(error_dimensions) + 1)
    )

    for chosen in dimension_subsets:
        group_keys = list(chosen)
        group_keys.append('date')
        target_table = report_root + '/' + '_'.join(group_keys)

        grouped = job.table(source_table).groupby(*group_keys)
        grouped.aggregate(count=na.count()).put(target_table)

    job.run()


def pub_video_errors(date):
    """Upload the aggregated error counts for ``date`` to Statface.

    Reads every per-combination table under
    ``$job_root/<date>/errors_report`` (one per subset of
    ``error_dimensions``) and turns each record into a row holding
    ``fielddate``, ``count`` and one value per error dimension. A dimension
    with no truthy value in the record is filled with '_total_'. All rows
    are then sent to the 'Morda/Strm/player_errors' report with
    ``report.upload_data('d', data)``.

    Statface credentials are taken from the ``STATFACE_USER`` and
    ``STATFACE_PASSWORD`` environment variables when they are set.

    :param date: day to publish; interpolated into the table paths.
    """
    data = []
    total = '_total_'

    for size in range(0, len(error_dimensions) + 1):
        for subset in itertools.combinations(error_dimensions, size):
            group_keys = list(subset) + ['date']
            table_path = '$job_root/%s/errors_report' % date + \
                '/' + '_'.join(group_keys)

            for item in cluster.job().table(table_path).read():
                row = {
                    'fielddate': item['date'],
                    'count': item['count']
                }

                # Dimensions outside the current subset have no value in
                # the record and are reported as the total.
                for dimension in error_dimensions:
                    row[dimension] = item.get(dimension) or total

                data.append(row)

    # NOTE(review): these credentials used to be available only as literals
    # in source. The environment variables let a deployment supply them
    # without a code change; the literals remain solely as a
    # backward-compatible fallback and should be rotated and removed.
    stat_user = os.environ.get('STATFACE_USER', 'robot_ivan-karev')
    stat_password = os.environ.get('STATFACE_PASSWORD', 'oos4Fah2Ai')

    stat_client = StatfaceClient(
        stat_user, stat_password, host='upload.stat.yandex-team.ru:443')
    report = stat_client.get_report('Morda/Strm/player_errors')

    # with open('errors.yaml') as config_fd:
    #    config = config_fd.read()

    # report.upload_config(config)
    report.upload_data('d', data)


# In[6]:

def clean_tables(date):
    """Remove this job's per-day nodes for ``date`` from the cluster.

    Recursively removes ``<job_root>/<date>/errors`` and
    ``<job_root>/<date>/errors_report`` through ``cluster.driver``.

    NOTE(review): get_video_errors in this file writes to
    ``<date>/errors_for_report``, which is not removed here, while no
    function visible in this file writes ``<date>/errors``. Confirm that
    the list below is the intended one.

    :param date: day whose nodes are removed; interpolated into the paths.
    """
    day_root = job_root + '/%s' % date

    for leaf in ('/errors', '/errors_report'):
        cluster.driver.remove(day_root + leaf, recursive=True)


# In[7]:


def get_date(s):
    """Parse the first 'YYYY-MM-DD' fragment found in ``s``.

    :param s: text that may contain a date, e.g. a table path.
    :return: a ``datetime.date`` for the first fragment found, or None when
        ``s`` is not searchable text, holds no such fragment, or the
        fragment is not a valid calendar date.
    """
    try:
        found = re.search(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', s)
        # ``found`` is None when nothing matched; the resulting
        # AttributeError is handled below like any other parse failure.
        fragment = found.group(0)
        parsed = dt.strptime(fragment, '%Y-%m-%d')
        return parsed.date()
    except (AttributeError, TypeError, ValueError):
        return None


def run(**kwargs):
    """Run the whole pipeline for one day and print the elapsed time.

    Steps, in order: build the error table, aggregate it, publish the
    aggregates, then remove the nodes handled by ``clean_tables``.

    :param kwargs: optional ``date`` keyword naming the day to process;
        when it is missing or falsy, yesterday's date is used.
    """
    date = kwargs.get('date') or get_yesterday_date()

    start_ts = dt.now()

    get_video_errors(date)
    run_video_errors(date)
    pub_video_errors(date)
    clean_tables(date)

    elapsed_seconds = int((dt.now() - start_ts).total_seconds())
    minutes, seconds = divmod(elapsed_seconds, 60)
    # A parenthesised single-argument print is valid on both Python 2 and
    # Python 3 and matches the other print calls in this file. The fields
    # are zero-padded so the time reads as MM:SS.
    print('Done! Running time %02d:%02d' % (minutes, seconds))


# In[56]:

def main():
    """Process every log date that is newer than the last published one.

    Configures the global ``cluster``, asks Statface for the latest
    ``fielddate`` of the 'Morda/Strm/player_errors' report, lists the dated
    tables under ``//statbox/strm-access-log`` and calls ``run`` for each
    date later than the last published one, in ascending order.

    Statface credentials are taken from the ``STATFACE_USER`` and
    ``STATFACE_PASSWORD`` environment variables when they are set.

    :raises KeyError: if the ``YT_TOKEN`` environment variable is not set.
    :raises RuntimeError: if the report returns no values.
    """
    global cluster

    # No options are defined; parsing still provides --help and rejects
    # unexpected command-line arguments.
    argparse.ArgumentParser().parse_args()

    cluster = clusters.yt.Hahn(token=os.environ['YT_TOKEN']).env(
        templates=dict(
            job_root=job_root
        ),
        parallel_operations_limit=parallel_operations_limit
    )

    report = 'Morda/Strm/player_errors'
    # NOTE(review): these credentials used to be available only as literals
    # in source. The environment variables let a deployment supply them
    # without a code change; the literals remain solely as a
    # backward-compatible fallback and should be rotated and removed.
    headers = {
        'StatRobotUser': os.environ.get('STATFACE_USER', 'robot_ivan-karev'),
        'StatRobotPassword': os.environ.get('STATFACE_PASSWORD', 'oos4Fah2Ai')
    }
    print('getting dates from report')
    # NOTE(review): verify=False disables TLS certificate verification.
    # It is left unchanged here; confirm whether a CA bundle can be used.
    req = requests.get(
        'https://upload.stat.yandex-team.ru/{}?browser=_total_&browser_version=_total_&error_id=_total_&player_version=_total_&_type=json'.format(
            report),
        headers=headers, verify=False
    )
    # Fail on an HTTP error here rather than on a confusing JSON/key error.
    req.raise_for_status()
    print('parsing response')

    values = req.json()['values']
    if not values:
        raise RuntimeError(
            'report {} returned no values, cannot determine the last '
            'published date'.format(report)
        )

    latest_fielddate = max(item['fielddate'] for item in values)
    last_date = get_date(latest_fielddate.split(' ')[0])

    print('last date: {}'.format(last_date))

    print('getting available dates')

    table_paths = cluster.driver.client.search(
        root='//statbox/strm-access-log', node_type="table"
    )
    parsed_dates = (get_date(path) for path in table_paths)
    available_dates = sorted(day for day in parsed_dates if day)

    # An unparseable last date means every available date is processed.
    # This used to rely on Python 2 ordering None below any date; the check
    # is explicit so it does not depend on the interpreter version.
    dates_to_process = [
        day for day in available_dates
        if last_date is None or day > last_date
    ]

    print('dates to process: {}'.format(dates_to_process))

    for day in dates_to_process:
        print('running for {}'.format(day))
        run(date=day.isoformat())


if __name__ == '__main__':
    main()
