#!/usr/bin/env python
# -*- coding: utf-8 -*-

u"""Регулярная выгрузка запросов в веб и для колдунщиков."""

import os
from os.path import isfile

import re
import shlex

from time import sleep
from copy import deepcopy
from glob import glob
from subprocess import call, Popen, PIPE
from collections import defaultdict
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta


import query
import wizard_query

from helper import set_expiration_time

import yt.wrapper as ytw
from nile.files import LocalFile
from nile.api.v1 import clusters

# Pauses between successive checks for the source data, in minutes
# (each value is multiplied by 60 before being handed to sleep()).
RETRIES = [5, 25, 30, 60, 60, 120]
# YT table that main() reads the export state from and writes it back to.
DATA_TABLE_PATH = '//home/antiwizard/query_viewer/data'


class Args(object):
    u"""Catch-all bag used to pass a set of parameters as a single object.

    Keyword arguments given to the constructor become attributes. Reading a
    regular attribute that was never set yields ``None`` instead of raising.
    """

    def __init__(self, **kwargs):
        u"""Store every keyword argument in the instance ``__dict__``."""
        self.__dict__.update(kwargs)

    def __getattr__(self, key):
        u"""Return the stored value for ``key``, or ``None`` when it is unset.

        Python calls this only after the normal attribute lookup has failed.

        Raises:
            AttributeError: For dunder names. Protocol probes such as
                ``hasattr(obj, '__setstate__')`` (used by ``copy`` and
                ``pickle``) must see the attribute as missing; answering
                ``None`` makes them try to call ``None``.
        """
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        return self.__dict__.get(key)

    def __setattr__(self, key, value):
        u"""Store ``value`` under ``key`` in the instance ``__dict__``."""
        self.__dict__[key] = value


def remove_files():
    u"""Delete the local ``.xz`` archives matching the export file names.

    Every combination of the four file-name prefixes and the three platforms
    is turned into a ``<prefix>_<platform>*.xz`` glob relative to the current
    directory; only regular files that match are removed.
    """
    prefixes = ('queries_aggr', 'queries', 'wizard_queries_aggr', 'wizard_queries')
    platforms = ('desktop', 'tablet', 'touch')
    patterns = [
        '{prefix}_{platform}*.xz'.format(prefix=prefix, platform=platform)
        for prefix in prefixes
        for platform in platforms
    ]

    for pattern in patterns:
        for candidate in glob(os.path.join(os.path.curdir, pattern)):
            # A directory may match the pattern too; leave it alone.
            if not isfile(candidate):
                continue
            os.remove(candidate)


def datetime_diff(t_a, t_b):
    u"""Return the absolute span between two datetimes as ``'Hh Mm Ss'``.

    The argument order does not matter. Hours are not capped at 24, so a span
    of a day or more is still reported in full (e.g. ``'26h 0m 0s'``) rather
    than losing its whole-day part. Fractions of a second are dropped.

    Args:
        t_a (datetime.datetime): One end of the interval.
        t_b (datetime.datetime): The other end of the interval.
    Returns:
        str: The span formatted as ``'{hours}h {minutes}m {seconds}s'``.
    """
    total_seconds = int(abs(t_a - t_b).total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '{h}h {m}m {s}s'.format(h=hours, m=minutes, s=seconds)


def duration(func):
    u"""Decorator that times a call and reports how long it took.

    The decorated callable returns a 2-tuple instead of its plain result:
    ``(result, '<name> took <time> to finish')``, where ``<time>`` is produced
    by ``datetime_diff``.

    Args:
        func (callable): Function to wrap.
    Returns:
        callable: Wrapper accepting the same positional and keyword arguments.
    """
    # Imported here so the decorator does not depend on the file-level imports.
    from functools import wraps

    @wraps(func)
    def wrap(*args, **kwargs):
        before = datetime.now()
        function_output = func(*args, **kwargs)
        after = datetime.now()
        return (
            function_output,
            # __name__ is available on both Python 2 and 3, unlike func_name.
            '{function} took {time} to finish'.format(
                function=func.__name__, time=datetime_diff(before, after)),
        )
    return wrap


def get_time_now():
    u"""Return the current time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    moment = datetime.now()
    return '{0:%Y-%m-%d %H:%M:%S}'.format(moment)


def create_data_json():
    u"""Build an empty template of the export state.

    The result is nested as ``source -> kind -> platform -> {'dates': []}``
    with sources ``queries``/``wizards``, kinds ``all``/``unique`` and
    platforms ``desktop``/``tablet``/``touch``. Every ``dates`` list is a
    separate object.
    """
    sources = ('queries', 'wizards')
    kinds = ('all', 'unique')
    platforms = ('desktop', 'tablet', 'touch')

    return {
        source: {
            kind: {platform: {'dates': []} for platform in platforms}
            for kind in kinds
        }
        for source in sources
    }


def _days_since_last_export(dates, today):
    u"""Return whole days since the newest entry of ``dates``, or ``None`` if there is none."""
    # A freshly created template has an empty list; treat it like "never exported".
    if not dates or not dates[-1][0]:
        return None
    last_date = datetime.strptime(dates[-1][0], '%Y-%m-%d').date()
    return (today - last_date).days


def needs_update(_js, _today):
    u"""Check whether the tables need to be refreshed.

    Only the ``all``/``desktop`` history of each source is inspected. Web
    queries are due when the newest recorded date is more than 7 days old,
    wizard queries when it is more than 1 day old. A source with no recorded
    date at all is always due.

    Args:
        _js (dict): Information about the data exported so far.
        _today (datetime.date): Today's date.
    Returns:
        (bool, bool): Whether the web queries and the wizard queries,
            respectively, need to be updated.
    """
    q_age = _days_since_last_export(_js['queries']['all']['desktop']['dates'], _today)
    wq_age = _days_since_last_export(_js['wizards']['all']['desktop']['dates'], _today)

    q_needs_update = q_age is None or q_age > 7
    wq_needs_update = wq_age is None or wq_age > 1

    return q_needs_update, wq_needs_update


def update_queries(_yesterday_str):
    u"""Start the export of new (web) queries for the given date.

    When neither ``query.check_q_exists`` nor ``query.check_nano_us_exists``
    reports success, the source check is polled again after each pause from
    ``RETRIES`` (minutes). When ``check_q_exists`` succeeds and
    ``check_q_empty`` does not, nothing is run and ``True`` is returned.
    Otherwise ``query.run_nano_parse`` is started if the source check passes.

    Args:
        _yesterday_str (str): Date string handed to the ``query`` helpers.
    Returns:
        bool: ``True`` if the result was already in place or the parse was
            started, ``False`` if the source check still fails.
    """
    nothing_available = (
        not query.check_q_exists(_yesterday_str)
        and not query.check_nano_us_exists(_yesterday_str)
    )

    if nothing_available:
        # Poll for the source, pausing longer and longer between checks.
        for minutes in RETRIES:
            if query.check_nano_us_exists(_yesterday_str):
                break
            sleep(minutes * 60)
    elif query.check_q_exists(_yesterday_str) and not query.check_q_empty(_yesterday_str):
        return True

    if not query.check_nano_us_exists(_yesterday_str):
        return False

    query.run_nano_parse(Args(session_dates=_yesterday_str))
    return True


def update_wizard_queries(_yesterday_str):
    u"""Start the export of new (wizard) queries for the given date.

    When neither ``wizard_query.check_wq_exists`` nor
    ``wizard_query.check_us_exists`` reports success, the source check is
    polled again after each pause from ``RETRIES`` (minutes). When
    ``check_wq_exists`` succeeds and ``check_wq_empty`` does not, nothing is
    run and ``True`` is returned. Otherwise ``wizard_query.main`` is started
    if the source check passes.

    Args:
        _yesterday_str (str): Date string handed to the ``wizard_query`` helpers.
    Returns:
        bool: ``True`` if the result was already in place or the export was
            started, ``False`` if the source check still fails.
    """
    nothing_available = (
        not wizard_query.check_wq_exists(_yesterday_str)
        and not wizard_query.check_us_exists(_yesterday_str)
    )

    if nothing_available:
        # Poll for the source, pausing longer and longer between checks.
        for minutes in RETRIES:
            if wizard_query.check_us_exists(_yesterday_str):
                break
            sleep(minutes * 60)
    elif wizard_query.check_wq_exists(_yesterday_str) and not wizard_query.check_wq_empty(_yesterday_str):
        return True

    if not wizard_query.check_us_exists(_yesterday_str):
        return False

    wizard_query.main(Args(session=_yesterday_str))
    return True


def split_map(records, desktop, tablet, touch):
    u"""Mapper that routes each record to the output of its platform.

    Args:
        records: Iterable of records exposing a ``platform`` attribute.
        desktop (callable): Receives records whose platform is ``'desktop'``.
        tablet (callable): Receives records whose platform is ``'tablet'``.
        touch (callable): Receives records whose platform is ``'touch'``.

    Records with any other platform value are dropped.
    """
    routes = (
        ('desktop', desktop),
        ('tablet', tablet),
        ('touch', touch),
    )
    for record in records:
        for platform_name, emit in routes:
            if record.platform == platform_name:
                emit(record)


def run_split(_cluster, _table_path):
    u"""Split a table by platform and set an expiration time on the results.

    A job on ``_cluster`` maps ``_table_path`` through ``split_map`` and puts
    the three output streams into ``$tmp_folder/desktop``,
    ``$tmp_folder/tablet`` and ``$tmp_folder/touch``. After the job has run,
    ``set_expiration_time`` is called on each of those tables.

    Args:
        _cluster: Cluster object providing ``job()``.
        _table_path (str): Path (may contain templates) of the table to split.
    """
    tmp_tables = ('$tmp_folder/desktop', '$tmp_folder/tablet', '$tmp_folder/touch')

    job = _cluster.job()
    source = job.table(_table_path)
    # The mapper yields exactly three streams, in split_map's output order.
    desktop_stream, tablet_stream, touch_stream = source.map(
        split_map, memory_limit=2048, intensity='data')

    for stream, destination in zip((desktop_stream, tablet_stream, touch_stream), tmp_tables):
        stream.put(destination)
    job.run()

    for destination in tmp_tables:
        set_expiration_time(job.table(destination))


def split_and_download(_yesterday_str, _q_up, _wq_up, js):
    u"""Run the split operations and download the results for the tables.

    For each enabled source a start line is appended to ``qv_update.log``,
    then the plain table and the aggregated table are each split with
    ``run_split`` and handed to ``download_tables``.

    Args:
        _yesterday_str (str): Date of the data being processed.
        _q_up (bool): Process the web query tables.
        _wq_up (bool): Process the wizard query tables.
        js (dict): Export state, passed through to ``download_tables``.
    """
    local_files = [LocalFile(name) for name in ['qv_update.py']]
    templates = dict(
        queries='//home/search-research/antonka/queries',
        wizard_queries='//home/search-research/antonka/wizard-queries',
        tmp_folder='//home/search-research/antonka/temp',
        date=_yesterday_str,
        date_aggr=_yesterday_str + '_aggr'
    )
    cluster = clusters.Hahn().env(templates=templates, files=local_files)

    def process(start_message, steps):
        u"""Log the start line, then split and download every step in order."""
        with open('qv_update.log', 'a') as logfile:
            logfile.write(start_message.format(
                ystd=_yesterday_str,
                now=get_time_now()
            ))
        for table_path, filename, js_path, ttl, keep in steps:
            run_split(cluster, table_path)
            download_tables(filename, js_path, _yesterday_str, js, ttl=ttl, dates_to_keep=keep)

    if _q_up:
        process('\nStarted working on queries for {ystd} at {now}\n', [
            ('$queries/$date/queries', 'queries', ('queries', 'all'), 30, 4),
            ('$queries/$date/aggregated_queries', 'queries_aggr', ('queries', 'unique'), 30, 4),
        ])

    if _wq_up:
        process('\nStarted working on wizards for {ystd} at {now}\n', [
            ('$wizard_queries/$date', 'wizard_queries', ('wizards', 'all'), 7, 7),
            ('$wizard_queries/$date_aggr', 'wizard_queries_aggr', ('wizards', 'unique'), 7, 7),
        ])


def _parse_upload_output(output):
    u"""Extract the task and resource links from the ``ya upload`` output.

    Returns a dict with the keys ``task_link`` and ``resource_link``; a link
    that cannot be found in ``output`` is an empty string.
    """
    links = {'task_link': '', 'resource_link': ''}
    for line in output.split('\n'):
        line = line.strip()
        if 'Created task' in line:
            found = re.search(r'Created task REMOTE_COPY_RESOURCE (?P<task>.*)', line)
            if found:
                links['task_link'] = found.group('task')
        elif 'Download link' in line:
            found = re.search(r'Download link: (?P<resource>.*)', line)
            if found:
                links['resource_link'] = found.group('resource')
    return links


def download_tables(filename, js_path, ystd, js, ttl, dates_to_keep):
    u"""Dump the per-platform tables, compress them and upload them to sandbox.

    For each of ``desktop``, ``tablet`` and ``touch`` the temporary table is
    read with ``yt read`` into ``<filename>_<platform>_<ystd>``, compressed
    with ``xz`` and uploaded with ``ya upload`` (up to 4 attempts, 60 seconds
    apart). Timings and links are appended to ``qv_update.log``. On success
    ``(ystd, resource_link)`` is appended to the platform's ``dates`` history
    in ``js``; after a failed upload nothing is recorded for that platform.

    Args:
        filename (str): Base name of the export; also the key into the
            column lists (``queries``, ``queries_aggr``, ``wizard_queries``,
            ``wizard_queries_aggr``).
        js_path (tuple): Two keys selecting the branch of ``js`` to update.
        ystd (str): Date of the data, used in file names and log lines.
        js (dict): Export state; modified in place.
        ttl (int): Value for the ``--ttl`` option of ``ya upload``.
        dates_to_keep (int): Maximum number of entries kept in each history.
    """
    # The helpers take their inputs explicitly (positionally, which is all the
    # timing decorator needs to forward) instead of closing over loop variables.
    @duration
    def read_table(command, destination):
        call(shlex.split(command), stdout=destination)

    @duration
    def compress_file(command, log):
        call(shlex.split(command), stdout=log)

    @duration
    def upload_to_sandbox(command):
        upload = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes and waits for the child. Calling
        # wait() first can deadlock once the child fills a pipe buffer.
        return upload.communicate()[1]

    table_names = ['desktop', 'tablet', 'touch']
    upload_attempts = 4

    fields = dict(
        queries='query;platform;domain;region',
        queries_aggr='query;platform;domain;count',
        wizard_queries='query;platform;domain;region;path;wiz_type',
        wizard_queries_aggr='query;platform;domain;path;wiz_type;count',
    )

    # Read the token once and close the file right away.
    with open('.token') as token_file:
        token = token_file.read().strip()

    for tname in table_names:
        local_name = filename + '_' + tname + '_' + ystd
        table_read = 'yt read {table} --format "<columns=[{columns}]>schemaful_dsv"' \
            .format(
                table='//home/search-research/antonka/temp/' + tname,
                columns=fields[filename],
            )
        file_compress = 'xz -T 0 -fvz {filename}'.format(filename=local_name)
        # NOTE(review): the token is passed on the command line, so it can be
        # seen in the process list while the upload runs — confirm whether
        # `ya upload` can take it from elsewhere.
        ya_upload = 'ya upload --skynet --ttl={ttl} -d=\"{message}\" {filename} --token=\"{token}\"' \
            .format(
                ttl=ttl,
                message=filename + '_' + tname + ' for ' + ystd,
                filename=local_name + '.xz',
                token=token
            )

        with open('qv_update.log', 'a') as logfile:
            with open(local_name, 'w') as output_file:
                read_duration = read_table(table_read, output_file)[1]
            compress_duration = compress_file(file_compress, logfile)[1]

            # Links are reset for every platform so that a failed upload can
            # never reuse the links of the previous one.
            links = {'task_link': '', 'resource_link': ''}
            uploaded = False
            for _attempt in range(upload_attempts):
                ya_upload_output, ya_upload_duration = upload_to_sandbox(ya_upload)
                logfile.write('\n' + ya_upload_output + '\n')
                links = _parse_upload_output(ya_upload_output)
                if links['task_link'] and links['resource_link']:
                    uploaded = True
                    break
                sleep(60)

            logfile.write(
                '\nLog: {filename}_{tname}\tLog Time: {ystd}\tFinish Time: {now}\n'.format(
                    filename=filename,
                    tname=tname,
                    ystd=ystd,
                    now=get_time_now()
                ))
            logfile.write(read_duration + '\n')
            logfile.write(compress_duration + '\n')
            logfile.write(ya_upload_duration + '\n')
            logfile.write('Created task: ' + links['task_link'] + '\n')
            logfile.write('Resource link: ' + links['resource_link'] + '\n')

            if not uploaded:
                logfile.write(
                    'Upload failed after {n} attempts, nothing recorded for {filename}_{tname}\n'.format(
                        n=upload_attempts,
                        filename=filename,
                        tname=tname
                    ))
                continue

            dates_list = js[js_path[0]][js_path[1]][tname]['dates']
            # Trim with >= so a history that is already too long shrinks back
            # to the limit instead of growing forever.
            while dates_list and len(dates_list) >= dates_to_keep:
                dates_list.pop(0)
            dates_list.append((ystd, links['resource_link']))


def main():
    u"""Entry point that runs one full export cycle.

    Steps: log the start time; load the export state from ``DATA_TABLE_PATH``
    (or build an empty template when ``ytw.is_empty`` reports the table as
    empty); decide what is due with ``needs_update``; refresh the web queries
    for yesterday if they are due; split, download and upload the refreshed
    tables; stamp the state with today's date and write it back; log the
    finish time.
    """
    today = date.today()
    today_str = today.strftime('%Y-%m-%d')
    yesterday_str = (today - timedelta(days=1)).strftime('%Y-%m-%d')

    with open('qv_update.log', 'a') as logfile:
        logfile.write('\nStarted working at {now}\n'.format(now=get_time_now()))

    if not ytw.is_empty(DATA_TABLE_PATH):
        # Only the first row is used; presumably the table holds a single
        # state record — TODO confirm.
        data_js = next(ytw.read_table(DATA_TABLE_PATH))
    else:
        data_js = create_data_json()

    q_up, wq_up = needs_update(data_js, today)
    q_status = wq_status = False

    if q_up:
        q_status = update_queries(yesterday_str)

    # The wizard export is switched off here, so wq_status stays False and
    # the wizard tables are never split or downloaded.
    # if wq_up:
    #     wq_status = update_wizard_queries(yesterday_str)

    split_and_download(yesterday_str, q_up and q_status, wq_up and wq_status, data_js)
    data_js['date'] = today_str

    ytw.write_table(DATA_TABLE_PATH, [data_js], raw=False, format='json')

    # remove_files()

    # Plain append mode: 'aw' is not a valid mode string.
    with open('qv_update.log', 'a') as logfile:
        logfile.write('\nFinished working at {now}\n'.format(now=get_time_now()))


# Run the export only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
