# -*- coding: utf-8 -*-
import datetime
import re
import yt.wrapper as yt
from calendar import monthrange
from collections import defaultdict
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.yt.yt_utils import get_yt_client
from datacloud.launcher.lib.routines.datacloud_api_solomon import extract_labels
from datacloud.launcher.lib.util import rotate_history_tables
from datacloud.dev_utils.time.patterns import RE_MONTH_LOG_FORMAT, RE_DAILY_LOG_FORMAT


logger = get_basic_logger(__name__)
# Prefix put in front of the title of every YT operation started by this module.
RESPONSE_TABLE_TAG = '[RESPONSE TABLE MERGE]'

# Schema attached to the output table written by unify_router_and_qloud.
# The trailing comment on each column is the original author's note on which
# log the value originates from: "router" for the router log, "qloud" for the
# service log.
RESPONSE_TABLE_SCHEME = [
    {'name': 'partner_id', 'type': 'string'},  # qloud
    {'name': 'score_name', 'type': 'string'},  # qloud
    {'name': 'request_id', 'type': 'string'},  # router
    {'name': 'ts', 'type': 'double'},  # router
    {'name': 'response_time', 'type': 'double'},  # router
    {'name': 'status', 'type': 'string'},  # router
    {'name': 'has_score', 'type': 'boolean'},  # qloud
    {'name': 'backend_timestamps', 'type': 'any'},  # qloud
    {'name': 'backend_hosts', 'type': 'any'}  # qloud
]

# Directory listed for service log tables by detect_ready_service_and_router_logs.
LOG_SERVICE_HISTORY_DIR = '//home/x-products/production/services/logs/datacloud_score_api/production/events/history'
# Directory listed for router log tables by detect_ready_service_and_router_logs.
LOG_ROUTER_HISTORY_DIR = '//home/x-products/production/services/logs/router/history'
# Directory that receives the unified response tables; also the default root
# passed to rotate_history_tables by merge_response_logs.
SERVICE_RESPONSE_HISTORY_DIR = '//home/x-products/production/services/logs/datacloud_score_api/production/responses'


@yt.with_context
def router_qloud_unifier_mapper(key, recs, context):
    """
    Join router and service log rows that share one ``request_id``.

    Despite the name this is used as a reducer (see the ``run_reduce`` call
    in ``unify_router_and_qloud``): ``recs`` holds every row of the current
    ``request_id`` and ``context.table_index`` tells which input table the
    row being processed came from.

    Rows from table 0 supply ``ts``, ``response_time`` and ``status``; rows
    from any other table supply ``partner_id``, the backend timestamps/hosts
    and the labels returned by ``extract_labels``.

    Yields one output row per label, and only when a truthy ``status`` was
    seen. Each yielded row is a separate dict, so a consumer may safely keep
    references to previously yielded rows.

    Raises KeyError if a label ``type`` is not one of ``timeout``,
    ``has_score`` or ``no_score``.
    """
    has_score_dict = {'timeout': None, 'has_score': True, 'no_score': False}
    backend_timestamps = []
    backend_hosts = []
    # The two lists are filled in place below; nothing is yielded unless at
    # least one service row was seen, so attaching them up front is safe.
    out_rec = {
        'request_id': key['request_id'],
        'backend_timestamps': backend_timestamps,
        'backend_hosts': backend_hosts,
    }
    labels_list = []
    for rec in recs:
        if context.table_index == 0:
            out_rec['ts'] = float(rec['ts'])
            out_rec['response_time'] = float(rec['nginx_request_time'])
            out_rec['status'] = rec['status']
        else:
            out_rec['partner_id'] = rec['partner_id']
            backend_timestamps.append(rec['timestamp'])
            backend_hosts.append(rec['host'])
            # NOTE(review): labels of the last service row replace those of
            # earlier rows, while timestamps/hosts are accumulated across
            # all rows - confirm that this asymmetry is intended.
            labels_list = extract_labels(rec)

    if not out_rec.get('status'):
        return

    for labels in labels_list:
        # Emit a fresh dict per label instead of mutating and re-yielding
        # the shared accumulator, which would alias every emitted row.
        row = dict(out_rec)
        row['has_score'] = has_score_dict[labels['type']]
        row['score_name'] = labels['score_name']
        yield row


def unify_router_and_qloud(router_table, qloud_table, out_table, yt_client):
    """
    Build one response table out of a router log table and a service log table.

    Steps, all executed inside a single transaction of ``yt_client``:
      1. map the service log into a temporary table that carries a top-level
         ``request_id`` column (``request_id_extractor_mapper``);
      2. sort the router table and the temporary table by ``request_id``;
      3. reduce both by ``request_id`` into ``out_table`` using
         ``RESPONSE_TABLE_SCHEME``.

    :param router_table: path of the router log table
    :param qloud_table: path of the service log table
    :param out_table: path of the table to write
    :param yt_client: YT client used for every call made here
    :raises OSError: if ``router_table`` or ``qloud_table`` does not exist
        (the router table is checked first)
    """
    # Check through the supplied client, not the module-level default one,
    # so the check hits the same cluster/config as the operations below.
    for table in (router_table, qloud_table):
        if not yt_client.exists(table):
            # A single formatted argument: OSError(a, b) would interpret the
            # two values as (errno, strerror) and garble the message.
            raise OSError('YT table does not exist: {}'.format(table))

    with yt_client.Transaction(), \
            yt_client.TempTable(prefix='service_log_with_request_id') as tmp_qloud_with_request_id:
        spec_extracting = {'title': RESPONSE_TABLE_TAG + 'extracting_request_id'}
        yt_client.run_map(
            request_id_extractor_mapper,
            qloud_table,
            tmp_qloud_with_request_id,
            spec=spec_extracting
        )

        spec_sorting = {'title': RESPONSE_TABLE_TAG + 'sorting'}
        # No destination is passed, so each table is sorted in place
        # (this includes the source router table).
        yt_client.run_sort(router_table, sort_by='request_id', spec=spec_sorting)
        yt_client.run_sort(tmp_qloud_with_request_id, sort_by='request_id', spec=spec_sorting)

        spec_unifying = {'title': RESPONSE_TABLE_TAG + 'unifying'}
        # Input order matters: the reducer treats table index 0 as the router log.
        yt_client.run_reduce(
            router_qloud_unifier_mapper,
            [router_table, tmp_qloud_with_request_id],
            yt.TablePath(out_table, schema=RESPONSE_TABLE_SCHEME),
            reduce_by='request_id',
            spec=spec_unifying
        )


def run_unify_router_and_qloud(task):
    """
    Task entry point: unify the router/service table pair stored in ``task.data``.

    Reads ``table_router`` and ``table_service`` from ``task.data``, writes
    the unified table under ``SERVICE_RESPONSE_HISTORY_DIR`` using the name
    of the router table, and returns a one-element list holding
    ``task.make_done()``.
    """
    router_table, qloud_table = task.data['table_router'], task.data['table_service']
    yt_client = get_yt_client()
    # The output table keeps the last path component of the router table.
    table_name = router_table.split('/')[-1]
    out_table = yt.ypath_join(SERVICE_RESPONSE_HISTORY_DIR, table_name)
    unify_router_and_qloud(router_table, qloud_table, out_table, yt_client)
    done_task = task.make_done()
    return [done_task]


def validate_date(date_text, format_):
    """
    Tell whether ``date_text`` can be parsed with the strptime pattern ``format_``.

    Returns True when parsing succeeds and False when ``strptime`` raises
    ValueError; any other exception propagates to the caller.
    """
    try:
        datetime.datetime.strptime(date_text, format_)
    except ValueError:
        return False
    else:
        return True


def detect_ready_service_and_router_logs(date_time, days=None):
    """
    Yield router/service log table pairs that carry the same table name.

    A service table is considered only if its name matches
    ``RE_DAILY_LOG_FORMAT`` or ``RE_MONTH_LOG_FORMAT``; it is paired with
    the router table that has exactly the same name.

    ``date_time`` and ``days`` are accepted for interface compatibility but
    are not used.

    Yields ``(key, data)`` tuples where ``key`` is
    ``'<router path>#<service path>'`` and ``data`` is a dict with the keys
    ``table_router`` and ``table_service``.
    """
    yt_client = get_yt_client()
    service_tables = yt_client.list(LOG_SERVICE_HISTORY_DIR, absolute=True)
    if not service_tables:
        # Nothing to pair, so the router directory is not queried at all.
        return

    # List the router directory once and index it by table name instead of
    # issuing one remote listing per service table.
    router_by_name = {}
    for table_router in yt_client.list(LOG_ROUTER_HISTORY_DIR, absolute=True):
        router_by_name[table_router.split('/')[-1]] = table_router

    for table_service in service_tables:
        date = table_service.split('/')[-1]
        if not (re.match(RE_DAILY_LOG_FORMAT, date) or re.match(RE_MONTH_LOG_FORMAT, date)):
            continue
        table_router = router_by_name.get(date)
        if table_router is None:
            continue
        key = '#'.join([table_router, table_service])
        yield key, {'table_router': table_router, 'table_service': table_service}

def check_all_days_in_the_month():
    """
    Log, for every month that still has daily response tables, whether it is
    ready to be merged.

    Daily tables are the nodes of ``SERVICE_RESPONSE_HISTORY_DIR`` whose name
    matches ``RE_DAILY_LOG_FORMAT``. As the slicing below shows, the name is
    expected to end with ``-DD`` and to start with ``YYYY-MM``.

    A month is reported as ready when it is the latest month present and its
    last calendar day has a table; in that case every day of the month must
    be present.

    :raises AssertionError: if the month looks complete (its last day is
        present) but some of its days have no table
    """
    yt_client = get_yt_client()
    # Materialise the filtered names: they are iterated more than once.
    daily_tables = [
        name for name in yt_client.list(SERVICE_RESPONSE_HISTORY_DIR)
        if re.match(RE_DAILY_LOG_FORMAT, name)
    ]

    # month ('YYYY-MM') -> set of day numbers that have a table
    days_by_month = defaultdict(set)
    for name in daily_tables:
        days_by_month[name[:-3]].add(int(name[-2:]))

    if not days_by_month:
        return

    latest_month = max(days_by_month)
    for month in sorted(days_by_month):
        days_in_month = monthrange(int(month[:4]), int(month[5:]))[1]
        present_days = days_by_month[month]
        # NOTE(review): only the latest month can ever be reported as ready;
        # this condition is kept from the original code - confirm the intent.
        if max(present_days) == days_in_month and month == latest_month:
            if len(present_days) != days_in_month:
                # Explicit raise instead of ``assert`` so the check survives -O.
                raise AssertionError('Some days are lost for {} month'.format(month))
            logger.info('Month {} is ready for merge'.format(month))
        else:
            logger.info('Month {} is not ready yet'.format(month))


def merge_response_logs(history_root=SERVICE_RESPONSE_HISTORY_DIR):
    """
    Run ``rotate_history_tables`` over ``history_root`` with a fresh YT client.

    ``history_root`` defaults to ``SERVICE_RESPONSE_HISTORY_DIR``.
    """
    client = get_yt_client()
    rotate_history_tables(client, history_root)


def request_id_extractor_mapper(rec):
    """
    Mapper that lifts ``rec['context']['request_id']`` into a top-level
    ``request_id`` column.

    The incoming record is modified in place and yielded as the single
    output row.
    """
    request_id = rec['context']['request_id']
    rec['request_id'] = request_id
    yield rec


if __name__ == '__main__':
    # Manual debug run for a single day.
    # WARNING: the result is written under SERVICE_RESPONSE_HISTORY_DIR,
    # exactly as run_unify_router_and_qloud would do for a real task.
    yt_client = get_yt_client()
    # Same '<router table>#<service table>' layout as the keys produced by
    # detect_ready_service_and_router_logs.
    task_key = '//home/x-products/production/services/logs/router/history/2018-12-03#//home/x-products/production/services/logs/datacloud_score_api/production/events/history/2018-12-03'  # noqa
    router_table, qloud_table = task_key.split('#')
    # run_unify_router_and_qloud needs a task object exposing ``.data`` and
    # ``.make_done()``, which a plain key string does not provide, so the
    # underlying function is called directly.
    out_table = yt.ypath_join(SERVICE_RESPONSE_HISTORY_DIR, router_table.split('/')[-1])
    unify_router_and_qloud(router_table, qloud_table, out_table, yt_client)
    print('DRONE!!!')
