# -*- encoding: utf-8 -*-
import argparse
import functools
import os
import pytz
from datetime import datetime

import yt.wrapper as yt


def map(available_services, record):
    """Mapper for the partners query log.

    Extracts the service name from the record's ``qid`` (second component
    when split by '.' or, failing that, by ':') and yields one normalized row
    for records that have an event time, a status and a service listed in
    ``available_services``.

    :param available_services: container of service names to keep.
    :param record: dict-like log row; read keys are ``iso_eventtime``,
        ``query_time``, ``status``, ``variants_len`` and ``qid``.
    :return: generator yielding zero or one dict with keys ``iso_eventtime``,
        ``query_time``, ``status``, ``variants_len`` and ``service``.
    """
    qid = record.get('qid')
    if not qid:
        return

    service = None
    for separator in ('.', ':'):
        parts = qid.split(separator)
        if len(parts) < 2:  # Separator is absent, try the next one
            continue

        service = parts[1]
        break

    if not service:  # Broken records are possible
        return

    iso_eventtime = record.get('iso_eventtime')
    status = record.get('status')
    if not (iso_eventtime and status and service in available_services):
        return

    query_time = record.get('query_time')
    variants_len = record.get('variants_len')

    try:
        row = {
            # Drop the last six characters and replace spaces with '_'; the
            # result is later parsed with the '%Y-%m-%d_%H' format.
            'iso_eventtime': iso_eventtime[:-6].replace(' ', '_'),
            'query_time': int(float(query_time)) if query_time else 0,
            'status': status,
            'variants_len': int(float(variants_len)) if variants_len else 0,
            'service': service,
        }
    except (ValueError, OverflowError):
        # Non-numeric values raise ValueError, infinite ones ('inf') raise
        # OverflowError; in both cases the record is broken and is skipped.
        return

    yield row


def map_cache(available_services, record):
    """Mapper for the partners cache log.

    Takes the service name from the second ':'-separated component of the
    record's ``qid`` and yields one row for records that have an event time
    and a service listed in ``available_services``.

    :param available_services: container of service names to keep.
    :param record: dict-like log row; read keys are ``iso_eventtime`` and
        ``qid``.
    :return: generator yielding zero or one dict with keys ``iso_eventtime``
        and ``service``.
    """
    iso_eventtime = record.get('iso_eventtime')
    qid = record.get('qid')

    service = None
    if qid:
        parts = qid.split(':')
        # Broken records are possible: a qid without ':' has no service part
        # and is treated the same way as a missing qid instead of raising
        # IndexError.
        if len(parts) > 1:
            service = parts[1]

    if iso_eventtime and service in available_services:
        yield {
            # Drop the last six characters and replace spaces with '_'; the
            # result is later parsed with the '%Y-%m-%d_%H' format.
            'iso_eventtime': iso_eventtime[:-6].replace(' ', '_'),
            'service': service,
        }


def reduce(key, records):
    """Reducer for the query log: aggregate rows sharing one key.

    Counts all rows of the group and, separately for ``query_time`` and
    ``variants_len``, averages the strictly positive values (the average is
    truncated to an integer and is 0 when there are no positive values).

    :param key: mapping with ``iso_eventtime``, ``service`` and ``status``.
    :param records: iterable of rows with ``query_time`` and ``variants_len``.
    :return: generator yielding a single summary dict.
    """
    def truncated_mean(total, hits):
        # No positive samples: report 0 rather than dividing by zero
        if not hits:
            return 0
        return int(float(total) / hits)

    rows_seen = 0
    time_total, time_hits = 0, 0
    variants_total, variants_hits = 0, 0

    for row in records:
        elapsed = int(row['query_time'])
        variants = int(row['variants_len'])
        rows_seen += 1

        # Zero and negative values are excluded from the averages
        if elapsed > 0:
            time_total += elapsed
            time_hits += 1

        if variants > 0:
            variants_total += variants
            variants_hits += 1

    summary = {
        'iso_eventtime': key['iso_eventtime'],
        'service': key['service'],
        'status': key['status'],
    }
    summary['query_time_avg'] = truncated_mean(time_total, time_hits)
    summary['query_time_cnt'] = time_hits
    summary['variants_len_avg'] = truncated_mean(variants_total, variants_hits)
    summary['variants_len_cnt'] = variants_hits
    summary['count'] = rows_seen

    yield summary


def reduce_final(key, records):
    """Reducer that merges pre-aggregated rows sharing one key.

    Sums the ``count`` field of every row in the group, so a group made of
    several partial rows is not reduced to the count of its last row only.

    :param key: mapping with ``iso_eventtime``, ``service`` and ``status``.
    :param records: iterable of rows, each carrying a ``count`` value
        convertible with ``int``.
    :return: generator yielding a single dict with the key fields and the
        total ``count`` (0 for an empty group).
    """
    count = sum(int(r['count']) for r in records)

    yield {
        'iso_eventtime': key['iso_eventtime'],
        'service': key['service'],
        'status': key['status'],
        'count': count
    }


def reduce_cache(key, records):
    """Reducer for the cache log: count the rows sharing one key.

    :param key: mapping with ``iso_eventtime`` and ``service``.
    :param records: iterable of rows; only their number matters.
    :return: generator yielding a single dict with the key fields and the
        number of rows in ``count``.
    """
    # Row contents are irrelevant here, only how many of them there are
    rows_seen = sum(1 for _ in records)

    summary = {
        'iso_eventtime': key['iso_eventtime'],
        'service': key['service'],
    }
    summary['count'] = rows_seen

    yield summary


def main():
    """Aggregate partner query and cache logs in YT and store ServiceMetric rows.

    Command line options:
        -v / --verbose  attach a stdout handler to the script logger.
        -d / --days     value passed to yt_last_logs_tables for both log
                        directories (default 30); presumably the number of
                        most recent log tables to process - TODO confirm.

    Two map-reduce operations write into one temporary YT table: the query
    log is grouped by service, hour and status, the cache log by service and
    hour. Only query-log groups with status 'got_reply' are stored. The
    temporary table is removed even when one of the steps fails.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()

    import logging

    # Project modules are imported only after django.setup() has run
    from travel.avia.stat_admin.data.models import SERVICES, ServiceMetric

    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper, yt_last_logs_tables
    from travel.avia.stat_admin.lib.logs import create_current_file_run_log, add_stdout_handler

    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-d', '--days', default=30, type=int)
    args = parser.parse_args()

    create_current_file_run_log()

    log = logging.getLogger(__name__)

    if args.verbose:
        add_stdout_handler(log)

    log.info('Start')

    configure_wrapper(yt)

    logs_quantity = args.days
    SERVER_TIMEZONE = pytz.timezone('Europe/Moscow')

    def update_metric(event_name, service, event_datetime, val):
        """Create the metric row, or overwrite its value if it already exists."""
        metric, created = ServiceMetric.objects.get_or_create(
            service=service,
            event_datetime=event_datetime,
            event_name=event_name,
            defaults={
                'event_day': event_datetime.day,
                'event_month': event_datetime.month,
                'event_year': event_datetime.year,
                'value': val
            }
        )

        if not created:
            metric.value = val
            metric.save()

    def parse_event_hour(value):
        """Turn a 'YYYY-MM-DD_HH' string into an aware Europe/Moscow datetime."""
        return SERVER_TIMEZONE.localize(datetime.strptime(value, '%Y-%m-%d_%H'))

    def read_rows(table):
        """Read a YT table as parsed JSON rows."""
        return yt.read_table(table, format=yt.JsonFormat(), raw=False)

    # Begin main()
    source_tables = yt_last_logs_tables('//home/rasp/logs/rasp-partners-query-log', logs_quantity)
    tmp_table = yt.create_temp_table()

    try:
        yt.run_map_reduce(
            mapper=functools.partial(map, SERVICES),
            reducer=reduce,
            source_table=source_tables,
            destination_table=tmp_table,
            reduce_by=['service', 'iso_eventtime', 'status'],
        )

        for record in read_rows(tmp_table):
            # Only successful partner replies are turned into metrics
            if record['status'] != 'got_reply':
                continue

            event_hour = parse_event_hour(record['iso_eventtime'])
            service = record['service']

            update_metric('daemon_query_time_avg', service, event_hour, int(record['query_time_avg']))
            update_metric('daemon_variants_len_avg', service, event_hour, int(record['variants_len_avg']))
            update_metric('daemon_requests_partners_avg', service, event_hour, int(record['count']))

        source_tables = yt_last_logs_tables('//home/rasp/logs/rasp-partners-cache-log', logs_quantity)

        # The same temporary table is reused as the destination
        yt.run_map_reduce(
            mapper=functools.partial(map_cache, SERVICES),
            reducer=reduce_cache,
            source_table=source_tables,
            destination_table=tmp_table,
            reduce_by=['service', 'iso_eventtime'],
        )

        for record in read_rows(tmp_table):
            update_metric(
                'cache_hit',
                record['service'],
                parse_event_hour(record['iso_eventtime']),
                int(record['count']),
            )
    finally:
        # Do not leave the temporary table behind when a step above fails
        yt.remove(tmp_table)

    log.info('Done')


# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
