# -*- encoding: utf-8 -*-
import os

from datetime import datetime

import yt.wrapper as yt


def yt_last_logs_tables(path, days, skip_today=True):
    """Return the paths of the most recent date-named tables under ``path``.

    Only tables whose last path component parses as ``YYYY-MM-DD`` are
    considered; everything else found by ``yt.search`` is ignored. Tables
    dated later than today are never returned.

    :param path: Cypress path handed to ``yt.search``.
    :param days: maximum number of tables to return.
    :param skip_today: when true (the default) the table dated today is
        excluded; when false it is eligible as well.
    :return: list of table paths, newest first, at most ``days`` long.
    """
    # Evaluate "today" once so every table is compared against the same date.
    today = datetime.now().date()

    dated_tables = []
    for table in yt.search(path, node_type="table"):
        try:
            table_date = datetime.strptime(table.split('/')[-1], '%Y-%m-%d').date()
        except ValueError:
            # The node name is not a date: not a daily log table.
            continue

        if table_date > today:
            continue

        if skip_today and table_date == today:
            continue

        dated_tables.append((table_date, table))

    # Newest first; ties on the date fall back to comparing the paths.
    dated_tables.sort(reverse=True)

    return [table for _, table in dated_tables[:days]]


def error_map(record):
    """Mapper: emit a trimmed row for every record with a 5xx status.

    Records whose ``status`` does not start with ``'5'`` produce nothing.
    The emitted row carries the full event time, its first ten characters
    as the event date, the request up to the first ``?`` and the status.
    """
    status = record['status']
    if not status.startswith('5'):
        return

    event_time = record['iso_eventtime']
    # Keep only the part of the request before the query string.
    request_path = record['request'].partition('?')[0]

    yield {
        'iso_eventtime': event_time,
        'iso_eventdate': event_time[:10],
        'request': request_path,
        'status': status,
    }


def yandexuid_map(record):
    """Mapper: emit (event date, raw_yandexuid) for records that have a uid.

    Records without a ``raw_yandexuid`` key produce nothing. Note that the
    emitted ``iso_eventtime`` field holds only the first ten characters of
    the source value.
    """
    if 'raw_yandexuid' not in record:
        return

    event_date = record['iso_eventtime'][:10]
    uid = record['raw_yandexuid']

    yield {'iso_eventtime': event_date, 'raw_yandexuid': uid}


def yandexuid_reduce(key, records):
    """Reducer: collapse each (iso_eventtime, raw_yandexuid) group to one row.

    ``records`` is not consumed; only the two key fields are emitted.
    """
    key_fields = ('iso_eventtime', 'raw_yandexuid')
    yield {field: key[field] for field in key_fields}


def yandexuid_count(key, records):
    """Reducer: emit the number of records in each ``iso_eventtime`` group."""
    total = sum(1 for _ in records)

    yield {'iso_eventtime': key['iso_eventtime'], 'count': total}


def search_reduce(key, records):
    """Reducer: summarise one (iso_eventdate, status, request) group.

    Emits a single row with the key fields, the number of records in the
    group and the earliest and latest ``iso_eventtime`` seen, both formatted
    as ``YYYY-MM-DD HH:MM:SS``.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    total = 0
    earliest = None
    latest = None

    for total, row in enumerate(records, 1):
        moment = datetime.strptime(row['iso_eventtime'], time_format)

        if earliest is None:
            # First record initialises both bounds.
            earliest = latest = moment
        elif moment < earliest:
            earliest = moment
        elif moment > latest:
            latest = moment

    yield {
        'iso_eventdate': key['iso_eventdate'],
        'status': key['status'],
        'request': key['request'],
        'min_iso_eventtime': earliest.strftime(time_format),
        'max_iso_eventtime': latest.strftime(time_format),
        'count': total
    }


def main():
    """Collect avia.yandex.ru access-log statistics from YT into the database.

    The run goes through these steps:

    1. Set up Django and the YT wrapper.
    2. Filter the latest access-log table(s) down to ``avia.yandex.ru`` rows
       with a ``yt map`` shell command, writing into an intermediate
       temporary table.
    3. Aggregate 5xx responses per (date, status, request) and store them as
       ``Error500`` rows.
    4. Count distinct ``raw_yandexuid`` values per date and store them as
       ``YandexUID`` rows.
    5. Remove both temporary tables, even if one of the steps failed.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()

    # These imports rely on Django being set up, hence they live here and
    # not at the top of the file.
    from django.db.transaction import atomic
    from django.conf import settings
    from django.utils import timezone

    import logging

    from travel.avia.stat_admin.data.models import Error500, YandexUID
    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper

    log = logging.getLogger(__name__)
    log.info('Start')

    configure_wrapper(yt)
    # The `yt` command line tool started below reads the token from the environment.
    os.environ['YT_TOKEN'] = settings.YT_TOKEN

    date_format = '%Y-%m-%d'
    datetime_format = '%Y-%m-%d %H:%M:%S'

    @atomic
    def save_results(table_path):
        """Store the aggregated 5xx rows of ``table_path`` as Error500 objects."""
        current_tz = timezone.get_current_timezone()

        for record in yt.read_table(table_path, format=yt.JsonFormat(), raw=False):
            iso_eventdate = current_tz.localize(
                datetime.strptime(record['iso_eventdate'], date_format)
            ).date()

            min_eventtime = current_tz.localize(
                datetime.strptime(record['min_iso_eventtime'], datetime_format)
            )

            max_eventtime = current_tz.localize(
                datetime.strptime(record['max_iso_eventtime'], datetime_format)
            )

            count = int(record['count'])

            error_500, created = Error500.objects.get_or_create(
                eventdate=iso_eventdate,
                code=int(record['status']),
                min_eventtime=min_eventtime,
                max_eventtime=max_eventtime,
                request=record['request'],
                defaults={
                    'count': count
                }
            )

            # A freshly created row already carries the count; only an
            # existing row has to be updated and saved.
            if not created:
                error_500.count = count
                error_500.save()

    @atomic
    def save_yandexuids(table_path):
        """Store the per-day uid counts of ``table_path`` as YandexUID objects."""
        current_tz = timezone.get_current_timezone()

        for record in yt.read_table(table_path, format=yt.JsonFormat(), raw=False):
            # The `iso_eventtime` column holds a date here: yandexuid_map
            # truncates it to its first ten characters.
            iso_eventdate = current_tz.localize(
                datetime.strptime(record['iso_eventtime'], date_format)
            ).date()

            count = int(record['count'])

            yandexuid, created = YandexUID.objects.get_or_create(
                eventdate=iso_eventdate,
                service='ticket',
                national_version='ru',
                defaults={
                    'count': count
                }
            )

            if not created:
                yandexuid.count = count
                yandexuid.save()

    # FIXME: move to options
    logs_quantity = 1

    source_tables = yt_last_logs_tables('//statbox/access-log', logs_quantity, skip_today=True)

    intermediate_table = yt.create_temp_table(
        path=settings.YT_TMP_DIRECTORY,
        prefix='avia_stats_'
    )

    tmp_table = yt.create_temp_table(
        path=settings.YT_TMP_DIRECTORY,
        prefix='avia_stats_'
    )

    try:
        sub_data = [
            'yt',
            # TODO(mikhailche): check grep expression if this script is not dead yet
            # `|| true` keeps the mapper from failing when grep matches nothing.
            'map', '"grep \'canonized_vhost=avia\\.yandex\\.ru\' || true"',
            '--dst', intermediate_table,
            '--proxy', settings.YT_PROXY,
            '--format', 'dsv',
        ]

        for source_table in source_tables:
            sub_data.append('--src')
            sub_data.append(source_table)

        # FIXME: for some reason this refuses to run via subprocess
        shell_cmd = ' '.join('%s' % part for part in sub_data).strip()

        exit_status = os.system(shell_cmd)
        if exit_status != 0:
            # Surface the failure: the intermediate table may be empty or
            # incomplete, which would otherwise pass unnoticed.
            log.error('Command exited with status %s: %s', exit_status, shell_cmd)

        yt.run_map_reduce(
            mapper=error_map,
            reducer=search_reduce,
            source_table=intermediate_table,
            destination_table=tmp_table,
            reduce_by=['iso_eventdate', 'status', 'request'],
        )

        save_results(tmp_table)

        # First pass: one row per (date, uid) pair, i.e. distinct uids per day.
        yt.run_map_reduce(
            mapper=yandexuid_map,
            reducer=yandexuid_reduce,
            source_table=intermediate_table,
            destination_table=tmp_table,
            reduce_by=['iso_eventtime', 'raw_yandexuid'],
        )

        # Second pass: count those rows per date, rewriting tmp_table in place.
        yt.run_map_reduce(
            mapper=None,
            reducer=yandexuid_count,
            source_table=tmp_table,
            destination_table=tmp_table,
            reduce_by=['iso_eventtime'],
        )

        save_yandexuids(tmp_table)
    finally:
        # Remove both temporary tables whether or not the run succeeded;
        # force=True tolerates a table that is already gone.
        for temp_table in (tmp_table, intermediate_table):
            yt.remove(temp_table, force=True)

    log.info("Done")


# Run the collection only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
