# -*- encoding: utf-8 -*-
import travel.avia.admin.init_project  # noqa

import logging
import os
from collections import defaultdict
from datetime import datetime, timedelta
from itertools import izip_longest
from optparse import OptionParser

import yt.wrapper as yt
import yt.logger_config as yt_logger_config
import yt.logger as yt_logger
from django.conf import settings
from django.utils.encoding import force_text

from travel.avia.admin.lib.logs import add_stdout_handler, create_current_file_run_log
from travel.avia.admin.lib.yt_helpers import configure_wrapper

# Module-level logger used by every function in this script.
log = logging.getLogger(__name__)
# NOTE(review): presumably attaches a per-run log file for this script --
# confirm against travel.avia.admin.lib.logs.
create_current_file_run_log()

# YT directory with the per-day redirect log tables (one table per YYYY-MM-DD).
REDIR_ROOT = '//home/avia/logs/avia-json-redir-log'
# Template of the YT directory with the per-day fraud tables; the ``{env}``
# placeholder is filled in by get_froud_root().
FROUD_ROOT = '//home/balance/{env}/stat_aggregator/avia/fraud'
# YT directory the merged per-day tables are written to in production.
DESTINATION_ROOT = '//home/avia/logs/avia-redir-balance-by-day-log'
# YT directory the merged per-day tables are written to in any other environment.
DESTINATION_TMP_ROOT = '//home/avia/tmp/avia-redir-balance-by-day-log'

# Row fields that make_key() combines into the key used to match redirect
# rows with fraud rows. Sorted so the key layout does not depend on the
# order in which the names are listed here.
UNIQ_KEY_FIELDS = sorted(
    ['billing_order_id', 'clid', 'datasource_id', 'eventtime', 'host', 'iso_eventtime',
     'national', 'passportuid', 'pp', 'show_id', 'source_uri',
     'tskv_format', 'unixtime', 'url', 'userip', 'yandexuid']
)

# Values of settings.ENVIRONMENT for which enabled_env() lets the merge run.
ALLOWED_ENVS = ['production', 'dev']


def make_key(row):
    """Build the matching key of a log row.

    The key is a tuple with one text element per name in UNIQ_KEY_FIELDS,
    in that order. A missing or falsy field value contributes an empty
    string; any other value is converted with force_text.
    """
    parts = []
    for field in UNIQ_KEY_FIELDS:
        value = row.get(field)
        if value:
            parts.append(force_text(value))
        else:
            parts.append('')
    return tuple(parts)


def keys_to_upper(row):
    """Return a new dict with the same values and upper-cased keys."""
    upper_row = {}
    for key, value in row.items():
        upper_row[key.upper()] = value
    return upper_row


def get_froud_root(environment):
    """Return the fraud log directory for the given environment.

    'production' maps to the 'prod' directory, everything else to 'dev'.
    """
    if environment == 'production':
        balance_env = 'prod'
    else:
        balance_env = 'dev'
    return FROUD_ROOT.format(env=balance_env)


def get_destination_root(environment):
    """Return the output directory for the given environment.

    Only 'production' writes to DESTINATION_ROOT; any other environment
    writes to DESTINATION_TMP_ROOT.
    """
    if environment == 'production':
        return DESTINATION_ROOT
    return DESTINATION_TMP_ROOT


def read_rows(table):
    """Read a YT table and group its rows by make_key().

    Returns a defaultdict(list) mapping each key to the list of rows that
    share it, in read order. The total number of rows and the number of
    rows whose key had already been seen are logged.
    """
    grouped = defaultdict(list)
    total = 0
    repeated = 0

    log.info('Read: %s', table)
    for record in yt.read_table(table, format=yt.JsonFormat(), raw=False):
        total += 1

        # Looking the key up creates an empty bucket on first sight, so a
        # non-empty bucket means this key has been seen before.
        bucket = grouped[make_key(record)]
        if bucket:
            repeated += 1
        bucket.append(record)

    log.info('Readed %s records with %s duplicates', total, repeated)

    return grouped


def merge_logs(right_day, days_num, environment):
    """Merge redirect and fraud logs for several consecutive days.

    Processes ``days_num`` days, starting at ``right_day`` and going
    backwards. A day is skipped when either its redirect table or its fraud
    table does not exist. For every processed day the redirect rows are
    annotated with ``filter`` (1 when a fraud row was paired with it, else 0)
    and ``filter_name`` (taken from the paired fraud row, else None), their
    keys are upper-cased and the result is written to the destination table.

    Rows are paired positionally within a group of rows sharing the same
    make_key() value. A group with more fraud rows than redirect rows is
    logged as an error and left out of the output.
    """
    # Both roots depend only on the environment, not on the day.
    froud_root = get_froud_root(environment)
    destination_root = get_destination_root(environment)

    for days_back in range(days_num):
        day_str = (right_day - timedelta(days=days_back)).strftime('%Y-%m-%d')
        log.info('Process %s', day_str)

        redir_log = os.path.join(REDIR_ROOT, day_str)
        froud_log = os.path.join(froud_root, day_str)
        destination_log = os.path.join(destination_root, day_str)

        if not yt.exists(redir_log):
            log.info('Redir log not found %s', redir_log)
            continue

        if not yt.exists(froud_log):
            log.info('Froud log not found %s', froud_log)
            continue

        redir_by_key = read_rows(redir_log)
        froud_by_key = read_rows(froud_log)

        merged = []
        for key, redir_group in redir_by_key.iteritems():
            # Popping leaves only unmatched fraud keys in froud_by_key.
            froud_group = froud_by_key.pop(key, [])
            redir_count = len(redir_group)
            froud_count = len(froud_group)

            if froud_count > redir_count:
                log.error("Froud > redir: %s > %s, key: %s", froud_count, redir_count, key)
                continue

            # The fraud group is never longer here, so only it gets padded
            # with empty dicts for redirect rows without a fraud pair.
            for redir_row, froud_row in izip_longest(redir_group, froud_group, fillvalue={}):
                redir_row['filter_name'] = froud_row.get('filter_name')
                if froud_row:
                    redir_row['filter'] = 1
                else:
                    redir_row['filter'] = 0
                merged.append(keys_to_upper(redir_row))

        log.info('Write: %s; unparsed: %s', destination_log, len(froud_by_key))
        write_destination_log(destination_log, merged)


def write_destination_log(destination_log, destination_log_rows):
    """Replace the destination table with the given rows.

    Inside a single YT transaction: removes the table if it already exists,
    creates it again via create_destination_table() and writes the rows in
    JSON format.
    """
    with yt.Transaction():
        already_there = yt.exists(destination_log)
        if already_there:
            yt.remove(destination_log)

        create_destination_table(destination_log)

        output_format = yt.JsonFormat()
        yt.write_table(destination_log, destination_log_rows, format=output_format)


def create_destination_table(destination_log):
    """Create the destination table, reusing the source table's schema.

    The day is taken from the last path component of ``destination_log`` and
    used to locate the redirect table for the same day. When that table has
    a non-empty ``schema`` attribute, the new table gets the schema produced
    by update_schema() and ``optimize_for = scan``; otherwise it is created
    without extra attributes.
    """
    day = destination_log.rsplit('/', 1)[1]
    source_table = os.path.join(REDIR_ROOT, day)
    source_schema = yt.get_attribute(source_table, 'schema', None)

    if source_schema:
        table_attributes = {
            'schema': update_schema(source_schema),
            'optimize_for': 'scan',
        }
    else:
        table_attributes = {}

    yt.create('table', destination_log, recursive=True, attributes=table_attributes)


def update_schema(schema):
    """Derive the destination schema from the source table schema.

    Every source column keeps only its name (upper-cased) and type; the two
    fraud columns FILTER and FILTER_NAME are appended at the end.
    """
    columns = []
    for column in schema:
        columns.append({'name': column['name'].upper(), 'type': column['type']})

    # Fraud fields added by the merge.
    columns.append({'name': 'FILTER', 'type': 'int64'})
    columns.append({'name': 'FILTER_NAME', 'type': 'string'})
    return columns


def enabled_env():
    """Tell whether the script may run in the current environment.

    Returns True when settings.ENVIRONMENT is listed in ALLOWED_ENVS;
    otherwise logs the reason and returns False.
    """
    current_env = settings.ENVIRONMENT
    if current_env in ALLOWED_ENVS:
        return True

    log.info('Current ENVIRONMENT %s. Run only %s allowed.', current_env, ', '.join(ALLOWED_ENVS))
    return False


def main():
    """Parse command-line options, configure YT and run the merge.

    Options:
        -v, --verbose    attach a stdout handler to this module's logger;
                         without it the YT logger level is set to WARNING.
        -p, --proxy      YT proxy to use (default: settings.YT_PROXY).
        -d, --days       number of days to process, counting back from the
                         right day (default: 3).
        -r, --right_day  most recent day to process, as YYYY-MM-DD
                         (default: today).

    Raises:
        ValueError: if --right_day does not match the YYYY-MM-DD format.
    """
    optparser = OptionParser()

    optparser.add_option('-v', '--verbose', action='store_true')
    optparser.add_option('-p', '--proxy', dest='proxy', default=settings.YT_PROXY)
    optparser.add_option('-d', '--days', type='int', dest='days', default=3)
    optparser.add_option('-r', '--right_day', dest='right_day')

    options, args = optparser.parse_args()

    if options.verbose:
        add_stdout_handler(log)

    else:
        # The YT logger reads its level at import time, so it has to be
        # reloaded for the new level to take effect.
        yt_logger_config.LOG_LEVEL = 'WARNING'
        reload(yt_logger)

    if options.right_day:
        # Normalize to a date so merge_logs() receives the same type as in
        # the default branch below (previously this was a datetime).
        options.right_day = datetime.strptime(options.right_day, '%Y-%m-%d').date()
    else:
        options.right_day = datetime.now().date()

    configure_wrapper(yt)
    # Override the proxy only when one was explicitly requested.
    if options.proxy != settings.YT_PROXY:
        yt.config['proxy']['url'] = options.proxy

    log.info('Start')

    if enabled_env():
        merge_logs(options.right_day, options.days, settings.ENVIRONMENT)

    log.info('Done')
