# -*- coding: utf-8 -*-
from datetime import datetime
from yt.wrapper import ypath_join

from datacloud.dev_utils.yt.yt_utils import get_yt_client
from datacloud.dev_utils.id_value.id_value_lib import count_md5
from datacloud.dev_utils.time.patterns import FMT_DATE_HMSFZ
from datacloud.dev_utils.logging.logger import get_basic_logger

# YT directory whose child nodes are listed and used as the map input
# in build_score_api_aggregates.
SCORE_API_LOGS_DIR = '//home/x-products/production/services/logs/datacloud_score_api/production/events/history'
# Parent YT folder of the two tables produced by this module.
SCORE_API_AGGREGATES_FOLDER = '//home/x-products/production/partners_data/score_api_audience'
# Output table of logs_format_mapper (one formatted row per log record).
SCORE_API_LOGS_FORMATED = ypath_join(SCORE_API_AGGREGATES_FOLDER, 'logs_formated')
# Output table of formated2events_mapper (one row per phone/email identifier).
SCORE_API_EVENTS = ypath_join(SCORE_API_AGGREGATES_FOLDER, 'events')

# Module-level logger used by build_score_api_aggregates.
logger = get_basic_logger(__name__)


def logs_format_mapper(rec):
    """Flatten one raw score API log record into a single formatted row.

    The record must provide ``rec['timestamp']`` and ``rec['context']``.
    User identifiers are read from ``context['query']['user_ids']``; when
    ``query`` or ``user_ids`` is missing or is not a dict, the row is still
    produced with empty identifier lists.

    Args:
        rec: input record (dict-like) with ``timestamp`` and ``context`` keys.

    Yields:
        Exactly one dict with keys ``timestamp``, ``external_id``,
        ``request_id``, ``partner_id``, ``emails``, ``email_id_vals``,
        ``phones``, ``phone_id_vals`` and ``cookies``. Cookies are encoded as
        ``'<cookie_vendor>#<cookie>'``.
    """
    context = rec['context']

    # isinstance (rather than an exact ``type(...) is dict`` comparison) keeps
    # dict subclasses usable; any other value is treated as "no data".
    query = context.get('query', {})
    if not isinstance(query, dict):
        query = {}
    user_ids = query.get('user_ids', {})
    if not isinstance(user_ids, dict):
        user_ids = {}

    # ``or []`` below also covers an explicit null stored under the key.
    emails, email_id_vals = [], []
    for email in user_ids.get('emails') or []:
        if 'email' in email:
            emails.append(email['email'])
        if 'id_value' in email:
            email_id_vals.append(email['id_value'])

    phones, phone_id_vals = [], []
    for phone in user_ids.get('phones') or []:
        if 'phone' in phone:
            phones.append(phone['phone'])
        if 'id_value' in phone:
            phone_id_vals.append(phone['id_value'])

    cookies = []
    for cookie in user_ids.get('cookies') or []:
        # Skip incomplete entries instead of failing on a KeyError, the same
        # way incomplete email/phone entries are skipped above.
        if 'cookie_vendor' in cookie and 'cookie' in cookie:
            cookies.append(cookie['cookie_vendor'] + '#' + cookie['cookie'])

    yield {
        'timestamp': rec['timestamp'],
        'external_id': user_ids.get('external_id'),
        'request_id': context.get('request_id'),
        'partner_id': context.get('partner_id'),
        'emails': emails,
        'email_id_vals': email_id_vals,
        'phones': phones,
        'phone_id_vals': phone_id_vals,
        'cookies': cookies
    }


def remove_if_exists(yt_client, table):
    """Remove ``table`` via ``yt_client``; do nothing when it does not exist."""
    if not yt_client.exists(table):
        return
    yt_client.remove(table)


def should_update(yt_client, table, seconds_thresh):
    """Tell whether ``table`` is missing or was last modified too long ago.

    Args:
        yt_client: client providing ``exists`` and ``get_attribute``.
        table: path of the table to check.
        seconds_thresh: maximum allowed age of the table, in seconds.

    Returns:
        True when the table does not exist or its ``modification_time``
        attribute is more than ``seconds_thresh`` seconds in the past,
        False otherwise.
    """
    if not yt_client.exists(table):
        return True

    modification_time = yt_client.get_attribute(table, 'modification_time')
    modification_dt = datetime.strptime(modification_time, FMT_DATE_HMSFZ)
    # YT reports modification_time as a UTC timestamp and strptime returns a
    # naive datetime, so "now" has to be naive UTC as well. Using local time
    # here would shift the computed age by the host's UTC offset.
    seconds_diff = (datetime.utcnow() - modification_dt).total_seconds()

    return seconds_diff > seconds_thresh


def formated2events_mapper(rec):
    """Expand one formatted log row into one event row per phone/email id.

    For each identifier type the ready-made id values of the record come
    first, followed by ``count_md5`` applied to every raw value (``phones`` /
    ``emails``). The lists stored in ``rec`` are not modified, and a null
    list column is treated as empty.

    Args:
        rec: row with ``timestamp``, ``partner_id``, ``external_id``,
            ``request_id``, ``emails``, ``email_id_vals``, ``phones`` and
            ``phone_id_vals``.

    Yields:
        Dicts with keys ``retro_date`` (the part of ``timestamp`` before
        ``'T'``), ``id_type`` (``'phone_md5'`` or ``'email_md5'``),
        ``id_value``, ``partner_id``, ``external_id`` (falls back to
        ``request_id`` when empty) and ``target`` (always ``-1``). All phone
        rows are emitted before email rows.
    """
    date_str = rec['timestamp'].split('T')[0]

    # Copy before extending so the caller's record is left untouched.
    email_id_vals = list(rec.get('email_id_vals') or [])
    email_id_vals.extend([count_md5(email) for email in rec.get('emails') or []])

    phone_id_vals = list(rec.get('phone_id_vals') or [])
    phone_id_vals.extend([count_md5(phone) for phone in rec.get('phones') or []])

    # An empty/missing external_id is replaced with the request id.
    external_id = rec.get('external_id') or rec.get('request_id')
    partner_id = rec['partner_id']

    for id_type, id_vals in (('phone_md5', phone_id_vals), ('email_md5', email_id_vals)):
        for id_val in id_vals:
            yield {
                'retro_date': date_str,
                'id_type': id_type,
                'id_value': id_val,
                'partner_id': partner_id,
                'external_id': external_id,
                'target': -1
            }


def build_score_api_aggregates(seconds_thresh=60 * 60 * 10):
    """Regenerate the formatted score API log table and the events table.

    Both tables are rebuilt when at least one of them is reported as needing
    an update by ``should_update`` (missing, or modified more than
    ``seconds_thresh`` seconds ago); otherwise a message is logged and the
    function returns.

    The rebuild runs inside one YT transaction:
      1. every node listed under ``SCORE_API_LOGS_DIR`` is mapped with
         ``logs_format_mapper`` into ``SCORE_API_LOGS_FORMATED``, which is
         then sorted by ``timestamp``;
      2. that table is mapped with ``formated2events_mapper`` into
         ``SCORE_API_EVENTS``, which is then sorted by ``retro_date``.

    Args:
        seconds_thresh: staleness threshold in seconds (default is 10 hours).
    """
    client = get_yt_client()

    def make_table(path, columns):
        # Both output tables share the storage attributes; only the path and
        # the (name, type) column list differ.
        return client.TablePath(
            path,
            attributes={
                'schema': [{'name': name, 'type': kind} for name, kind in columns],
                'optimize_for': 'scan',
                'compression_codec': 'brotli_3'
            }
        )

    formatted_table = make_table(SCORE_API_LOGS_FORMATED, [
        ('timestamp', 'string'),
        ('external_id', 'string'),
        ('request_id', 'string'),
        ('partner_id', 'string'),
        ('emails', 'any'),
        ('email_id_vals', 'any'),
        ('phones', 'any'),
        ('phone_id_vals', 'any'),
        ('cookies', 'any'),
    ])
    events_table = make_table(SCORE_API_EVENTS, [
        ('retro_date', 'string'),
        ('id_type', 'string'),
        ('id_value', 'string'),
        ('partner_id', 'string'),
        ('external_id', 'string'),
        ('target', 'int64'),
    ])

    # Both freshness checks are always performed before deciding.
    formatted_is_stale = should_update(client, formatted_table, seconds_thresh)
    events_is_stale = should_update(client, events_table, seconds_thresh)
    if not (formatted_is_stale or events_is_stale):
        logger.info("Time for update hasn't come")
        return

    logger.info('Started prepare score api log aggregates')
    with client.Transaction():
        # Stage 1: raw logs -> formatted log table.
        remove_if_exists(client, formatted_table)
        raw_log_tables = client.list(SCORE_API_LOGS_DIR, absolute=True)
        client.run_map(
            logs_format_mapper,
            raw_log_tables,
            formatted_table,
            spec={'title': 'Format score API logs'}
        )
        client.run_sort(
            formatted_table,
            sort_by='timestamp',
            spec={'title': 'Format score API logs / sort after'}
        )

        # Stage 2: formatted log table -> per-identifier events table.
        remove_if_exists(client, events_table)
        client.run_map(
            formated2events_mapper,
            formatted_table,
            events_table,
            spec={'title': 'Build score API events'}
        )
        client.run_sort(
            events_table,
            sort_by='retro_date',
            spec={'title': 'Build score API events / sort after'}
        )
