# -*- coding: utf-8 -*-
"""Routine to detect, compute and upload stability distribution
"""
import os
import re
from datetime import datetime, timedelta
import yt.wrapper as yt_wrapper
from yt.wrapper import ypath_split
from datacloud.dev_utils.time.utils import assert_date_str

from datacloud.dev_utils.time import patterns
from datacloud.dev_utils.time.utils import now_str

from datacloud.stability.requests_to_features import RequestsToFeaturesConverter
from datacloud.stability import stability, segment_stability, transfer_stability_to_ydb as transfer
from datacloud.stability.metafeatures_stability import MetafeatureDistributionHandler

from datacloud.stability.stability_record import StabilityRecord, DistStabMngr, DistStabTable
from datacloud.dev_utils.yt.yt_utils import get_yt_client
from datacloud.dev_utils.yt.converter import TableConverter
from datacloud.dev_utils.time.patterns import RE_DAILY_LOG_FORMAT
from datacloud.dev_utils.status_db.task import Task, Status

from datacloud.stability.metafeatures_stability import DISTRIB_TYPE_DAILY, DISTRIB_TYPE_SEGMENT

from datacloud.config.yt import YT_PROXY

from datacloud.dev_utils.logging.logger import get_basic_logger
# Module-level logger named after this module.
logger = get_basic_logger(__name__)


# Number of bins handed to stability.FeaturesToStabilityBinsConverter.
N_BINS = 100
# YT folder whose children are looked up as '<folder>/<date>' by
# detect_ready_compute_stability.
INPUT_LOGS_FOLDER = '//home/x-products/production/services/logs/datacloud_score_api/production/events/last_ndays'
# NOTE(review): not referenced anywhere in the visible part of this file.
STABILITY_FOLDER = '//home/x-products/production/datacloud/stability/feature_distribution_stability'
# Folder listed by run_get_test_features_from_requests.
# NOTE(review): currently the same path as INPUT_LOGS_FOLDER - confirm whether
# the two are meant to stay in sync.
LAST_NDAYS_LOGS = '//home/x-products/production/services/logs/datacloud_score_api/production/events/last_ndays'
# Suffixes appended to the parent task key by the test daily / segment
# stability handlers when they queue the PSI task.
DAILY_SUFFIX = '_daily'
SEGMENT_SUFFIX = '_segment'

# Upper bound on the number of log tables processed by one
# run_get_test_features_from_requests call.
M_DAYS_TO_TEST = 5

# Default record for run_get_features_from_requests; also used directly by
# run_upload_stability_to_ydb.
# NOTE(review): the last path component is spelled 'stabiity' - verify against
# the real table name before changing it.
TCS_STABILITY_RECORD = StabilityRecord(
    partner_id='tcs',
    score_id='tcs_xprod_987_20181212',
    features_table='//home/x-products/production/datacloud/models/tcs/tcs_xprod_987_20181212/features-for-stabiity-computation'
)

# Destination table passed to transfer.sample_stability_trasnfer.
YDB_STABILITY_TABLE = '/ru/impulse/production/xprod-scoring-api-db/score_api/stability/stability'


def detect_ready_compute_stability(date_time, days=None):
    """Yield a (key, data) pair for each recent day with a non-empty log table.

    Starting at ``date_time`` and stepping back one day at a time, the table
    ``INPUT_LOGS_FOLDER/<date>`` is checked. Days whose table does not exist
    or has no rows are skipped.

    Args:
        date_time: Timestamp string in ``patterns.FMT_DATE_HM`` format.
        days: How many days back to look, in addition to the day of
            ``date_time`` itself. Any falsy value (``None`` or ``0``) is
            replaced by 7.

    Yields:
        ``(date_str, {'table_path': <table>, 'date_str': <date_str>})``.
    """
    days = days or 7
    start_time = datetime.strptime(date_time, patterns.FMT_DATE_HM)

    # ``range`` rather than the Python-2-only ``xrange``: the span is a handful
    # of days, so the result is the same on Python 2 and it also runs on 3.
    for days_back in range(days + 1):
        time_to_check = start_time - timedelta(days=days_back)
        logger.info('Check day: {}'.format(time_to_check))
        date_str = time_to_check.strftime(patterns.FMT_DATE)
        table = '{}/{}'.format(INPUT_LOGS_FOLDER, date_str)
        # Existence is tested first so row_count is never asked about a
        # missing table.
        if yt_wrapper.exists(table) and yt_wrapper.row_count(table) > 0:
            yield date_str, {
                'table_path': table,
                'date_str': date_str
            }


def detect_ready_compute_test_stability(date_time):
    """Yield a (key, data) pair for every table returned for monitoring.

    The entries come from ``DistStabTable().get_tables_for_monitoring()``;
    each one is unpacked as ``(partner_id, score_id, table, date_str)``.

    Args:
        date_time: Not used by this function; kept so the signature matches
            what callers pass.

    Yields:
        ``(key, data)`` where ``key`` is a human-readable label and ``data``
        holds ``'stability_record_params'`` (partner_id, score_id,
        features_table) and ``'date'``.
    """
    for partner_id, score_id, table, date_str in DistStabTable().get_tables_for_monitoring():
        key = 'Score [{}]/[{}] on [{}]'.format(partner_id, score_id, date_str)
        yield key, {
            'stability_record_params': {
                'partner_id': partner_id,
                'score_id': score_id,
                'features_table': table
            },
            'date': date_str
        }


class CRequestsToTcsStability(TableConverter):
    """Converter that chains requests -> features -> metafeatures -> stability bins."""

    def __init__(self, yt_client, stability_rec):
        """Store the stability record; the client is handed to the base class."""
        super(CRequestsToTcsStability, self).__init__(yt_client)
        self._stability_rec = stability_rec

    def _convert(self, input_table, output_table):
        """Run the three-stage pipeline from ``input_table`` into ``output_table``."""
        client = self._yt_client

        to_features = RequestsToFeaturesConverter(client, self._stability_rec)
        to_metafeatures = stability.FeaturesToMetaFeaturesConverter(
            client,
            clf_yt_path='//home/x-products/production/datacloud/bins/partner_models/tcs/tcs_xprod_987_20181212/current',
            list_groups_yt_path='//home/x-products/production/datacloud/bins/partner_models/tcs/additional/sklearn181_bins_for_987_20181212.pkl'
        )
        to_bins = stability.FeaturesToStabilityBinsConverter(client, n_bins=N_BINS)

        # TODO: Save the "requests to features" table separately, then compute
        # the overall stability and the per-segment stability from it as
        # separate tasks.
        pipeline = to_features | to_metafeatures | to_bins
        pipeline(input_table, output_table)


def run_get_test_features_from_requests(task):
    """Collect features for up to M_DAYS_TO_TEST recent request logs of a score.

    The children of ``LAST_NDAYS_LOGS`` are walked from the greatest name
    downwards. A table is used when its name matches ``RE_DAILY_LOG_FORMAT``
    and is not greater than ``task.data['date']`` (plain string comparison).
    For each used table the requests are converted to features and two
    follow-up tasks are queued: ``compute_test_daily_stability`` and
    ``compute_test_segment_stability``.

    Args:
        task: Task whose ``data`` holds ``'date'`` and
            ``'stability_record_params'`` (keyword arguments for
            ``StabilityRecord``).

    Returns:
        List with the follow-up tasks plus the finished input task. When no
        log table matched, the input task is finished with
        ``Status.SKIPPED`` instead.
    """
    date_str = task.data['date']
    yt_client = get_yt_client()
    stability_record = StabilityRecord(**task.data['stability_record_params'])
    stability_manager = DistStabMngr(stability_record, is_testing_mode=True)
    # Descending order so the greatest (latest) table names are tried first.
    log_tables = sorted(yt_client.list(LAST_NDAYS_LOGS, absolute=True), reverse=True)
    follow_up_programs = ('compute_test_daily_stability', 'compute_test_segment_stability')

    new_tasks = []
    matched_logs_counter = 0
    for log_table in log_tables:
        log_date = ypath_split(log_table)[-1]
        if not (re.match(RE_DAILY_LOG_FORMAT, log_date) and date_str >= log_date):
            continue

        output_table = stability_manager.make_features_from_requests_table_path(log_date)
        RequestsToFeaturesConverter(yt_client, stability_record)(log_table, output_table)
        current_time = now_str()
        # Both follow-ups take the same key and payload; only the program
        # differs. A fresh payload dict is built for each task.
        for program in follow_up_programs:
            new_tasks.append(Task(
                program=program,
                key=task.key + 'for requests from [{}]'.format(log_date),
                status=Status.READY,
                data={
                    'stability_record_params': stability_record.to_json(),
                    'log_date': log_date,
                    'features_distribution_table': output_table
                },
                create_time=current_time,
                update_time=current_time
            ))

        matched_logs_counter += 1
        if matched_logs_counter >= M_DAYS_TO_TEST:
            break

    if matched_logs_counter > 0:
        new_tasks.append(task.make_done())
        if matched_logs_counter < M_DAYS_TO_TEST:
            # ``warning`` is the supported spelling; ``warn`` is a deprecated alias.
            logger.warning('Not enough days for '
                           'proper check of score '
                           'table[{}]. Only {} days '
                           'were found.'.format(stability_record.features_table, matched_logs_counter))
    else:
        logger.warning('Not enough days for proper '
                       'check of score table[{}]. {} '
                       'days were found. \n Program will be '
                       'marked as SKIPPED in status_db'.format(stability_record.features_table, matched_logs_counter))
        new_tasks.append(task.make_done(new_status=Status.SKIPPED, update_time=now_str()))
    return new_tasks


def run_get_features_from_requests(task, stability_record=TCS_STABILITY_RECORD):
    """Convert one day of request logs to features and queue the stability tasks.

    Args:
        task: Task whose ``data`` holds ``'table_path'`` (input log table)
            and ``'date_str'``.
        stability_record: Record passed to the converter and the path
            manager; defaults to ``TCS_STABILITY_RECORD``.

    Returns:
        List with the finished input task followed by a
        ``compute_daily_stability`` and a ``compute_segment_stability`` task.
    """
    assert 'table_path' in task.data, 'table_path expected in task.data'
    assert_date_str(task.data['date_str'])

    day = task.data['date_str']
    source_table = task.data['table_path']

    client = get_yt_client()
    manager = DistStabMngr(stability_record)
    features_table = manager.make_features_from_requests_table_path(day)

    convert = RequestsToFeaturesConverter(client, stability_record)
    convert(source_table, features_table)
    timestamp = now_str()

    result = [task.make_done()]
    # Daily first, then segment; each task gets its own payload dict.
    for program in ('compute_daily_stability', 'compute_segment_stability'):
        payload = {
            'table_path': features_table,
            'date_str': day,
            'stability_record_params': stability_record.to_json(),
        }
        result.append(Task(program, day, Status.READY, payload, timestamp, timestamp))
    return result


def run_compute_test_daily_stability(task):
    """Build the test-mode daily distribution table and queue its PSI task.

    Args:
        task: Task whose ``data`` holds ``'log_date'``,
            ``'stability_record_params'`` and
            ``'features_distribution_table'`` (the pipeline input).

    Returns:
        List with the finished input task and one
        ``calculate_test_stability_distribution_psi`` task keyed
        ``task.key + DAILY_SUFFIX``.
    """
    log_date = task.data['log_date']
    record = StabilityRecord(**task.data['stability_record_params'])
    manager = DistStabMngr(record, is_testing_mode=True)
    client = get_yt_client()
    distribution_table = manager.make_daily_distribution_table_path(log_date)

    # features -> metafeatures -> stability bins
    pipeline = (
        stability.FeaturesToMetaFeaturesConverter(
            client,
            clf_yt_path=manager.clf_yt_path,
            list_groups_yt_path=manager.list_groups_yt_path
        )
        | stability.FeaturesToStabilityBinsConverter(client, n_bins=N_BINS)
    )
    pipeline(task.data['features_distribution_table'], distribution_table)

    timestamp = now_str()
    finished = task.make_done()
    payload = {
        'stability_record_params': record.to_json(),
        'table_path': distribution_table,
        'date': log_date,
        'type': DISTRIB_TYPE_DAILY
    }
    psi_task = Task(
        'calculate_test_stability_distribution_psi',
        task.key + DAILY_SUFFIX,
        Status.READY,
        payload,
        timestamp,
        timestamp
    )
    return [finished, psi_task]


def run_compute_test_segment_stability(task):
    """Build the test-mode segment distribution table and queue its PSI task.

    Args:
        task: Task whose ``data`` holds ``'log_date'``,
            ``'stability_record_params'`` and
            ``'features_distribution_table'`` (the pipeline input).

    Returns:
        List with the finished input task and one
        ``calculate_test_stability_distribution_psi`` task keyed
        ``task.key + SEGMENT_SUFFIX``.
    """
    log_date = task.data['log_date']
    record = StabilityRecord(**task.data['stability_record_params'])
    manager = DistStabMngr(record, is_testing_mode=True)
    client = get_yt_client()
    distribution_table = manager.make_segment_distribution_table_path(log_date)

    # All three stages are constructed before they are chained.
    to_metafeatures = stability.FeaturesToMetaFeaturesConverter(
        client,
        clf_yt_path=manager.clf_yt_path,
        list_groups_yt_path=manager.list_groups_yt_path
    )
    by_segment = segment_stability.CExpandBySegment(client)
    to_segment_stability = segment_stability.CStabilitySegment(client, record)

    pipeline = to_metafeatures | by_segment | to_segment_stability
    pipeline(task.data['features_distribution_table'], distribution_table)

    timestamp = now_str()
    finished = task.make_done()
    payload = {
        'stability_record_params': record.to_json(),
        'table_path': distribution_table,
        'date': log_date,
        'type': DISTRIB_TYPE_SEGMENT
    }
    psi_task = Task(
        'calculate_test_stability_distribution_psi',
        task.key + SEGMENT_SUFFIX,
        Status.READY,
        payload,
        timestamp,
        timestamp
    )
    return [finished, psi_task]


def run_compute_daily_stability(task):
    """Build the daily distribution table and queue the upload and PSI tasks.

    Args:
        task: Task whose ``data`` holds ``'table_path'`` (features table),
            ``'date_str'`` and ``'stability_record_params'``.

    Returns:
        List with the finished input task, an ``upload_stability_to_ydb``
        task keyed ``'daily#' + date_str`` and a
        ``calculate_stability_distribution_psi`` task keyed ``date_str``.
    """
    assert 'table_path' in task.data, 'table_path expected in task.data'
    assert_date_str(task.data['date_str'])

    day = task.data['date_str']
    features_table = task.data['table_path']
    client = get_yt_client()
    record = StabilityRecord(**task.data['stability_record_params'])
    manager = DistStabMngr(record)
    distribution_table = manager.make_daily_distribution_table_path(day)

    # features -> metafeatures -> stability bins
    pipeline = (
        stability.FeaturesToMetaFeaturesConverter(
            client,
            clf_yt_path=manager.clf_yt_path,
            list_groups_yt_path=manager.list_groups_yt_path
        )
        | stability.FeaturesToStabilityBinsConverter(client, n_bins=N_BINS)
    )
    pipeline(features_table, distribution_table)
    timestamp = now_str()

    result = [task.make_done()]
    result.append(Task(
        'upload_stability_to_ydb',
        'daily#' + day,
        Status.READY,
        {'table_path': distribution_table, 'date_str': day},
        timestamp,
        timestamp
    ))
    # NOTE(review): run_compute_segment_stability queues a task with the same
    # program and the same key (the bare date) - confirm the status DB
    # accepts both.
    result.append(Task(
        'calculate_stability_distribution_psi',
        day,
        Status.READY,
        {
            'stability_record_params': record.to_json(),
            'table_path': distribution_table,
            'date': day,
            'type': DISTRIB_TYPE_DAILY
        },
        timestamp,
        timestamp
    ))
    return result


def run_compute_segment_stability(task):
    """Build the segment distribution table and queue the upload and PSI tasks.

    Args:
        task: Task whose ``data`` holds ``'table_path'`` (features table),
            ``'date_str'`` and ``'stability_record_params'``.

    Returns:
        List with the finished input task, an ``upload_stability_to_ydb``
        task keyed ``'segment#' + date_str`` and a
        ``calculate_stability_distribution_psi`` task keyed ``date_str``.
    """
    assert 'table_path' in task.data, 'table_path expected in task.data'
    assert_date_str(task.data['date_str'])
    input_table = task.data['table_path']
    date_str = task.data['date_str']
    yt_client = get_yt_client()
    stability_record = StabilityRecord(**task.data['stability_record_params'])
    stability_manager = DistStabMngr(stability_record)
    output_table = stability_manager.make_segment_distribution_table_path(date_str)
    features_to_metafeatures = stability.FeaturesToMetaFeaturesConverter(
        yt_client,
        clf_yt_path=stability_manager.clf_yt_path,
        list_groups_yt_path=stability_manager.list_groups_yt_path
    )
    expand_by_segment = segment_stability.CExpandBySegment(yt_client)
    # Use the record carried by the task, not the global TCS_STABILITY_RECORD,
    # so this stage agrees with the manager above and with the payload of the
    # PSI task below (and with run_compute_test_segment_stability).
    stability_segment = segment_stability.CStabilitySegment(yt_client, stability_record)

    transformer = features_to_metafeatures | expand_by_segment | stability_segment
    transformer(input_table, output_table)

    current_time = now_str()

    new_tasks = [
        task.make_done(),
        Task(
            'upload_stability_to_ydb',
            'segment#' + date_str,
            Status.READY,
            {'table_path': output_table, 'date_str': date_str},
            current_time,
            current_time
        ),
        Task(
            'calculate_stability_distribution_psi',
            date_str, Status.READY,
            {
                'stability_record_params': stability_record.to_json(),
                'table_path': output_table,
                'date': date_str,
                'type': DISTRIB_TYPE_SEGMENT
            },
            current_time,
            current_time
        )
    ]
    return new_tasks


def run_calculate_stability_distribution_psi(task):
    """Run MetafeatureDistributionHandler for ``task`` and return it finished.

    Returns:
        Single-element list holding the result of ``task.make_done()``.
    """
    handler = MetafeatureDistributionHandler(get_yt_client(), task)
    handler()
    return [task.make_done()]


def run_calculate_test_stability_distribution_psi(task):
    """Run MetafeatureDistributionHandler in testing mode and return the task finished.

    Returns:
        Single-element list holding the result of ``task.make_done()``.
    """
    handler = MetafeatureDistributionHandler(get_yt_client(), task, is_testing_mode=True)
    handler()
    return [task.make_done()]


def run_upload_stability_to_ydb(task):
    """Prepare a stability table for transfer and send it to the YDB table.

    The partner and score ids always come from ``TCS_STABILITY_RECORD``.
    The YQL token is taken from ``yt_wrapper.config['token']`` or, when that
    is empty, from the ``YQL_TOKEN`` environment variable.

    Args:
        task: ``upload_stability_to_ydb`` task whose ``data`` holds
            ``'table_path'`` and ``'date_str'``.

    Returns:
        Single-element list with the finished input task.

    Raises:
        AssertionError: wrong program, missing ``'table_path'`` or no token.
    """
    expected_program = 'upload_stability_to_ydb'
    assert task.program == expected_program, 'Wrong program {}, expected {}'.format(task.program, expected_program)
    assert 'table_path' in task.data, 'table_path expected in task.data'
    assert_date_str(task.data['date_str'])

    day = task.data['date_str']
    source_table = task.data['table_path']
    client = get_yt_client(YT_PROXY)

    yql_token = yt_wrapper.config['token']
    if not yql_token:
        yql_token = os.environ.get('YQL_TOKEN', '')
    assert yql_token, 'Please provide YQL token'

    # Intermediate table; the name carries the date and the current time.
    staging_table = '//tmp/r9-test-stability-rec-{}-{}'.format(day, now_str())
    to_transfer_rec = transfer.CStabilityToTransferRec(
        client,
        TCS_STABILITY_RECORD.partner_id,
        TCS_STABILITY_RECORD.score_id,
        day
    )
    to_transfer_rec(source_table, staging_table)
    transfer.sample_stability_trasnfer(client, staging_table, YDB_STABILITY_TABLE, yql_token=yql_token)

    return [task.make_done(update_time=now_str())]
