#!/usr/bin/env python
# -*- coding: utf-8 -*-

import datetime
from os.path import join, basename

from crypta.profile.utils.loggers import get_file_logger
from crypta.profile.utils.yt_utils import get_yt_client
from crypta.profile.utils.config import config
from crypta.profile.utils.yql_utils import query as yql_query
from crypta.profile.tasks.monitoring.validation_by_sources.utils import calculate_stats_query_template

# SELECT-list fragment for one segment; main() formats it once per segment
# type and concatenates the results into the `{validation_fields}` placeholder
# of the query below. The trailing ", " is required because the fragment is
# followed by more columns in the SELECT list.
validation_fields_template = 'validation.validation_{segment} AS validation_{segment}, '

# YQL query template that inner-joins the concatenated validation tables with
# the prediction table (`{input_table}`) on `yandexuid` and writes the result
# to `{output_table}` WITH TRUNCATE. Predicted gender / age / income are read
# from the `exact_socdem` column after converting it with
# Yson::ConvertToStringDict.
# The doubled braces `{{ ... }}` are str.format escapes that produce literal
# braces for the YQL lambda body.
join_prediction_and_validation_template = """
$exact_socdem = ($exact_socdem) -> {{ RETURN Yson::ConvertToStringDict($exact_socdem) }};

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT validation.yandexuid AS yandexuid,
    {validation_fields}
    $exact_socdem(prediction.exact_socdem)['gender'] AS predicted_gender,
    $exact_socdem(prediction.exact_socdem)['age_segment'] AS predicted_age,
    $exact_socdem(prediction.exact_socdem)['income_5_segment'] AS predicted_income
FROM CONCAT(
    {validation_tables}
) AS validation
INNER JOIN `{input_table}` AS prediction
USING (yandexuid);
"""

# Validation sources for which main() calculates 'age' and 'gender' statistics.
SOURCES_AGE_GENDER = ('passport', 'peoplesearch_vk', 'socialdb')
# Validation sources for which main() calculates 'income' statistics.
SOURCES_INCOME = ('delta_credit', 'beeline')
# Regions accepted in the sample's `region` attribute (besides 'all').
REGIONS = ('russia', 'moscow')
# Backtick-quoted path of a validation sample table, parameterized by
# source, date and region; used as an argument of CONCAT(...) in the query.
VALIDATION_PATTERN = '`//home/crypta/production/profiles/external-profiles/validation/{source}/{date}/{region}/sample`'


def _validation_inputs(source, date_str, region):
    """Return ``(validation_tables, segment_types)`` for one source/date/region.

    ``validation_tables`` is a comma-separated list of backtick-quoted table
    paths suitable for the CONCAT(...) clause of the join query;
    ``segment_types`` is the tuple of segment names available for ``source``.

    Raises:
        ValueError: if ``source`` is neither 'all' nor a known source.
    """
    if source == 'all':
        source_dirs = SOURCES_AGE_GENDER + SOURCES_INCOME
        segment_types = ('age', 'gender', 'income')
    elif source in SOURCES_AGE_GENDER:
        source_dirs = (source,)
        segment_types = ('age', 'gender')
    elif source in SOURCES_INCOME:
        source_dirs = (source,)
        segment_types = ('income',)
    else:
        raise ValueError('Unknown validation source: {source}'.format(source=source))

    validation_tables = ','.join(
        VALIDATION_PATTERN.format(source=source_dir, date=date_str, region=region)
        for source_dir in source_dirs
    )
    return validation_tables, segment_types


def main():
    """Process every not-yet-processed table in the ``custom_input`` directory.

    For each sample the following table attributes are read:

    * ``processed`` - samples with a truthy value are skipped;
    * ``source`` - 'all' (default) or one of SOURCES_AGE_GENDER / SOURCES_INCOME;
    * ``date`` - validation date; defaults to the last entry returned by
      listing the ``passport`` validation directory;
    * ``region`` - 'all' (default) or one of REGIONS.

    For every selected region the sample is joined with the validation tables
    and per-segment statistics are calculated into
    ``custom_output/<sample name>/<region>/``. Afterwards the sample gets
    ``processed = True``. Samples with an invalid ``source`` or ``region`` are
    logged and left unprocessed.
    """
    logger, _ = get_file_logger(
        name='custom_validation_by_sources',
        directory=join(config.TASKS_LOGS_DIRECTORY, str(datetime.date.today()))
    )

    yt = get_yt_client()

    # Derived from the constants so the warning text can never drift from the
    # set of sources that is actually accepted.
    valid_sources = ('all',) + SOURCES_AGE_GENDER + SOURCES_INCOME

    for custom_sample in yt.list(join(config.SOCDEM_VALIDATION_DIR, 'custom_input'), absolute=True):
        if yt.get_attribute(custom_sample, 'processed', default=None):
            continue

        # The transaction object is handed to every YQL query issued for
        # this sample.
        with yt.Transaction() as transaction:
            logger.info('start processing sample {sample}'.format(sample=custom_sample))

            source = yt.get_attribute(custom_sample, 'source', default='all')
            if source not in valid_sources:
                logger.warning('Invalid source. Valid sources are {sources}.'.format(
                    sources=', '.join(valid_sources)))
                continue

            # List the validation directory only when the sample carries no
            # explicit date: this avoids a needless YT request (and a possible
            # IndexError on an empty directory) for samples with a date.
            # NOTE(review): `[-1]` relies on yt.list returning the dates in
            # ascending order - confirm against the YT client behaviour.
            date_str = yt.get_attribute(custom_sample, 'date', default=None)
            if date_str is None:
                date_str = yt.list(join(config.SOCDEM_VALIDATION_DIR, 'passport'))[-1]

            requested_region = yt.get_attribute(custom_sample, 'region', default='all')
            if requested_region == 'all':
                regions = REGIONS
            elif requested_region in REGIONS:
                regions = (requested_region,)
            else:
                logger.warning('Invalid region. Valid regions are {regions}.'.format(
                    regions=', '.join(('all',) + REGIONS)))
                continue

            logger.info('sample attributes have been taken')

            for region in regions:
                validation_tables, segment_types = _validation_inputs(source, date_str, region)
                validation_fields = ''.join(
                    validation_fields_template.format(segment=segment)
                    for segment in segment_types
                )

                output_folder = join(config.SOCDEM_VALIDATION_DIR, 'custom_output', basename(custom_sample), region)
                joined_sample = join(output_folder, 'sample')

                yql_query(
                    query_string=join_prediction_and_validation_template.format(
                        input_table=custom_sample,
                        validation_fields=validation_fields,
                        validation_tables=validation_tables,
                        output_table=joined_sample,
                    ),
                    yt=yt,
                    logger=logger,
                    transaction=transaction,
                )

                logger.info('final validation sample has been made for {region}'.format(region=region))

                for segment_type in segment_types:
                    yql_query(
                        query_string=calculate_stats_query_template.format(
                            input_table=joined_sample,
                            segment_type=segment_type,
                            output_table=join(output_folder, segment_type),
                        ),
                        yt=yt,
                        logger=logger,
                        transaction=transaction,
                    )

                logger.info('statistics have been calculated for {region}'.format(region=region))

            yt.set_attribute(custom_sample, 'processed', True)


# Run the processing only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
