#!/usr/bin/env python
# -*- coding: utf-8 -*-

from functools import partial
import os
import random

import luigi
from yt.wrapper import create_table_switch

from crypta.profile.utils.config import config
from crypta.profile.utils.loggers import TimeTracker
from crypta.profile.utils.luigi_utils import BaseYtTask, ExternalInput, YtTarget
from crypta.profile.utils.socdem import get_age_segment_from_birth_date

# Expected number of rows to move into the validation sample, per source.
# The task divides these by the source row count to get a per-row sampling
# probability; the values are floats so that division is not truncated to an
# integer under Python 2.
ROWS_TO_SLICE = dict(
    passport=5e5,
    peoplesearch_vk=5e5,
    socialdb=3e5,
)


def get_validation_sample(row, slice_ratio):
    """Mapper that routes one row to the correction or the validation table.

    A uniform random draw is compared with ``slice_ratio``:

    * draw greater than ``slice_ratio`` - the row is emitted unchanged to
      output table 0 (the source without the sample);
    * otherwise - a reduced record with ``id``, ``id_type``,
      ``validation_gender`` and ``validation_age`` is emitted to output
      table 1 (the validation sample).

    :param row: input record; must contain ``id``, ``id_type``, ``gender``
        and ``birth_date`` when it lands in the validation branch.
    :param slice_ratio: probability threshold for the validation branch.
    """
    if random.random() > slice_ratio:
        # Table 0: correction sample, the row is passed through as is.
        yield create_table_switch(0)
        yield row
        return

    # Table 1: validation sample, only identifiers and labels are kept.
    yield create_table_switch(1)

    validation_record = {
        'id': row['id'],
        'id_type': row['id_type'],
        'validation_gender': row['gender'],
    }
    # A falsy birth date (None, empty string) yields no age label.
    if row['birth_date']:
        validation_record['validation_age'] = get_age_segment_from_birth_date(row['birth_date'])
    else:
        validation_record['validation_age'] = None

    yield validation_record


class MakeSocdemValidationSamples(BaseYtTask):
    """Split a random validation sample off every socdem source table.

    For each source returned by ``requires()`` a map operation sends every
    row either to a temporary table (row unchanged) or to the dated
    ``sample`` table returned by ``output()`` (identifiers and validation
    labels only). The temporary table is then copied over the source table
    with ``force=True``, so the sampled rows are no longer present in the
    source. All sources are processed inside a single YT transaction.
    """

    task_group = 'validation_sample_building'
    priority = 100
    date = luigi.Parameter()

    def requires(self):
        """Return the source socdem tables keyed by source name."""
        return {
            'passport': ExternalInput(
                table=os.path.join(config.SOCDEM_STORAGE_YT_DIR, 'puid', 'passport'),
            ),
            'peoplesearch_vk': ExternalInput(
                table=os.path.join(config.SOCDEM_STORAGE_YT_DIR, 'vk_id', 'peoplesearch_vk'),
            ),
            'socialdb': ExternalInput(
                table=os.path.join(config.SOCDEM_STORAGE_YT_DIR, 'puid', 'socialdb'),
            ),
        }

    def output(self):
        """Return the dated validation sample tables keyed by source name."""
        return {
            'passport': YtTarget(
                table=os.path.join(config.SOCDEM_VALIDATION_DIR, 'passport', self.date, 'sample'),
            ),
            'peoplesearch_vk': YtTarget(
                table=os.path.join(config.SOCDEM_VALIDATION_DIR, 'peoplesearch_vk', self.date, 'sample'),
            ),
            'socialdb': YtTarget(
                table=os.path.join(config.SOCDEM_VALIDATION_DIR, 'socialdb', self.date, 'sample'),
            ),
        }

    def run(self):
        """Build the validation sample for every source in one transaction."""
        with TimeTracker(monitoring_name=self.__class__.__name__):
            with self.yt.Transaction():
                # Resolve the inputs once; their keys mirror those of output().
                input_targets = self.input()

                # .items() works on both Python 2 and 3, unlike .iteritems().
                for source, output_target in self.output().items():
                    self._split_validation_sample(
                        source,
                        input_targets[source].table,
                        output_target.table,
                    )

    def _split_validation_sample(self, source, source_table, sample_table):
        """Move a random slice of ``source_table`` into ``sample_table``.

        :param source: source name, a key of ``ROWS_TO_SLICE``.
        :param source_table: YT path of the source table; it is overwritten
            with the rows that were not sampled.
        :param sample_table: YT path of the validation sample table to create.
        """
        self.yt.create_directory(os.path.dirname(sample_table))

        self.yt.create_empty_table(
            sample_table,
            schema={
                'id': 'string',
                'id_type': 'string',
                'validation_gender': 'string',
                'validation_age': 'string',
            },
        )

        with self.yt.TempTable() as source_without_sample:
            # The temporary table receives unchanged source rows, so it
            # reuses the source schema.
            self.yt.create_empty_table(
                source_without_sample,
                schema=self.yt.get_yt_schema_dict_from_table(source_table),
            )

            # Per-row probability of landing in the validation sample.
            # NOTE(review): a zero row count raises ZeroDivisionError here,
            # and a ratio >= 1 moves every row into the sample - confirm
            # that sources are always larger than ROWS_TO_SLICE.
            slice_ratio = ROWS_TO_SLICE[source] / self.yt.row_count(source_table)

            self.yt.run_map(
                partial(get_validation_sample, slice_ratio=slice_ratio),
                source_table,
                # Order matters: the mapper switches to table 0 for rows
                # kept in the source and to table 1 for the sample.
                [
                    source_without_sample,
                    sample_table,
                ],
            )

            # Flag the dated directory as not processed; presumably a
            # downstream consumer flips this attribute - verify.
            self.yt.set_attribute(
                os.path.join(config.SOCDEM_VALIDATION_DIR, source, self.date),
                'processed',
                False,
            )

            # Replace the source with its copy that lacks the sampled rows.
            self.yt.copy(source_without_sample, source_table, force=True)
