# -*- coding: utf-8 -*-

import os

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.socdem import get_year_from_birth_date, is_valid_birth_date, socdem_storage_schema
from crypta.profile.utils.luigi_utils import BaseYtTask, YtDailyRewritableTarget, ExternalInput
from crypta.profile.tasks.common.merge_socdem_storage import MergeSocdemStorage


class PassportMapper(object):
    """Mapper that turns passport dump rows into socdem storage rows.

    Each emitted row is keyed by the passport ``uid`` (``id_type`` is
    ``'puid'``, ``source`` is ``'passport'``) and carries whichever of the
    birth date / gender fields passed validation. Rows with neither field
    are dropped.
    """

    def __init__(self, current_date):
        """Remember the year and the timestamp derived from ``current_date``.

        Args:
            current_date: Date string whose first ``'-'``-separated part is
                the year.
        """
        year_part = current_date.split('-')[0]
        self.current_year = int(year_part)
        self.update_time = date_helpers.from_utc_date_string_to_timestamp(current_date)

    def __call__(self, row):
        """Yield at most one socdem row built from a passport dump ``row``.

        Args:
            row: Mapping with a mandatory ``uid`` key and optional
                ``birthday`` and ``gender`` keys.

        Yields:
            A dict with ``id``, ``id_type``, ``update_time`` and ``source``
            plus ``birth_date``/``year_of_birth`` and/or ``gender``; nothing
            when neither a valid birthday nor a valid gender is present.
        """
        record = {
            'id': row['uid'],
            'id_type': 'puid',
            'update_time': self.update_time,
            'source': 'passport',
        }

        # The validator is only consulted for non-empty birthdays.
        birthday = row.get('birthday')
        if birthday and is_valid_birth_date(birthday, self.current_year):
            record['birth_date'] = birthday
            record['year_of_birth'] = get_year_from_birth_date(birthday)

        # Only the two known gender codes are accepted.
        gender = row.get('gender')
        if gender and gender in ('m', 'f'):
            record['gender'] = gender

        # Nothing useful was extracted - emit no row at all.
        if 'gender' not in record and 'birth_date' not in record:
            return

        yield record


def remove_duplicates(_, rows):
    """Reducer that keeps only the first row of a key group.

    Args:
        _: Reduce key of the group (unused).
        rows: Iterable of rows that share the same reduce key.

    Yields:
        The first row of ``rows``; nothing if ``rows`` is empty.
    """
    # A bare ``next(rows)`` inside a generator is converted into RuntimeError
    # on Python 3.7+ (PEP 479) when ``rows`` is empty, so the first row is
    # taken with a loop that stops after one item instead.
    for row in rows:
        yield row
        break


class PassportSocdem(BaseYtTask):
    """Task that writes passport-sourced socdem rows for puids.

    Reads the ``uid``, ``birthday`` and ``gender`` columns of the passport
    dump for the day before ``date`` and writes the mapped, de-duplicated
    rows into the ``puid/passport`` table under the socdem storage directory.
    """

    date = luigi.Parameter()
    juggler_host = config.CRYPTA_ML_JUGGLER_HOST
    task_group = 'import_socdem_data'

    def requires(self):
        """Return the upstream tasks keyed by name."""
        # prevent overwriting after validation sample creation
        merge_task = MergeSocdemStorage(date_helpers.get_tomorrow(self.date))

        dump_path = os.path.join(
            config.PASSPORT_DUMP_DIRECTORY,
            date_helpers.get_yesterday(self.date),
        )
        return {
            'MergeSocdemStorage': merge_task,
            'PassportDump': ExternalInput(dump_path),
        }

    def output(self):
        """Return the target for the ``puid/passport`` table bound to ``date``."""
        table_path = os.path.join(config.SOCDEM_STORAGE_YT_DIR, 'puid', 'passport')
        return YtDailyRewritableTarget(table_path, self.date)

    def run(self):
        """Rebuild the output table inside a single YT transaction.

        Creates an empty table with the socdem storage schema, fills it with
        a map-reduce over the passport dump (reduced by ``id``) and stamps
        the table with the ``generate_date`` attribute.
        """
        with self.yt.Transaction():
            destination = self.output().table
            self.yt.create_empty_table(destination, schema=socdem_storage_schema)

            mapper = PassportMapper(self.date)
            # Only the columns the mapper actually reads are requested.
            source = self.yt.TablePath(
                self.input()['PassportDump'].table,
                columns=('uid', 'birthday', 'gender'),
            )
            self.yt.run_map_reduce(
                mapper,
                remove_duplicates,
                source,
                destination,
                reduce_by='id',
            )

            self.yt.set_attribute(destination, 'generate_date', self.date)
