# -*- coding: utf-8 -*-

import os

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import BaseYtTask, ExternalInput, YtDailyRewritableTarget
from crypta.profile.utils.socdem import socdem_storage_schema, is_valid_birth_date, get_year_from_birth_date

from crypta.profile.tasks.common.merge_socdem_storage import MergeSocdemStorage


# YQL query template; it is filled in with str.format() and expects four
# placeholders: {watson}, {people_search}, {output_watson} and {output_ps}.
#
# Subqueries:
#   $only_watson - rows of {watson} whose id has no match in {people_search};
#   $only_ps     - rows of {people_search} whose id has no match in {watson};
#   $both        - ids present in both tables; year_of_birth and birth_date are
#                  taken from {watson}, gender from {people_search}, and the
#                  update_time/source of each side is kept under a prefixed name.
#
# Writes:
#   {output_watson} - appended to (no TRUNCATE): $only_watson plus the Watson
#                     part of $both, restricted to ids that are not already
#                     present in {output_watson};
#   {output_ps}     - rewritten (WITH TRUNCATE): $only_ps plus the people-search
#                     part of $both for rows with a non-NULL gender.
query_template = """
$only_watson = (
SELECT *
FROM `{watson}` as watson
LEFT ONLY JOIN `{people_search}` as people_search
USING(id)
);

$only_ps = (
SELECT *
FROM `{people_search}` as people_search
LEFT ONLY JOIN `{watson}` as watson
USING(id)
);

$both = (
SELECT
    watson.id as id,
    watson.id_type as id_type,
    watson.year_of_birth as year_of_birth,
    people_search.gender as gender,
    watson.birth_date as birth_date,
    watson.update_time as watson_update_time,
    people_search.update_time as people_search_update_time,
    watson.source as watson_source,
    people_search.source as people_search_source
FROM `{people_search}` as people_search
INNER JOIN `{watson}` as watson
USING(id)
);

INSERT INTO `{output_watson}`
SELECT *
FROM (
    SELECT * FROM $only_watson
    UNION ALL
    SELECT
        id,
        id_type,
        year_of_birth,
        watson_source as source,
        NULL as gender,
        NULL as age,
        NULL as age_segment,
        NULL as income_segment,
        birth_date,
        watson_update_time as update_time
    FROM $both
    WHERE watson_source IS NOT NULL) as new_data
LEFT ONLY JOIN `{output_watson}` as old_data
USING(id);

INSERT INTO `{output_ps}` WITH TRUNCATE
SELECT *
FROM $only_ps
UNION ALL
SELECT
    id,
    id_type,
    NULL as year_of_birth,
    people_search_source as source,
    gender as gender,
    NULL as age,
    NULL as age_segment,
    NULL as income_segment,
    NULL as birth_date,
    people_search_update_time as update_time
FROM $both
WHERE people_search_source IS NOT NULL AND gender IS NOT NULL;
"""


class WatsonMapper(object):
    """Mapper that converts Watson result rows into VK birth-date records.

    For each input row the first element of ``row['Result']`` is inspected and
    the profile id is taken from ``row['Url']`` (the part after ``'/id'``).
    A record is emitted only when a birthday can be extracted and passes
    ``is_valid_birth_date``; every other row is skipped silently.
    """

    def __init__(self, date):
        """Remember the update timestamp and the current year for validation.

        :param date: date string; it is converted to a timestamp with
            ``date_helpers.from_utc_date_string_to_timestamp`` and the text
            before its first ``'-'`` is parsed as the current year
            (presumably a ``YYYY-MM-DD`` string - confirm against callers).
        """
        self.update_time = date_helpers.from_utc_date_string_to_timestamp(date)
        self.current_year = int(date.split('-')[0])

    @staticmethod
    def _two_digits(value):
        """Return ``str(value)``, prefixed with ``'0'`` when ``value < 10``."""
        return '0' + str(value) if value < 10 else str(value)

    def __call__(self, row):
        """Yield at most one socdem record for the given Watson row.

        :param row: mapping with the ``'Result'`` and ``'Url'`` keys.
        :return: generator yielding a dict with ``id``, ``id_type``,
            ``update_time``, ``source``, ``birth_date`` and ``year_of_birth``,
            or nothing when the row is unusable.
        """
        results = row['Result']
        if results is None or len(results) < 1:
            return

        data = results[0]
        url_parts = row['Url'].split('/id')

        # The url must contain exactly one '/id' separator, and the parsed
        # result must carry the person module holding the birthday.
        if len(url_parts) != 2 or 'person_module' not in data:
            return

        profile_id = url_parts[1]

        try:
            birthday = data['person_module']['birthday']
            birth_date = '{}-{}-{}'.format(
                birthday['year'],
                self._two_digits(birthday['month']),
                self._two_digits(birthday['day']),
            )
        except (TypeError, KeyError):
            # TypeError: the birthday (or one of its parts) is None / not comparable.
            # KeyError: the birthday or one of its components is absent; skip the
            # row instead of letting the exception escape from the mapper.
            return

        if not is_valid_birth_date(birth_date, current_year=self.current_year):
            return

        yield {
            'id': profile_id,
            'id_type': 'vk_id',
            'update_time': self.update_time,
            'source': 'watson',
            'birth_date': birth_date,
            'year_of_birth': get_year_from_birth_date(birth_date),
        }


class PeopleSearchSocdem(BaseYtTask):
    """Task that fills the ``vk_id`` socdem storage tables.

    Watson results are parsed with ``WatsonMapper`` into a temporary table,
    which is then combined with the people-search table by ``query_template``
    to produce the ``peoplesearch_vk`` and ``watson`` output tables.
    """

    date = luigi.Parameter()
    juggler_host = config.CRYPTA_ML_JUGGLER_HOST
    task_group = 'import_socdem_data'

    def requires(self):
        """Return the upstream task and the two external input tables."""
        dependencies = {
            # prevent overwriting after validation sample creation
            'MergeSocdemStorage': MergeSocdemStorage(self.date),
        }
        dependencies['PeopleSearchData'] = ExternalInput(
            os.path.join(config.SOCDEM_RAW_STORAGE_YT_DIR, 'peoplesearch_vk'),
        )
        # Use the entry under WATSON_VK_DATA whose name compares greatest.
        newest_watson_entry = max(self.yt.list(config.WATSON_VK_DATA))
        dependencies['WatsonData'] = ExternalInput(
            os.path.join(config.WATSON_VK_DATA, newest_watson_entry, 'results'),
        )
        return dependencies

    def output(self):
        """Return the daily rewritable targets for both ``vk_id`` tables."""
        vk_storage_dir = os.path.join(config.SOCDEM_STORAGE_YT_DIR, 'vk_id')
        people_search_target = YtDailyRewritableTarget(
            os.path.join(vk_storage_dir, 'peoplesearch_vk'),
            self.date,
        )
        watson_target = YtDailyRewritableTarget(
            os.path.join(vk_storage_dir, 'watson'),
            self.date,
        )
        return {
            'VkPeopleSearchOutput': people_search_target,
            'VkWatsonOutput': watson_target,
        }

    def run(self):
        """Parse Watson data, run the YQL query and stamp both outputs."""
        self.yt.config['spec_defaults']['pool'] = config.SEGMENTS_POOL

        # Schema of the temporary table that receives the mapper output.
        schema_columns = []
        for column_name, column_type in socdem_storage_schema.iteritems():
            schema_columns.append({"name": column_name, "type": column_type})
        tmp_table_attributes = {'schema': schema_columns}

        with self.yt.Transaction() as transaction, \
                self.yt.TempTable(attributes=tmp_table_attributes) as parsed_watson_table:

            targets = self.output()
            people_search_output = targets['VkPeopleSearchOutput'].table
            watson_output = targets['VkWatsonOutput'].table

            self.yt.create_empty_table(
                people_search_output,
                schema=socdem_storage_schema,
            )

            watson_input = self.input()['WatsonData'].table
            self.yt.run_map(
                WatsonMapper(self.date),
                watson_input,
                parsed_watson_table,
            )

            people_search_input = self.input()['PeopleSearchData'].table
            query = query_template.format(
                watson=parsed_watson_table,
                people_search=people_search_input,
                output_watson=watson_output,
                output_ps=people_search_output,
            )
            self.yql.query(query, transaction=transaction)

            # Mark both outputs with the date they were generated for.
            for output_table in (watson_output, people_search_output):
                self.yt.set_attribute(output_table, 'generate_date', self.date)
