# -*- coding: utf-8 -*-

import os
import json
from urllib import unquote
from urlparse import urlparse, parse_qs

import luigi
from yt.wrapper import create_table_switch, with_context

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.socdem import socdem_storage_schema
from crypta.profile.utils.socdem import is_valid_birth_date, get_year_from_birth_date
from crypta.profile.utils.luigi_utils import YtDailyRewritableTarget, AttributeExternalInput, BaseYtTask


# Day.month.year pattern handed to is_valid_birth_date when checking the
# ``bdate`` field of a VK API result.
VK_DATE_FORMAT = '%d.%m.%Y'


@with_context
class FilterVKSocdemReducer(object):
    """Reducer merging freshly extracted VK socdem rows into the stored row.

    Rows coming from input table 0 are treated as the already stored record
    for the key; rows from any other input table contribute candidate
    ``gender`` / ``birth_date`` values. A candidate value is accepted only
    when all fresh rows agree on it; otherwise the stored value (if any) is
    used as a fallback.
    """

    def __init__(self, id_type, date):
        """Remember the identifier type and the update timestamp.

        :param id_type: value written to the ``id_type`` column of every
            emitted row.
        :param date: date string converted to a timestamp through
            ``date_helpers.from_utc_date_string_to_timestamp`` and written
            to ``update_time``.
        """
        self.id_type = id_type
        self.update_time = date_helpers.from_utc_date_string_to_timestamp(date)

    def __call__(self, key, rows, context):
        """Yield at most one merged row for ``key``.

        :param key: reduce key; its ``id`` field is copied to the output.
        :param rows: iterable of rows sharing the key.
        :param context: object exposing ``table_index`` of the current row.
        """
        stored = None
        seen_genders = set()
        seen_birth_dates = set()

        for record in rows:
            if context.table_index == 0:
                # Table 0 carries the previously stored record.
                stored = record
                continue

            if record.get('gender'):
                seen_genders.add(record['gender'])
            if record.get('birth_date'):
                seen_birth_dates.add(record['birth_date'])

        merged = {
            'id': key['id'],
            'source': 'watch-log_vk',
            'id_type': self.id_type,
            'update_time': self.update_time,
        }

        # A fresh value is trusted only when it is unambiguous.
        if len(seen_genders) == 1:
            (merged['gender'],) = seen_genders

        if len(seen_birth_dates) == 1:
            (birth_date,) = seen_birth_dates
            merged['birth_date'] = birth_date
            merged['year_of_birth'] = get_year_from_birth_date(birth_date)

        if stored:
            # Fill the gaps from the stored record.
            if 'gender' not in merged and stored['gender']:
                merged['gender'] = stored['gender']

            if 'birth_date' not in merged:
                if stored['birth_date']:
                    merged['birth_date'] = stored['birth_date']
                    merged['year_of_birth'] = stored['year_of_birth']
                elif stored['year_of_birth']:
                    # Only the year is known for this id.
                    merged['year_of_birth'] = stored['year_of_birth']

        # Rows without any socdem field are dropped.
        if any(field in merged for field in ('gender', 'birth_date', 'year_of_birth')):
            yield merged


class VKSocdemMapper(object):
    """Mapper extracting gender and birth date from VK API results found in URLs.

    For every input row the lower-cased ``url`` and, if nothing was found
    there, the lower-cased ``referer`` are searched for an ``api_result``
    query parameter containing a JSON VK API response. Extracted values are
    emitted to output table 0 keyed by ``yandexuid`` and, when the response
    carries a usable positive VK id, to output table 1 keyed by that id.
    """

    # Substrings marking a (lower-cased) URL as related to vk.com.
    _VK_MARKERS = ('%2f%2fvk.com', '.vk.com', '//vk.com')

    # Referers that are the bare VK redirect page.
    _VK_AWAY_URLS = ('https://away.vk.com/away.php', 'http://away.vk.com/away.php')

    # Mapping of the VK ``sex`` code to the stored gender letter.
    _GENDER_BY_VK_SEX = {1: 'f', 2: 'm'}

    def __init__(self, current_year):
        """
        :param current_year: year passed to ``is_valid_birth_date`` when
            validating birth dates.
        """
        self.current_year = current_year

    @staticmethod
    def _find_vk(url):
        """Return True when ``url`` contains one of the vk.com markers."""
        return any(marker in url for marker in VKSocdemMapper._VK_MARKERS)

    @staticmethod
    def _find_vk_away(url):
        """Return True when ``url`` is exactly the VK redirect page.

        NOTE(review): this is an exact comparison, so an away.php URL that
        carries a query string is not recognised here - confirm this is
        the intended behaviour.
        """
        return url in VKSocdemMapper._VK_AWAY_URLS

    @staticmethod
    def _is_valid_vk_id(vk_id):
        """Return True when ``vk_id`` can be read as a positive integer.

        The id comes from JSON found in an arbitrary URL, so it may be of
        any JSON type; anything that is not convertible to a positive
        integer (including booleans) is rejected instead of raising.
        """
        if vk_id is None or isinstance(vk_id, bool):
            return False
        try:
            return int(vk_id) > 0
        except (TypeError, ValueError, OverflowError):
            return False

    def _get_vk_socdem_from_url(self, url):
        """Parse the ``api_result`` query parameter of ``url``.

        :param url: URL string to inspect.
        :returns: dict with ``vk_id`` and at least one of ``gender`` /
            ``birth_date`` (formatted ``YYYY-MM-DD``), or ``None`` when the
            URL carries no usable data or cannot be parsed.
        """
        try:
            params = parse_qs(urlparse(url).query)
            if 'api_result' not in params:
                return None

            # parse_qs has already percent-decoded the value once; it is
            # unquoted a second time before being parsed as JSON.
            # ``encoding`` is deliberately not passed: UTF-8 is the default
            # on Python 2 and the argument no longer exists on Python 3.9+,
            # where the resulting TypeError would be swallowed below.
            api_result = json.loads(unquote(params['api_result'][0]))['response']

            if isinstance(api_result, list) and len(api_result) > 0:
                api_result = api_result[0]

            if not isinstance(api_result, dict):
                return None

            result = {'vk_id': api_result.get('id')}

            birth_date = api_result.get('bdate')
            if birth_date and is_valid_birth_date(birth_date, self.current_year, VK_DATE_FORMAT):
                day, month, year = birth_date.split('.')
                result['birth_date'] = '-'.join([year, month.zfill(2), day.zfill(2)])

            gender = api_result.get('sex')
            if gender:
                sex_code = int(gender)
                if sex_code in self._GENDER_BY_VK_SEX:
                    result['gender'] = self._GENDER_BY_VK_SEX[sex_code]

            if 'gender' in result or 'birth_date' in result:
                return result
        except Exception:
            # Best effort: the payload is arbitrary user-supplied text, so any
            # parsing failure simply means "no data in this URL".
            return None

        return None

    def __call__(self, row):
        """Yield table switches and socdem rows extracted from ``row``.

        :param row: input row with ``url``, ``referer`` and ``yandexuid``.
        """
        lower_url = row['url'].lower() if row['url'] is not None else ''
        lower_referer = row['referer'].lower() if row['referer'] is not None else ''

        vk_api_result = None

        if self._find_vk(lower_url):
            vk_api_result = self._get_vk_socdem_from_url(lower_url)

        # The referer is consulted only when the URL itself gave nothing.
        if vk_api_result is None:
            if self._find_vk(lower_referer) and not self._find_vk_away(lower_referer):
                vk_api_result = self._get_vk_socdem_from_url(lower_referer)

        if not vk_api_result:
            return
        if not (vk_api_result.get('gender') or vk_api_result.get('birth_date')):
            return

        socdem_result_row = {
            'birth_date': vk_api_result.get('birth_date'),
            'gender': vk_api_result.get('gender'),
        }

        result_row = socdem_result_row.copy()
        result_row['id'] = str(row['yandexuid'])

        yield create_table_switch(0)
        yield result_row

        vk_id = vk_api_result.get('vk_id')

        # A malformed id must not abort the job; the yandexuid row above is
        # still emitted and only the vk_id row is skipped.
        if self._is_valid_vk_id(vk_id):
            result_row = socdem_result_row.copy()
            result_row['id'] = str(vk_id)

            yield create_table_switch(1)
            yield result_row


class MetricsAndBarVKSocdem(BaseYtTask):
    """Daily task refreshing the ``watch-log_vk`` socdem storage tables.

    The parsed Metrics and Bar logs for ``date`` are mapped with
    ``VKSocdemMapper`` and the result is merged into the ``yandexuid`` and
    ``vk_id`` storage tables with ``FilterVKSocdemReducer``.
    """

    date = luigi.Parameter()
    juggler_host = config.CRYPTA_ML_JUGGLER_HOST
    task_group = 'import_socdem_data'

    def requires(self):
        """Return the parsed Metrics and Bar log inputs for ``date``.

        Both inputs are required to have the ``closed`` attribute set to True.
        """
        log_columns = ('yandexuid', 'url', 'referer')
        parsed_dirs = (
            ('Metrics', config.METRICS_PARSED_DIR),
            ('Bar', config.BAR_PARSED_DIR),
        )

        return {
            name: AttributeExternalInput(
                os.path.join(parsed_dir, self.date),
                attribute_name='closed',
                attribute_value=True,
                columns=log_columns,
            )
            for name, parsed_dir in parsed_dirs
        }

    def output(self):
        """Return the per-id-type storage targets, keyed by id type."""
        return {
            id_type: YtDailyRewritableTarget(
                os.path.join(config.SOCDEM_STORAGE_YT_DIR, id_type, 'watch-log_vk'),
                date=self.date,
            )
            for id_type in ('yandexuid', 'vk_id')
        }

    def run(self):
        """Extract VK socdem from the logs and merge it into the storage tables.

        All operations run inside a single YT transaction.
        """
        current_year = int(self.date.split('-')[0])

        with self.yt.Transaction(), \
                self.yt.TempTable() as yandexuid_with_dates, \
                self.yt.TempTable() as vk_with_dates:

            # Pairs of (id type, temp table receiving freshly extracted rows);
            # the order matches the mapper's output table indices 0 and 1.
            extracted = (
                ('yandexuid', yandexuid_with_dates),
                ('vk_id', vk_with_dates),
            )

            # Bootstrap missing storage tables as empty tables sorted by id,
            # so they can be fed to the reduce below.
            for id_type, _ in extracted:
                storage_table = self.output()[id_type].table
                if self.yt.exists(storage_table):
                    continue

                self.yt.create_empty_table(storage_table, schema=socdem_storage_schema)
                self.yt.run_sort(storage_table, sort_by='id')

            log_inputs = self.input()
            self.yt.run_map(
                VKSocdemMapper(current_year),
                [log_inputs['Bar'].table, log_inputs['Metrics'].table],
                [yandexuid_with_dates, vk_with_dates],
            )

            for _, fresh_table in extracted:
                self.yt.run_sort(fresh_table, sort_by='id')

            # The storage table goes first so the reducer sees it as table 0;
            # the reduce result overwrites the same storage table.
            for id_type, fresh_table in extracted:
                storage_table = self.output()[id_type].table
                self.yt.run_reduce(
                    FilterVKSocdemReducer(id_type, self.date),
                    [storage_table, fresh_table],
                    storage_table,
                    reduce_by='id',
                )

            for output_target in self.output().values():
                self.yt.run_sort(output_target.table, sort_by='id')
                self.yt.set_attribute(output_target.table, 'generate_date', self.date)
