#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import time
import datetime
from collections import defaultdict
from functools import partial

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import (
    BaseYtTask,
    OldNodesByNameCleaner,
    YtTarget,
    YtDailyRewritableTarget,
)
from crypta.profile.utils.interests_helpers import LabSegmentsInfo


# Query template used by InterestsMergeTask.run to build one merged table.
# It is filled in with str.format and expects two placeholders:
#   destination_table - path of the table the aggregated rows are inserted into;
#   source_tables     - the already back-quoted, comma-separated list of input
#                       tables handed to CONCAT.
# The query counts input rows per (yandexuid, interest_lab_id, date,
# datetime_bin) and emits `date` cast to String.
interest_merge_query_template = """
INSERT INTO `{destination_table}`
SELECT
    yandexuid,
    interest_lab_id,
    CAST(`date` AS String) AS `date`,
    datetime_bin,
    COUNT(*) AS `count`
FROM CONCAT({source_tables})
GROUP BY yandexuid, interest_lab_id, `date`, datetime_bin
"""


class InterestsMergeTask(BaseYtTask):
    """Merge one day's processed interest tables into a single sorted table.

    Inputs are every table found under
    ``config.INTERESTS_PROCESSED_FOLDER/<date>``; the result is written to
    ``config.INTERESTS_MERGED_FOLDER/<date>`` and stamped with a
    ``generate_datetime`` attribute that ``complete`` later uses to detect
    inputs that appeared after the merge.
    """

    date = luigi.Parameter()
    task_group = 'longterm_interests'

    def output(self):
        """Return the target for the merged table of ``self.date``."""
        merged_path = os.path.join(config.INTERESTS_MERGED_FOLDER, self.date)
        return YtTarget(merged_path)

    def run(self):
        """Aggregate all processed tables of the day and sort the result.

        Returns without creating the output when no processed tables are
        found for the day.
        """
        self.yt.config['spec_defaults']['pool'] = config.INTERESTS_POOL

        with self.yt.Transaction() as transaction:
            processed_folder = os.path.join(config.INTERESTS_PROCESSED_FOLDER, self.date)
            source_tables = list(self.yt.search(processed_folder, node_type='table'))

            # Nothing to merge yet: leave the output absent.
            if not source_tables:
                return

            merged_schema = {
                'yandexuid': 'uint64',
                'interest_lab_id': 'string',
                'date': 'string',
                'datetime_bin': 'string',
                'count': 'uint64',
            }
            self.yt.create_empty_table(self.output().table, schema=merged_schema)

            # The timestamp is taken before the query runs, and is what
            # complete() compares against the inputs' generate_datetime.
            generated_at = date_helpers.from_timestamp_to_datetime(time.time())
            generated_at_string = date_helpers.to_date_string(
                generated_at,
                date_format=date_helpers.YT_DATETIME_FORMAT,
            )
            self.yt.set_attribute(self.output().table, 'generate_datetime', generated_at_string)

            # Each input path is wrapped in back-quotes for CONCAT(...).
            quoted_sources = '`{}`'.format('`, `'.join(source_tables))
            merge_query = interest_merge_query_template.format(
                source_tables=quoted_sources,
                destination_table=self.output().table,
            )
            self.yql.query(query_string=merge_query, transaction=transaction)

            self.yt.run_sort(
                self.output().table,
                sort_by=('yandexuid', 'interest_lab_id'),
            )

    def complete(self):
        """Tell whether the merged table exists and is not older than its inputs.

        The day is re-merged when some processed table was generated after the
        current output, i.e. not all inputs were ready at the previous merge.
        """
        def read_generate_datetime(table):
            # Parse the 'generate_datetime' attribute of a table.
            raw_value = self.yt.get_attribute(table, 'generate_datetime')
            return date_helpers.from_date_string_to_datetime(raw_value, date_helpers.YT_DATETIME_FORMAT)

        if not self.output().exists():
            return False

        merged_generated_at = read_generate_datetime(self.output().table)

        # An output produced today (or later) is accepted without checking inputs.
        if merged_generated_at.date() >= datetime.date.today():
            return True

        processed_folder = os.path.join(config.INTERESTS_PROCESSED_FOLDER, self.date)
        has_newer_input = any(
            read_generate_datetime(table) > merged_generated_at
            for table in self.yt.search(processed_folder, node_type='table')
        )
        return not has_newer_input


def get_longterm_interests_reducer(key, rows, min_interest_duration_in_days):
    """Collapse the rows of one reduce key and keep it only if long-lived.

    Builds a ``dates`` mapping ``{date: {datetime_bin: count}}`` from the rows
    and yields ``key`` extended with that mapping when the number of days
    between the smallest and the largest date is at least
    ``min_interest_duration_in_days``. Otherwise nothing is yielded.

    The smallest/largest dates are picked by comparing the raw date strings,
    which are then parsed with ``date_helpers.DATE_FORMAT``.
    """
    result = dict(key)
    counts_by_date = defaultdict(dict)
    result['dates'] = counts_by_date

    for record in rows:
        counts_by_date[record['date']][record['datetime_bin']] = record['count']

    earliest = date_helpers.from_date_string_to_datetime(min(counts_by_date), date_helpers.DATE_FORMAT)
    latest = date_helpers.from_date_string_to_datetime(max(counts_by_date), date_helpers.DATE_FORMAT)
    active_days = (latest - earliest).days

    if active_days < min_interest_duration_in_days:
        return

    yield result


def get_longterm_interests_to_profiles_reducer(key, rows, longterm_interest_ids):
    """Build one profile row listing the long-term interests of a yandexuid.

    Each row's ``interest_lab_id`` is looked up in ``longterm_interest_ids``;
    ids without a mapping are skipped and duplicates are collapsed. Exactly
    one row is yielded per key, even when no interest is mapped (the list is
    then empty). The order of the resulting list is not defined.
    """
    profile_id = str(key['yandexuid'])

    mapped_ids = (
        longterm_interest_ids.get(record['interest_lab_id'])
        for record in rows
    )
    # A lab id with no mapping is a short-term interest only, so it is dropped.
    unique_longterm_ids = {mapped for mapped in mapped_ids if mapped is not None}

    yield {
        'id': profile_id,
        'id_type': 'yandexuid',
        'longterm_interests': list(unique_longterm_ids),
    }


class LongtermInterestsTask(BaseYtTask):
    """Compute long-term interests per yandexuid from the merged daily tables.

    Two tables are produced: an intermediate table under
    ``config.INTERESTS_LONGTERM_FOLDER/<date>`` holding per-interest activity
    dates, and the task output with one profile row per yandexuid.
    """

    date = luigi.Parameter()
    task_group = 'longterm_interests'

    def requires(self):
        """Depend on the daily merge tasks and on the two folder cleaners."""
        history_length = config.NUMBER_OF_DAYS_TO_CALCULATE_LONGTERM_INTERESTS

        merge_tasks = [
            InterestsMergeTask(merge_date)
            for merge_date in date_helpers.generate_back_dates(self.date, history_length)
        ]
        # Merged tables are kept a few days longer than the window that reads them.
        merged_folder_cleaner = OldNodesByNameCleaner(
            date=self.date,
            folder=config.INTERESTS_MERGED_FOLDER,
            lifetime=config.NUMBER_OF_DAYS_TO_CALCULATE_LONGTERM_INTERESTS + 5,
        )
        longterm_folder_cleaner = OldNodesByNameCleaner(
            date=self.date,
            folder=config.INTERESTS_LONGTERM_FOLDER,
            lifetime=5,
        )

        return {
            'MergeTasks': merge_tasks,
            'MergedFolderCleaner': merged_folder_cleaner,
            'LongtermFolderCleaner': longterm_folder_cleaner,
        }

    def output(self):
        """Return the daily rewritable target for the 'longterm_interests' segment part."""
        segment_part_path = os.path.join(
            config.PROFILES_SEGMENT_PARTS_YT_DIRECTORY,
            'longterm_interests',
        )
        return YtDailyRewritableTarget(segment_part_path, self.date)

    def run(self):
        """Run both reduce stages inside a single YT transaction."""
        self.yt.config['spec_defaults']['pool'] = config.SEGMENTS_POOL

        with self.yt.Transaction():
            merged_tables = [merge_target.table for merge_target in self.input()['MergeTasks']]
            longterm_table = os.path.join(config.INTERESTS_LONGTERM_FOLDER, self.date)

            # Stage 1: keep (yandexuid, interest) pairs active for long enough.
            longterm_schema = {
                'yandexuid': 'uint64',
                'interest_lab_id': 'string',
                'dates': 'any',
            }
            self.yt.create_empty_table(longterm_table, schema=longterm_schema)

            duration_filter = partial(
                get_longterm_interests_reducer,
                min_interest_duration_in_days=config.MIN_INTEREST_DURATION_IN_DAYS,
            )
            self.yt.run_reduce(
                duration_filter,
                merged_tables,
                longterm_table,
                reduce_by=('yandexuid', 'interest_lab_id'),
            )
            self.yt.run_sort(longterm_table, sort_by=('yandexuid', 'interest_lab_id'))

            # Stage 2: fold the surviving interests into one profile row per yandexuid.
            profiles_schema = {
                'id': 'string',
                'id_type': 'string',
                'longterm_interests': 'any',
            }
            self.yt.create_empty_table(self.output().table, schema=profiles_schema)

            profile_builder = partial(
                get_longterm_interests_to_profiles_reducer,
                longterm_interest_ids=LabSegmentsInfo().lab_segment_id_to_longterm_interest_id,
            )
            self.yt.run_reduce(
                profile_builder,
                longterm_table,
                self.output().table,
                reduce_by='yandexuid',
            )
            self.yt.run_sort(self.output().table, sort_by=['id', 'id_type'])

            # Record which date this output was generated for.
            self.yt.set_attribute(self.output().table, 'generate_date', self.date)
