from datetime import timedelta
import os

from yt.yson import get_bytes

from crypta.lib.proto.user_data import user_data_stats_pb2
from crypta.lib.python import templater
from crypta.lib.python.yt import yt_helpers
from crypta.lookalike.lib.python.utils import (
    fields,
    utils,
)
from crypta.lookalike.lib.python.utils.config import config


class CaesarGoalFeaturesMapper(object):
    """Mapper converting rows with serialized ``TUserDataStats`` into goal feature rows.

    For every input row the top-weighted host, app and word affinity tokens
    are collected (each list capped by its own config limit) and emitted
    together with the goal id and the date given at construction time.
    """

    def __init__(self, date):
        """Remember ``date``; it is written to every emitted row as the last update date."""
        self.date = date

    def start(self):
        """Create the protobuf message that ``__call__`` re-parses for each row.

        NOTE(review): presumably invoked by the YT job runtime before the
        first row is processed -- confirm against the YT mapper protocol.
        """
        self.user_data_stats = user_data_stats_pb2.TUserDataStats()

    def __call__(self, row):
        """Yield at most one goal feature row for ``row``.

        Nothing is yielded when the sites, apps and words dictionaries all
        come out empty.
        """
        stats = self.user_data_stats
        stats.ParseFromString(get_bytes(row[fields.stats]))

        record = {
            fields.goal_id: int(row[fields.group_id]),
            fields.affinitive_sites: {},
            fields.affinitive_apps: {},
            fields.affinitive_words: {},
            fields.last_update_date: self.date,
        }

        # (output column, attribute of stats.Affinities, max tokens to keep)
        sources = (
            (fields.affinitive_sites, 'Hosts', config.AFFINITIVE_SITES_TO_STORE_FOR_GOALS),
            (fields.affinitive_apps, 'Apps', config.AFFINITIVE_APPS_TO_STORE_FOR_GOALS),
            (fields.affinitive_words, 'Words', config.AFFINITIVE_WORDS_TO_STORE_FOR_GOALS),
        )

        for output_field, affinity_attr, limit in sources:
            ranked = list(getattr(stats.Affinities, affinity_attr).Token)
            # Heaviest tokens first; the sort is stable, so ties keep input order.
            ranked.sort(key=lambda entry: entry.Weight, reverse=True)
            record[output_field].update(
                (entry.Token, entry.Weight) for entry in ranked[:limit]
            )

        feature_columns = (
            fields.affinitive_sites,
            fields.affinitive_apps,
            fields.affinitive_words,
        )
        if any(record[column] for column in feature_columns):
            yield record


def _create_goal_features_table(yt_client, path):
    """Create an empty goal features table at ``path`` via ``yt_helpers.create_empty_table``.

    Both the "daily latest" and the "full" tables share this schema, so it is
    defined in one place to keep them from drifting apart. A fresh schema dict
    is built on every call.
    """
    yt_helpers.create_empty_table(
        yt_client=yt_client,
        path=path,
        schema={
            fields.goal_id: 'uint64',
            fields.affinitive_sites: 'any',
            fields.affinitive_apps: 'any',
            fields.affinitive_words: 'any',
            fields.last_update_date: 'string',
        },
        additional_attributes={'optimize_for': 'scan'},
        force=True,
    )


def compute(date):
    """Build goal features for Caesar for ``date`` and refresh the output tables.

    Steps, all executed inside a single YT transaction:

    1. Run the ``select_goals_for_caesar.yql`` query, parameterized with two
       temporary tables (``goals_not_to_update_table`` and
       ``goals_for_description_table``).
    2. Call ``utils.describe_segments_in_siberia`` for the goals selected for
       description.
    3. Map the resulting user data stats with ``CaesarGoalFeaturesMapper``
       into the "daily latest" table and sort it by goal id.
    4. Copy the "daily latest" table to a per-date table and set its TTL.
    5. Recreate the "full" table and fill it by sorting together the goals
       not to update and the freshly computed "daily latest" rows.

    Args:
        date: date string; passed to the YQL template, stored by the mapper
            in every row, and used as the name of the per-date table.
    """
    yt_client = utils.get_yt_client(yt_pool='crypta_features_export')
    yql_client = utils.get_yql_client(yt_pool='crypta_features_export')

    with yt_client.Transaction() as transaction, \
            yt_client.TempTable() as goals_not_to_update_table, \
            yt_client.TempTable() as goals_for_description_table, \
            yt_client.TempTable() as goals_user_data_stats_table:
        # NOTE(review): the query presumably fills both temp tables passed
        # to it -- confirm against select_goals_for_caesar.yql.
        query = templater.render_resource(
            '/crypta/lookalike/select_goals_for_caesar.yql',
            strict=True,
            vars={
                'date': date,
                'days_back_for_reached_goals': config.DAYS_BACK_FOR_REACHED_GOALS,
                'days_without_update': config.DAYS_WITHOUT_UPDATE_GOALS,
                'goal_audiences_table': config.GOALS_TABLE,
                'goal_features_for_caesar_full_table': config.GOAL_FEATURES_FOR_CAESAR_FULL_TABLE,
                'caesar_goals_dump_latest_table': config.CAESAR_GOALS_DUMP_LATEST_TABLE,
                'goals_not_to_update_table': goals_not_to_update_table,
                'goals_for_description_table': goals_for_description_table,
            },
        )
        # The YQL query is attached to the surrounding YT transaction by id.
        yql_client.execute(
            query=query,
            transaction=str(transaction.transaction_id),
            title='YQL Select goals for caesar',
        )

        # NOTE(review): presumably writes user data stats for the described
        # goals into goals_user_data_stats_table -- confirm in utils.
        utils.describe_segments_in_siberia(
            yt_client,
            goals_for_description_table,
            goals_user_data_stats_table,
            config.FOR_DESCRIPTION_BY_CRYPTAID_TABLE,
            transaction,
        )

        daily_latest_table = config.GOAL_FEATURES_FOR_CAESAR_DAILY_LATEST_TABLE

        _create_goal_features_table(yt_client, daily_latest_table)

        yt_client.run_map(
            CaesarGoalFeaturesMapper(date=date),
            goals_user_data_stats_table,
            daily_latest_table,
        )

        yt_client.run_sort(
            daily_latest_table,
            sort_by=fields.goal_id,
        )

        # Keep a per-date copy of today's result with a limited lifetime.
        daily_table = os.path.join(config.GOAL_FEATURES_FOR_CAESAR_DAILY_DIR, date)
        yt_client.copy(
            daily_latest_table,
            daily_table,
            force=True,
        )

        yt_helpers.set_ttl(
            table=daily_table,
            ttl_timedelta=timedelta(days=config.GOAL_FEATURES_FOR_CAESAR_DAILY_TABLES_TTL_DAYS),
            yt_client=yt_client,
        )

        _create_goal_features_table(yt_client, config.GOAL_FEATURES_FOR_CAESAR_FULL_TABLE)

        # The full table is the untouched goals plus today's recomputed
        # ones, sorted by goal id.
        yt_client.run_sort(
            [
                goals_not_to_update_table,
                daily_latest_table,
            ],
            config.GOAL_FEATURES_FOR_CAESAR_FULL_TABLE,
            sort_by=fields.goal_id,
        )
