#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import defaultdict

import luigi

from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import BaseYtTask, YtDailyRewritableTarget
from crypta.profile.utils.interests_helpers import (
    LabSegmentsInfo,
    get_catalogia_interest_segment_ids_with_categories,
)


def generate_catalogia_interests_mapping_for_bb():
    """Yield one row per catalogia category with its short-term interest ids.

    For every ``(segment_id, catalogia_categories)`` pair returned by
    ``get_catalogia_interest_segment_ids_with_categories()``, each segment id
    produced by ``LabSegmentsInfo.get_segment_with_segment_only_parents`` is
    translated through ``lab_segment_id_to_shortterm_interest_id`` and the
    resulting ids are accumulated, without duplicates, per catalogia category.

    This is a generator: nothing is computed until it is iterated.

    Yields:
        dict: ``{'catalogia_id': <category>, 'shortterm_interests_ids': [...]}``
        with the ids listed in ascending order.

    Raises:
        KeyError: propagated from the ``lab_segment_id_to_shortterm_interest_id``
            lookup when it has no entry for a segment id (assuming it is a
            plain mapping -- verify against ``LabSegmentsInfo``).
    """
    segment_id_to_catalogia_categories = get_catalogia_interest_segment_ids_with_categories()

    lab_segments_info = LabSegmentsInfo()

    category_to_interest_ids = defaultdict(set)
    # .items() rather than .iteritems(): the latter only exists on Python 2 dicts.
    for segment_id, catalogia_categories in segment_id_to_catalogia_categories.items():
        for catalogia_category in catalogia_categories:
            # Presumably the segment itself plus its segment-type ancestors;
            # confirm against LabSegmentsInfo.
            for related_segment_id in lab_segments_info.get_segment_with_segment_only_parents(segment_id):
                interest_id = lab_segments_info.lab_segment_id_to_shortterm_interest_id[related_segment_id]
                category_to_interest_ids[catalogia_category].add(interest_id)

    for catalogia_category, interest_ids in category_to_interest_ids.items():
        yield {
            'catalogia_id': catalogia_category,
            # A set has no defined order; sort so repeated runs emit identical
            # rows (ids are assumed to be mutually comparable, e.g. all ints).
            'shortterm_interests_ids': sorted(interest_ids),
        }


class PrepareCatalogiaMappingForBB(BaseYtTask):
    """Luigi task that writes the catalogia-to-interests mapping into YT.

    The rows come from ``generate_catalogia_interests_mapping_for_bb()`` and
    are stored in the table configured as
    ``config.CATALOGIA_TO_INTERESTS_BIGB_MAPPING_TABLE``.
    """

    # Date this run is for; also stored on the table as 'generate_date'.
    date = luigi.Parameter()
    task_group = 'catalogia_mapping'

    def output(self):
        """Return a ``YtDailyRewritableTarget`` for the mapping table and ``self.date``."""
        table_path = config.CATALOGIA_TO_INTERESTS_BIGB_MAPPING_TABLE
        return YtDailyRewritableTarget(table_path, self.date)

    def run(self):
        """Create, fill, sort and tag the output table inside one YT transaction."""
        table_schema = {
            'catalogia_id': 'uint64',
            'shortterm_interests_ids': 'any'
        }

        with self.yt.Transaction():
            destination = self.output().table

            self.yt.create_empty_table(destination, schema=table_schema)
            self.yt.write_table(destination, generate_catalogia_interests_mapping_for_bb())
            self.yt.run_sort(destination, sort_by='catalogia_id')
            # Record which date the table contents were generated for.
            self.yt.set_attribute(destination, 'generate_date', self.date)
