import collections
import json
import logging
import six

from crypta.lib.python import templater
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.lib.python.yt import yt_helpers
from crypta.lookalike.lib.python.utils import (
    fields,
    utils,
)
from crypta.lookalike.lib.python.utils.config import config


logger = logging.getLogger(__name__)


def get_features_schema(features_mapping):
    """Build string-typed schema columns, one per feature group.

    The group ("major key") of a feature is the part of its name before the
    first underscore; every distinct group yields exactly one column.

    Args:
        features_mapping: iterable of feature names (iterating a dict yields
            its keys, so the features mapping itself can be passed).

    Returns:
        List of ``{'name': <major key>, 'type': 'string'}`` dicts, sorted by
        name.
    """
    major_keys = {feature_name.split('_')[0] for feature_name in features_mapping}
    # Iterating a bare set of strings follows hash order, which may differ
    # between interpreter runs; sort so the column order is reproducible.
    return [
        {
            'name': major_key,
            'type': 'string',
        } for major_key in sorted(major_keys)
    ]


class MakeGroupFeaturesMapper:
    """Row mapper that regroups a segment's flat float features into per-group string columns."""

    def __init__(self, features_mapping):
        # Maps a feature name of the form "<major>_<minor>" to its index in
        # the part of the float feature list that follows the embedding.
        self.features_mapping = features_mapping

    def __call__(self, row):
        """Yield a single output row built from ``row``.

        ``row['segment_float_features']`` is parsed as comma-separated floats.
        The first ``config.EMBEDDING_FEATURES_SIZE`` values are emitted as
        ``segment_site2vec``; the remaining ones are looked up through
        ``features_mapping`` and rendered as space-separated
        ``"<minor>:<value rounded to 4 digits>"`` tokens, one column per major key.
        Group id, affinitive sites/apps and the segment vector are copied as is.
        """
        embedding_size = config.EMBEDDING_FEATURES_SIZE
        float_values = [float(item) for item in row['segment_float_features'].split(',')]
        embedding_part = float_values[:embedding_size]
        tail_part = float_values[embedding_size:]

        # Collect the "<minor>:<value>" tokens of every major key.
        tokens_by_group = collections.defaultdict(list)
        for feature_name, position in six.iteritems(self.features_mapping):
            group_name, feature_suffix = feature_name.split('_')
            token = '{}:{}'.format(feature_suffix, round(tail_part[position], 4))
            tokens_by_group[group_name].append(token)

        result = collections.defaultdict(list)
        for group_name, tokens in six.iteritems(tokens_by_group):
            result[group_name] = ' '.join(tokens)

        # Fixed columns are written last, so they win over a same-named group.
        passthrough = {
            fields.group_id: row[fields.group_id],
            'segment_site2vec': embedding_part,
            fields.segment_affinitive_sites_ids: row[fields.segment_affinitive_sites_ids],
            fields.segment_affinitive_apps: row[fields.segment_affinitive_apps],
            fields.segment_vector: row[fields.segment_vector],
        }
        result.update(passthrough)

        yield result


def describe(yt_client, input_table, output_table, transaction):
    """Describe the segments of ``input_table`` into ``output_table``.

    Thin wrapper over ``utils.describe_segments_in_siberia`` that fixes the
    userdata source to ``config.FOR_DESCRIPTION_BY_CRYPTAID_TABLE``.
    """
    call_kwargs = {
        'yt_client': yt_client,
        'input_table': input_table,
        'output_table': output_table,
        'userdata_for_description': config.FOR_DESCRIPTION_BY_CRYPTAID_TABLE,
        'transaction': transaction,
    }
    utils.describe_segments_in_siberia(**call_kwargs)


def get_group_features(
    yt_client,
    yql_client,
    sample_table,
    group_features_table,
    model_dir=None,
):
    """Build the group features table for the segments in ``sample_table``.

    Everything runs inside a ``NirvanaTransaction``; intermediate results are
    kept in temporary tables. The steps are:

    1. describe the sample segments;
    2. load the features mapping (JSON) from the segments dict file of the
       DSSM entities, taken from ``model_dir`` when it is given and from the
       last available version otherwise;
    3. compute the segment DSSM features;
    4. run the YQL query that produces the segment vectors;
    5. create an empty ``group_features_table`` (``force=True``) with the
       expected schema, fill it with ``MakeGroupFeaturesMapper`` and sort it
       by group id.
    """
    with NirvanaTransaction(yt_client) as transaction, \
            yt_client.TempTable() as described_table, \
            yt_client.TempTable() as dssm_features_table, \
            yt_client.TempTable() as dssm_vectors_table:
        describe(
            yt_client=yt_client,
            input_table=sample_table,
            output_table=described_table,
            transaction=transaction,
        )

        dssm_entities = (
            utils.get_dssm_entities_from_dir(model_dir)
            if model_dir is not None
            else utils.get_last_version_of_dssm_entities(yt_client)
        )
        _, (_, segments_dict_file) = dssm_entities

        # NOTE(review): next() takes only the first item the file stream
        # yields - confirm the whole dict always arrives in that item.
        raw_mapping = next(yt_client.read_file(segments_dict_file))
        features_mapping = json.loads(raw_mapping)

        utils.get_segment_dssm_features(
            yt_client=yt_client,
            segments_with_description_table=described_table,
            segment_dssm_features_table=dssm_features_table,
            features_mapping=features_mapping,
        )

        query_vars = {
            'additional_fields': [
                fields.segment_affinitive_sites_ids,
                fields.segment_affinitive_apps,
                fields.segment_float_features,
            ],
            'model_path': utils.get_lal_model_source_link(released='stable'),
            'segments_dssm_features_table': dssm_features_table,
            'segments_dssm_vectors_table': dssm_vectors_table,
        }
        query = templater.render_template(
            utils.get_segments_embeddings_query_template,
            vars=query_vars,
        )
        yql_client.execute(
            query=query,
            transaction=str(transaction.transaction_id),
            title='YQL Add Lookalike DSSM vectors to features',
        )

        double_list_type = {'item': 'double', 'type_name': 'list'}
        output_schema = [
            {'name': fields.group_id, 'type': 'string'},
            {'name': fields.segment_vector, 'type_v3': dict(double_list_type)},
            {'name': 'segment_site2vec', 'type_v3': dict(double_list_type)},
            {'name': fields.segment_affinitive_sites_ids, 'type': 'string'},
            {'name': fields.segment_affinitive_apps, 'type': 'string'},
        ]
        # One extra string column per feature group found in the mapping.
        output_schema.extend(get_features_schema(features_mapping))

        yt_helpers.create_empty_table(
            yt_client,
            group_features_table,
            schema=output_schema,
            additional_attributes={'optimize_for': 'scan'},
            force=True,
        )

        mapper = MakeGroupFeaturesMapper(features_mapping=features_mapping)
        yt_client.run_map(mapper, dssm_vectors_table, group_features_table)

        yt_client.run_sort(group_features_table, sort_by=fields.group_id)
