#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
import logging
import os

from google.protobuf import json_format

from crypta.lib.python.bigb_catboost_applier import features_mapping as feature_mapping_utils
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.rt_socdem.lib.python.model.config import config
from crypta.rt_socdem.lib.python.model import utils

from ads.bsyeti.libs.primitives.counter_proto import counter_ids_pb2
from yabs.server.proto.keywords import keywords_data_pb2


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# YQL template (rendered with str.format) that parses profile dumps from the
# input table and keeps only rows for which `$missing_offline(profile)` holds.
# For each kept row it extracts counter bindings, operating systems,
# mobile models and regions.
#
# Placeholders: bm_cats_counter_id, operating_systems_keyword_id,
# mobile_models_keyword_id, regions_keyword_id, offline_socdem_keywords
# (comma-separated ids, wrapped into a `{...}` literal by the template),
# beh_regular_input_table, output_table.
# Literal YQL braces are doubled so that str.format leaves them intact.
get_sample_without_offline_keywords = """
PRAGMA File('bigb.so', 'https://proxy.sandbox.yandex-team.ru/last/BIGB_UDF?attrs={{"released":"stable"}}');
PRAGMA udf('bigb.so');

PRAGMA File(
    'libcrypta_identifier_udf.so',
    'https://proxy.sandbox.yandex-team.ru/last/CRYPTA_IDENTIFIERS_UDF?attrs={{"released":"stable"}}&salt=uyy3u3uy'
);
PRAGMA Udf('libcrypta_identifier_udf.so');

$missing_offline = ($profile) -> {{
    $keywords = ToSet(ListMap(
        $profile.items,
        ($item) -> {{RETURN $item.keyword_id;}}
    ));
    RETURN NOT SetIncludes($keywords, {{{offline_socdem_keywords}}});
}};

$bm_cats_counter = ($dump) -> {{
    RETURN ListFilter(
        $dump.counters,
        ($counter) -> {{
            return $counter.counter_id == {bm_cats_counter_id}
        }}
    )
}};

$get_keyword_by_id = ($dump, $keyword_id) -> {{
    RETURN ListFilter(
        $dump.items,
        ($item) -> {{
            return $item.keyword_id == $keyword_id
        }}
    )
}};

$parsed = (
    SELECT
        UniqID AS uniq_id,
        Bigb::ParseProfile(ProfileDump) AS profile
    FROM `{beh_regular_input_table}`
);

INSERT INTO `{output_table}`
WITH TRUNCATE

SELECT
    uniq_id,
    Yson::Serialize(Yson::FromDoubleDict(
        Coalesce(ToDict(ListZip(
            ListMap($bm_cats_counter(profile)[0].key, ($key) -> {{ RETURN CAST($key AS String) }}),
            $bm_cats_counter(profile)[0].value)
        ), AsDict()))) AS bindings,
    ListFlatten(Coalesce(ListMap($get_keyword_by_id(profile, {operating_systems_keyword_id}),
        ($item) -> {{ RETURN $item.uint_values }}),
        AsList(Null))) AS operating_systems,
    Coalesce(ListMap($get_keyword_by_id(profile, {mobile_models_keyword_id}),
        ($item) -> {{ RETURN $item.string_value }}), AsList(Null)) AS mobile_models,
    ListFlatten(Coalesce(ListMap($get_keyword_by_id(profile, {regions_keyword_id}),
        ($item) -> {{ RETURN $item.uint_values }}),
        AsList(Null))) AS regions,
FROM $parsed
WHERE $missing_offline(profile);
"""


def compute(yt_client, yql_client, output, yesterday):
    """Compute the realtime socdem features mapping and dump it to a JSON file.

    Steps, the first two inside a single Nirvana transaction:
      1. Render and run ``get_sample_without_offline_keywords`` over the
         ``<yesterday>T20:00:00`` table in ``config.BEH_REGULAR_HOUR_LOG_DIR``,
         writing the sample to a temporary table.
      2. Run ``feature_mapping_utils.compute_features_mapping_query`` over that
         sample, writing to ``config.FEATURES_MAPPING_TABLE``.
      3. Read the mapping back as a proto, convert it to JSON, check that it
         is non-empty and write it to ``output``.

    Args:
        yt_client: client used for the transaction, the temporary table and
            reading the resulting mapping table.
        yql_client: client whose ``execute(query=..., transaction=..., title=...)``
            runs the YQL queries.
        output: path of the local file that receives the JSON mapping.
        yesterday: value interpolated into the input table name
            (presumably a ``YYYY-MM-DD`` date string; confirm with the caller).

    Raises:
        AssertionError: if the mapping contains no ``FeatureToIndex`` entries.
    """
    with NirvanaTransaction(yt_client) as transaction, yt_client.TempTable() as without_offline_table:
        transaction_id = str(transaction.transaction_id)

        # Only the keyword ids are needed; the class names (dict keys) are irrelevant here.
        offline_socdem_keywords = ', '.join(
            str(keyword) for keyword in config.SOCDEM_CLASS_KEYWORDS['offline'].values()
        )
        beh_regular_input_table = os.path.join(
            config.BEH_REGULAR_HOUR_LOG_DIR,
            '{}T20:00:00'.format(yesterday),
        )

        yql_client.execute(
            query=get_sample_without_offline_keywords.format(
                bm_cats_counter_id=counter_ids_pb2.ECounterId.CI_QUERY_CATEGORIES_INTEREST,
                operating_systems_keyword_id=keywords_data_pb2.EKeyword.KW_DETAILED_DEVICE_TYPE_BT,
                mobile_models_keyword_id=keywords_data_pb2.EKeyword.KW_DEVICE_MODEL_BT,
                regions_keyword_id=keywords_data_pb2.EKeyword.KW_USER_REGION,
                offline_socdem_keywords=offline_socdem_keywords,
                beh_regular_input_table=beh_regular_input_table,
                output_table=without_offline_table,
            ),
            transaction=transaction_id,
            title='YQL realtime socdem get features mapping',
        )

        logger.info('Sample without offline is calculated.')

        yql_client.execute(
            query=feature_mapping_utils.compute_features_mapping_query.format(
                bm_categories_to_take=config.BM_CATEGORIES_TO_TAKE,
                operating_systems_to_take=config.OPERATING_SYSTEMS_TO_TAKE,
                mobile_models_to_take=config.MOBILE_MODELS_TO_TAKE,
                regions_to_take=config.REGIONS_TO_TAKE,
                train_sample_table=without_offline_table,
                bm_categories_description_table=config.BM_CATEGORIES_DESCRIPTION_TABLE,
                os_description_table=config.OS_DESCRIPTION_TABLE,
                output_table=config.FEATURES_MAPPING_TABLE,
            ),
            transaction=transaction_id,
            title='YQL get realtime socdem features_mapping',
        )

        logger.info('Computed features_mapping')

    # The mapping table is read only after the transaction block has exited.
    feature_mapping_json = json_format.MessageToJson(feature_mapping_utils.get_proto_features_mapping(
        yt_client=yt_client,
        features_mapping_table_path=config.FEATURES_MAPPING_TABLE,
        counters_to_features=utils.COUNTERS_TO_FEATURES,
        keywords_to_features=utils.KEYWORDS_TO_FEATURES,
    ))
    feature_mapping_dict = json.loads(feature_mapping_json)

    # Explicit check instead of an `assert` statement: asserts are stripped
    # under `python -O`, which would let an empty mapping be written out.
    # AssertionError is kept so the failure type seen by callers is unchanged.
    if not feature_mapping_dict.get('FeatureToIndex'):
        raise AssertionError('Expected at least one feature in feature mapping')

    with open(output, 'w') as output_file:
        output_file.write(feature_mapping_json)

    logger.info('Successfully dumped features mapping')
