#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
import logging

from crypta.lib.python import native_yt
from crypta.lib.python.native_operations import lab
from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.lib.python.yt import yt_helpers
from crypta.lookalike.lib.python.utils import (
    fields,
    mobile_utils,
)
from crypta.lookalike.lib.python.utils.mobile_config import config as mobile_config

# Module-level logger named after this module; used by get() for progress messages.
logger = logging.getLogger(__name__)

# YQL template, filled in with str.format() by prepare().
#
# Placeholders: train_apps, val_apps, devid_by_app, user_data, merged_stores,
# app_by_crypta_id, category_by_crypta_id, MAX_SEGMENT_SIZE.
#
# Steps:
#   1. $apps - distinct app_id values from the train and validation app tables.
#   2. $filtered_by_user_data - distinct (cryptaId, app_id) pairs from
#      devid_by_app whose cryptaId is present in user_data.
#   3. $crypta_id_by_app - the pairs restricted to $apps, numbered in random
#      order within each app; rows with row_rank < MAX_SEGMENT_SIZE are written
#      (with truncate) to app_by_crypta_id, with app_id exposed as GroupID.
#   4. $crypta_id_by_category - the same sampling per non-null store category
#      taken from merged_stores, written to category_by_crypta_id.
#
# NOTE(review): if ROW_NUMBER() is 1-based here, the strict `<` keeps at most
# MAX_SEGMENT_SIZE - 1 rows per group - confirm this is intended.
get_train_app_by_crypta_id_query = """
$apps = (
    SELECT DISTINCT app_id
    FROM (
        SELECT app_id
        FROM `{train_apps}` as train_apps
            UNION ALL
        SELECT app_id
        FROM `{val_apps}` as val_apps
    )
);

$filtered_by_user_data = (
    SELECT DISTINCT
        cryptaId,
        devid_by_app.app_id AS app_id
    FROM `{devid_by_app}` AS devid_by_app
    INNER JOIN `{user_data}` AS user_data
    ON devid_by_app.cryptaId == user_data.CryptaID
);

$crypta_id_by_app = (
    SELECT
        cryptaId,
        devid_by_app.app_id AS GroupID,
        ROW_NUMBER() OVER w AS row_rank
    FROM $filtered_by_user_data AS devid_by_app
    INNER JOIN $apps AS apps
    ON devid_by_app.app_id == apps.app_id
    WINDOW w AS (
        PARTITION BY apps.app_id
        ORDER BY RANDOM(apps.app_id)
    )
);

INSERT INTO `{app_by_crypta_id}`
WITH TRUNCATE

SELECT *
FROM $crypta_id_by_app
WHERE row_rank < {MAX_SEGMENT_SIZE};

$crypta_id_by_category = (
    SELECT
        cryptaId,
        category as GroupID,
        ROW_NUMBER() OVER w AS row_rank
    FROM $filtered_by_user_data AS devid_by_app
    INNER JOIN `{merged_stores}` AS merged_stores
    ON devid_by_app.app_id == merged_stores.app_id
    WHERE merged_stores.category IS NOT NULL
    WINDOW w AS (
        PARTITION BY merged_stores.category
        ORDER BY RANDOM(merged_stores.category)
    )
);

INSERT INTO `{category_by_crypta_id}`
WITH TRUNCATE

SELECT *
FROM $crypta_id_by_category
WHERE row_rank < {MAX_SEGMENT_SIZE};
"""

# YQL template, filled in with str.format() by prepare().
#
# Placeholders: input_table, user_data, output_table.
#
# Joins the (cryptaId, GroupID) rows of input_table with the `raw` view of
# user_data on cryptaId == CryptaID and writes (with truncate) cryptaId,
# GroupID, Vectors, Segments, Attributes and Affinities to output_table.
join_with_user_data_query = """
INSERT INTO `{output_table}`
WITH TRUNCATE

SELECT
    user_app.cryptaId as cryptaId,
    user_app.GroupID as GroupID,
    user_data.Vectors AS Vectors,
    user_data.Segments AS Segments,
    user_data.Attributes AS Attributes,
    user_data.Affinities AS Affinities
FROM `{input_table}` AS user_app
JOIN `{user_data}` VIEW raw AS user_data
ON user_app.cryptaId == user_data.CryptaID;
"""


def prepare(nv_params):
    """Compute user data stats for app and category segments.

    Inside a single ``NirvanaTransaction``:

    1. Runs ``get_train_app_by_crypta_id_query`` to fill two temporary tables
       with sampled crypta ids, one grouped by app and one by category.
    2. For each of the two tables, runs ``join_with_user_data_query`` into a
       shared temporary table, then a native map-reduce with combiner that
       writes to ``mobile_config.USER_DATA_STATS_FOR_APPS`` or
       ``mobile_config.USER_DATA_STATS_FOR_CATEGORIES`` respectively.

    :param nv_params: forwarded to ``mobile_utils.get_yt_client`` and
        ``mobile_utils.get_yql_client``.
    """
    yt_client = mobile_utils.get_yt_client(nv_params=nv_params)
    yql_client = mobile_utils.get_yql_client(nv_params=nv_params)

    gib = 1024 * 1024 * 1024
    # Memory limit per job type of the map-reduce operation.
    job_memory_limits = (
        ('mapper', 16 * gib),
        ('reduce_combiner', 2 * gib),
        ('reducer', 2 * gib),
    )

    with NirvanaTransaction(yt_client) as transaction:
        with yt_client.TempTable() as apps_sample, \
                yt_client.TempTable() as categories_sample, \
                yt_client.TempTable() as joined_sample:
            transaction_id = str(transaction.transaction_id)

            sampling_query = get_train_app_by_crypta_id_query.format(
                train_apps=mobile_config.TRAIN_APPS_TABLE,
                val_apps=mobile_config.VALIDATION_APPS_TABLE,
                devid_by_app=mobile_config.DEVID_BY_APP_WITH_CRYPTA_ID,
                user_data=mobile_config.USER_DATA_BY_CRYPTA_ID,
                merged_stores=mobile_config.MERGED_STORES,
                app_by_crypta_id=apps_sample,
                category_by_crypta_id=categories_sample,
                MAX_SEGMENT_SIZE=mobile_config.MAX_SEGMENT_SIZE,
            )
            yql_client.execute(
                query=sampling_query,
                transaction=transaction_id,
                title='YQL get users for apps',
            )

            stats_targets = (
                (apps_sample, mobile_config.USER_DATA_STATS_FOR_APPS),
                (categories_sample, mobile_config.USER_DATA_STATS_FOR_CATEGORIES),
            )
            for sample_table, stats_table in stats_targets:
                # The joined temp table is reused: the query truncates it on insert.
                join_query = join_with_user_data_query.format(
                    input_table=sample_table,
                    user_data=mobile_config.USER_DATA_BY_CRYPTA_ID,
                    output_table=joined_sample,
                )
                yql_client.execute(
                    query=join_query,
                    transaction=transaction_id,
                    title='YQL join with UserData',
                )

                # A fresh spec dict is built for every operation.
                operation_spec = {
                    job_type: {
                        'memory_limit': memory_limit,
                        'memory_reserve_factor': 1,
                    }
                    for job_type, memory_limit in job_memory_limits
                }

                native_yt.run_native_map_reduce_with_combiner(
                    mapper_name=lab.TTransformUserDataToUserDataStats,
                    combiner_name=lab.TMergeUserDataStats,
                    reducer_name=lab.TMergeUserDataStats,
                    source=joined_sample,
                    destination=stats_table,
                    reduce_by=fields.group_id,
                    sort_by=fields.group_id,
                    proxy=str(yt_client.config['proxy']['url']),
                    pool=str(yt_client.config['pool']),
                    token=str(yt_client.config['token']),
                    transaction=transaction_id,
                    title='Mobile LaL compute segments stats',
                    spec=operation_spec,
                )


def get(nv_params, output):
    """Build DSSM feature tables for app and category segments and dump the features mapping.

    Reads the feature -> feature index mapping from
    ``mobile_config.LAL_FEATURES_MAPPING_TABLE``. Then, inside a
    ``NirvanaTransaction``, for each of the app and category stats tables it
    recreates the output table, maps the stats into it with
    ``mobile_utils.MakeDssmAppFeaturesMapper`` and sorts the result by
    ``fields.group_id``. Finally writes the mapping as JSON to ``output``.

    :param nv_params: forwarded to ``mobile_utils.get_yt_client``.
    :param output: path of the file that receives the JSON features mapping.
    """
    yt_client = mobile_utils.get_yt_client(nv_params=nv_params)

    features_mapping = {
        row[fields.feature]: row[fields.feature_index]
        for row in yt_client.read_table(mobile_config.LAL_FEATURES_MAPPING_TABLE)
    }
    logger.info('Downloaded features mapping')

    with NirvanaTransaction(yt_client):
        stats_to_features = (
            (
                mobile_config.USER_DATA_STATS_FOR_APPS,
                mobile_config.APP_SEGMENTS_DSSM_WEB_FEATURES_TABLE,
            ),
            (
                mobile_config.USER_DATA_STATS_FOR_CATEGORIES,
                mobile_config.CATEGORY_SEGMENTS_DSSM_WEB_FEATURES_TABLE,
            ),
        )
        for stats_table, features_table in stats_to_features:
            features_schema = {
                fields.group_id: 'string',
                fields.app_segment_features: 'string',
            }
            # force=True: the destination table is recreated on every run.
            yt_helpers.create_empty_table(
                yt_client=yt_client,
                path=features_table,
                schema=features_schema,
                additional_attributes={'optimize_for': 'scan'},
                force=True,
            )

            features_mapper = mobile_utils.MakeDssmAppFeaturesMapper(
                features_mapping=features_mapping,
            )
            yt_client.run_map(features_mapper, stats_table, features_table)
            yt_client.run_sort(features_table, sort_by=fields.group_id)

    logger.info('Computed apps dssm features from web')

    with open(output, 'w') as mapping_file:
        json.dump(features_mapping, mapping_file)

    logger.info('Successfully dumped features mapping')
