#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging

from crypta.lib.python.nirvana.nirvana_helpers.nirvana_transaction import NirvanaTransaction
from crypta.lib.python.yt import yt_helpers
from crypta.lookalike.lib.python.utils import (
    fields,
    mobile_utils,
)
from crypta.lookalike.lib.python.utils.mobile_config import config as mobile_config

# Module-level logger; it is not referenced by the code visible in this file.
logger = logging.getLogger(__name__)

# YQL template rendered with str.format(); it expects these placeholders:
#   app_to_web_urls       - input table providing BundleId, SourceId and WebUrl
#   site2vec              - input table joined by host
#   merged_stores         - input table joined by (app_id, id_type)
#   apps_host_vectors     - output table, overwritten (WITH TRUNCATE)
#   category_host_vectors - output table, overwritten (WITH TRUNCATE)
#
# Stages:
#   $app_hosts    - distinct (app_id, id_type, host) triples; SourceId 1 maps to
#                   "gaid", 2 to "idfa", anything else to Null; host is taken
#                   from WebUrl via Url::GetDomain(WebUrl, 2).
#   $with_vectors - $app_hosts inner-joined with site2vec on host and with
#                   merged_stores on (app_id, id_type), adding `vector` and
#                   `categories`. These columns are unqualified in the query;
#                   presumably `vector` comes from site2vec and `categories`
#                   from merged_stores - TODO confirm against the table schemas.
#   1st INSERT    - writes (app_id, id_type, host, vector) rows.
#   2nd INSERT    - flattens the `categories` list into one row per category
#                   and writes (category, app_id, id_type, vector) ordered by
#                   category, so the result can be reduced by category.
get_publishers_vectors_query = """
$app_hosts = (
    SELECT DISTINCT
        BundleId AS app_id,
        CASE SourceId
            WHEN 1 THEN "gaid"
            WHEN 2 THEN "idfa"
            ELSE Null
        END AS id_type,
        Url::GetDomain(WebUrl, 2) AS host
    FROM `{app_to_web_urls}`
);

$with_vectors = (
    SELECT
        app_hosts.*,
        vector,
        categories
    FROM $app_hosts AS app_hosts
    INNER JOIN `{site2vec}` AS site2vec
    ON app_hosts.host == site2vec.host
    INNER JOIN `{merged_stores}` AS merged_stores
    ON app_hosts.app_id == merged_stores.app_id AND app_hosts.id_type == merged_stores.id_type
);

INSERT INTO `{apps_host_vectors}`
WITH TRUNCATE

SELECT
    app_id,
    id_type,
    host,
    vector
FROM $with_vectors;

INSERT INTO `{category_host_vectors}`
WITH TRUNCATE

SELECT
    category,
    app_id,
    id_type,
    vector
FROM $with_vectors
FLATTEN LIST BY categories AS category
ORDER BY category;
"""


def get(nv_params):
    """Compute publisher host vectors for apps and aggregate them by category.

    Steps, all performed while a NirvanaTransaction and a YT temp table are
    open:

    1. Render ``get_publishers_vectors_query`` and execute it through the YQL
       client, passing the transaction id. The query writes per-app rows to
       ``mobile_config.APPS_VECTORS_BY_PUBLISHER`` and per-category rows to
       the temp table.
    2. Call ``yt_helpers.create_empty_table`` (``force=True``) for
       ``mobile_config.CATEGORY_VECTORS_BY_PUBLISHER`` with a
       ``category: string`` / ``vector: any`` schema.
    3. Run a reduce over the temp table, keyed by ``fields.category``, using
       ``mobile_utils.reduce_apps_vectors_by_category`` and writing into the
       table created in step 2.

    :param nv_params: parameters forwarded to ``mobile_utils.get_yt_client``
        and ``mobile_utils.get_yql_client`` to build the clients.
    :return: None.
    """
    yt_client = mobile_utils.get_yt_client(nv_params=nv_params)
    yql_client = mobile_utils.get_yql_client(nv_params=nv_params)

    with NirvanaTransaction(yt_client) as transaction:
        with yt_client.TempTable() as flattened_by_category:
            # Fill in the table paths the query template expects.
            placeholders = {
                'app_to_web_urls': mobile_config.APP_TO_WEB_URLS,
                'site2vec': mobile_config.SITE2VEC_TABLE,
                'merged_stores': mobile_config.MERGED_STORES,
                'apps_host_vectors': mobile_config.APPS_VECTORS_BY_PUBLISHER,
                'category_host_vectors': flattened_by_category,
            }
            rendered_query = get_publishers_vectors_query.format(**placeholders)

            yql_client.execute(
                query=rendered_query,
                transaction=str(transaction.transaction_id),
                title='YQL get publisher host vectors for apps',
            )

            # Destination of the reduce below.
            category_schema = {
                fields.category: 'string',
                fields.vector: 'any',
            }
            yt_helpers.create_empty_table(
                yt_client=yt_client,
                path=mobile_config.CATEGORY_VECTORS_BY_PUBLISHER,
                schema=category_schema,
                additional_attributes={'optimize_for': 'scan'},
                force=True,
            )

            # The query orders the temp table by category, matching reduce_by.
            reduce_spec = {'title': 'Reduce host vectors for categories'}
            yt_client.run_reduce(
                mobile_utils.reduce_apps_vectors_by_category,
                flattened_by_category,
                mobile_config.CATEGORY_VECTORS_BY_PUBLISHER,
                reduce_by=fields.category,
                spec=reduce_spec,
            )
