import os

from logging import getLogger

from intranet.search.core.utils.lemmer import get_lemmas
from intranet.search.abovemeta.utils import string_to_bool
from intranet.search.abovemeta.decider.ranking_factors import factors_names

logger = getLogger(__name__)


# Directory containing the ranking model files; load_model() joins a model
# name onto this path.
RANKING_MODELS_PATH = '/intrasearch/etc/bin/ranking_models/'
# Registry of ranking models keyed by model name. Every value starts as None
# and is replaced by load_ranking_models(); an entry stays None when its model
# could not be loaded.
RANKING_MODELS = {
    'default_model': None,
    'small_pool_model': None,
}


def load_model(name):
    """Load a CatBoost ranking model stored under ``RANKING_MODELS_PATH``.

    Loading is best-effort: any failure (``catboost`` cannot be imported, the
    model file is missing or unreadable, ...) is logged together with its
    traceback and ``None`` is returned instead of raising.

    :param name: file name of the model inside ``RANKING_MODELS_PATH``.
    :return: the result of ``CatBoost().load_model(path)``, or ``None`` when
        loading failed.
    """
    try:
        # Imported inside the try block so that an ImportError is handled
        # like any other load failure.
        from catboost import CatBoost
        path = os.path.join(RANKING_MODELS_PATH, name)
        model = CatBoost().load_model(path)
    except Exception:
        # Intentionally broad, but not a bare ``except:``: KeyboardInterrupt
        # and SystemExit must still propagate.
        logger.exception('Cannot load ranking model %s', name)
        model = None
    return model


def load_ranking_models():
    """Fill every entry of ``RANKING_MODELS`` with its loaded model.

    Each key of the registry is passed to ``load_model``; the result (which
    is ``None`` when loading failed) is stored back under the same key.
    """
    # Iterate over a snapshot of the keys; only values are reassigned.
    for model_name in tuple(RANKING_MODELS):
        loaded = load_model(model_name)
        RANKING_MODELS[model_name] = loaded


# Populate RANKING_MODELS once, as a side effect of importing this module.
load_ranking_models()


# Lemmas of marker words for each query category. get_query_factors()
# intersects each of these with the lemmas of the user's query to derive the
# corresponding ``is_*_query`` factor.
# Meeting-room queries.
invite_query_lemmas = get_lemmas(['переговорка'])
# Office equipment queries: scanner, printer, fax.
equipment_query_lemmas = get_lemmas(['сканер', 'принтер', 'факс'])
# Service queries: service, platform, product.
service_query_lemmas = get_lemmas(['сервис', 'платформа', 'продукт'])
# Mailing-list queries: mailing list, address, "maillist", "ml".
ml_query_lemmas = get_lemmas(['рассылка', 'адрес', 'maillist', 'ml'])
# Ticket/task queries: ticket, task, issue.
st_query_lemmas = get_lemmas(['тикет', 'таск', 'задача'])


def get_query_factors(state):
    """Return the factors that depend only on the search query.

    The result is computed once per ``state`` and memoized on it as
    ``state._query_factors_cache``; later calls return that same dict object.

    :param state: request state; ``state.qtree.filter_text_nodes()`` and
        ``state.text`` are read when the cache is not populated yet.
    :return: dict mapping factor names to ``0``/``1`` flags.
    """
    if hasattr(state, '_query_factors_cache'):
        return state._query_factors_cache

    words = [node.text for node in state.qtree.filter_text_nodes()]
    query_lemmas = get_lemmas(words)

    def has_any(marker_lemmas):
        # 1 when the query shares at least one lemma with the marker set.
        return int(bool(marker_lemmas & query_lemmas))

    state._query_factors_cache = {
        'is_invite_query': has_any(invite_query_lemmas),
        'is_equipment_query': has_any(equipment_query_lemmas),
        'is_service_query': has_any(service_query_lemmas),
        'is_email_tg_query': int('@' in state.text),
        'is_ml_query': has_any(ml_query_lemmas),
        'is_st_query': has_any(st_query_lemmas),
    }
    return state._query_factors_cache


def get_factors(state, parsed):
    """Build the complete factor dict for one parsed search response.

    Combines the query-only factors of ``state`` with the
    ``is_misspell_fixed`` flag taken from ``parsed.properties`` and the
    factors of the first document in ``parsed.get_docs()``.

    :param state: request state passed to ``get_query_factors``.
    :param parsed: parsed response exposing ``properties`` and ``get_docs()``.
    :return: a new dict of factors; mutating it does not affect the
        query-factor cache stored on ``state``.
    :raises IndexError: presumably when ``parsed.get_docs()`` is empty
        (assumes it returns a sequence - confirm against the caller).
    """
    # Work on a copy: get_query_factors() returns the dict cached on the
    # state, and response-specific factors must not leak into that cache
    # (and from there into later calls for the same state).
    factors_dict = dict(get_query_factors(state))
    factors_dict['is_misspell_fixed'] = int(string_to_bool(parsed.properties.get('MisspellFixed')))
    first_doc = parsed.get_docs()[0]
    factors_dict.update(first_doc.factors)
    return factors_dict


def get_relevance(state, parsed, name=''):
    """Score a parsed search response with the configured ranking model.

    The model name is taken from the ``ranking_model`` feature of ``state``
    and falls back to ``'default_model'``. When no loaded model is registered
    under that name, an error is logged and ``0.0`` is returned.

    :param state: request state (feature values, query factors, log context).
    :param parsed: parsed response whose factors are fed to the model.
    :param name: label used only in the log message.
    :return: the value returned by ``model.predict`` or ``0.0`` when the
        model is unavailable.
    """
    chosen_model = state.feature_value('ranking_model')
    if not chosen_model:
        chosen_model = 'default_model'

    ranker = RANKING_MODELS.get(chosen_model)
    if not ranker:
        logger.error('Ranking model is not loaded', extra={'state': state})
        return 0.0

    factors = get_factors(state, parsed)
    # Feature vector in the order given by factors_names; missing factors
    # default to 0.
    feature_vector = [factors.get(factor_name, 0) for factor_name in factors_names]
    relevance = ranker.predict(feature_vector)

    log_extra = {
        'state': state,
        'context': {'factors': factors, 'relevance': relevance},
    }
    logger.info('Relevance for %s: %s', name, relevance, extra=log_extra)
    return relevance
