import functools
import itertools
import json
import operator

from constance import config

from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.search import SearchVector
from django.db.models import TextField, Value, Q, F

from intranet.femida.src.core.signals import post_update
from intranet.femida.src.publications.choices import (
    PUBLICATION_FACETS,
    PUBLICATION_LANGUAGES,
    PUBLICATION_STATUSES,
    PUBLICATION_TYPES,
    PUBLICATION_PRO_LEVELS,
    VACANCY_TO_PUBLICATION_PRO_LEVELS_MAP,
    PUBLICATION_EMPLOYMENT_TYPES,
)
from intranet.femida.src.publications.helpers import get_vacancy_employment_types
from intranet.femida.src.publications.models import Publication
from intranet.femida.src.utils.begemot import SynonymAPI
from intranet.femida.src.vacancies.choices import VACANCY_PRO_LEVELS


def archive_vacancy_publications(vacancy):
    """
    Move every published publication of the vacancy to the archived status
    and notify ``post_update`` receivers about the affected publications.

    :param vacancy: object exposing the ``publications`` related manager
    """
    # Pin the affected rows by id before updating. A queryset filtered by
    # status is lazy: once the status has been changed, re-evaluating it would
    # no longer return the rows that were just updated, so receivers of the
    # signal would get a queryset without them.
    publication_ids = list(
        vacancy.publications
        .filter(status=PUBLICATION_STATUSES.published)
        .values_list('id', flat=True)
    )
    publications = vacancy.publications.filter(id__in=publication_ids)
    publications.update(status=PUBLICATION_STATUSES.archived)
    post_update.send(
        sender=Publication,
        queryset=publications,
    )


def unarchive_vacancy_publications(vacancy):
    """
    Move every archived publication of the vacancy back to the published
    status and notify ``post_update`` receivers about the affected publications.

    :param vacancy: object exposing the ``publications`` related manager
    """
    # Pin the affected rows by id before updating. A queryset filtered by
    # status is lazy: once the status has been changed, re-evaluating it would
    # no longer return the rows that were just updated, so receivers of the
    # signal would get a queryset without them.
    publication_ids = list(
        vacancy.publications
        .filter(status=PUBLICATION_STATUSES.archived)
        .values_list('id', flat=True)
    )
    publications = vacancy.publications.filter(id__in=publication_ids)
    publications.update(status=PUBLICATION_STATUSES.published)
    post_update.send(
        sender=Publication,
        queryset=publications,
    )


def _get_facet_data(queryset, facet, publications_path, value_field='id'):
    """
    Compute the sets of publication ids for every possible value of the given facet.

    Only published external publications are aggregated. Every value present
    in ``queryset`` gets an entry (possibly empty) for each language listed in
    ``PUBLICATION_LANGUAGES``.

    :param queryset: queryset of the objects the facet is built from
    :param facet: facet name, used as the first element of every result key
    :param publications_path: ORM lookup path from a facet object to its publications
    :param value_field: field of the facet object that identifies it in the result
    :return: {(facet, str(value), lang): {publication_ids}}
    """
    publications_data = (
        queryset
        .annotate(
            value=F(value_field),
            lang=F(f'{publications_path}__lang'),
            publication_ids=ArrayAgg(
                f'{publications_path}',
                distinct=True,
                filter=Q(**{
                    f'{publications_path}__status': PUBLICATION_STATUSES.published,
                    f'{publications_path}__type': PUBLICATION_TYPES.external,
                }),
            ),
        )
        # Drop the rows produced by objects that no publication references.
        # For some reason putting
        # .filter(**{f'{publications_path}publications__isnull': False})
        # before .annotate adds 3 nested INNER JOINs that complicate the query.
        .filter(lang__isnull=False)
        .values_list('value', 'lang', 'publication_ids')
    )
    result = {}
    for value in queryset.values_list(value_field, flat=True).distinct():
        for lang, _ in PUBLICATION_LANGUAGES:
            result[(facet, str(value), lang)] = set()
    for value, lang, pub_ids in publications_data:
        # The filtered aggregate may come back as None when no publication in
        # the group passes the filter, hence the `or ()` guard. Rows sharing
        # the same (value, lang) are merged instead of overwriting each other.
        result.setdefault((facet, str(value), lang), set()).update(pub_ids or ())
    return result


def get_publication_facet_data():
    """
    Build the mapping {(facet, value, lang): {publication_ids}} for all facets.

    Model-backed facets are delegated to ``_get_facet_data``; the pro level
    and employment type facets are computed here by walking the published
    external publications.
    """
    from intranet.femida.src.api.publications.external.forms import ExternalPublicationListFilterForm

    # {(facet, value, lang): {publication_ids}}
    facet_index = {}

    model_facets = ExternalPublicationListFilterForm().gen_queryset_with_fields_per_model_facet()
    for facet, value_field, publications_path, queryset in model_facets:
        model_facet_data = _get_facet_data(
            queryset=queryset,
            facet=facet,
            publications_path=publications_path,
            value_field=value_field,
        )
        facet_index.update(model_facet_data)

    # pro_levels and is_chief are handled separately because they are not
    # stored as a model but as two int fields bounding an interval plus a
    # boolean flag
    published = Publication.objects.published_external()
    published = published.select_related('vacancy').prefetch_related('vacancy__cities')

    pro_levels_facet = PUBLICATION_FACETS.pro_levels
    employment_facet = PUBLICATION_FACETS.employment_types

    # Start every (level, lang) and (employment type, lang) pair with an empty set
    for lang, _ in PUBLICATION_LANGUAGES:
        facet_index.update({
            (pro_levels_facet, level, lang): set()
            for level, _ in PUBLICATION_PRO_LEVELS
        })
        facet_index.update({
            (employment_facet, employment_type, lang): set()
            for employment_type, _ in PUBLICATION_EMPLOYMENT_TYPES
        })

    for publication in published:
        vacancy = publication.vacancy
        pub_id, lang = publication.id, publication.lang

        # A missing or intern lower bound is treated as junior
        min_level = vacancy.pro_level_min
        if not min_level or min_level == VACANCY_PRO_LEVELS.intern:
            min_level = VACANCY_PRO_LEVELS.junior
        # A missing upper bound is treated as expert
        max_level = vacancy.pro_level_max or VACANCY_PRO_LEVELS.expert

        if max_level != VACANCY_PRO_LEVELS.intern:
            for vacancy_level in range(min_level, max_level + 1):
                # NOTE(review): an unmapped level gives None here and the
                # lookup below would presumably raise KeyError - confirm the
                # map covers every level in the range
                publication_level = VACANCY_TO_PUBLICATION_PRO_LEVELS_MAP.get(vacancy_level)
                facet_index[(pro_levels_facet, publication_level, lang)].add(pub_id)

        if publication.is_chief:
            facet_index[(pro_levels_facet, PUBLICATION_PRO_LEVELS.chief, lang)].add(pub_id)

        for employment_type in get_vacancy_employment_types(vacancy):
            facet_index[(employment_facet, employment_type, lang)].add(pub_id)

    return facet_index


def update_search_vectors(**publications_filter):
    """
    Recompute and store the RU and EN search vectors of the publications
    matching ``publications_filter``.

    Synonyms are looked up only for the indexed fields that are also listed
    in ``JOBS_SEARCH_PUBLICATION_SYNONYM_FIELDS``.
    """
    priorities_ru = json.loads(config.JOBS_FTS_PUBLICATION_FIELDS_PRIORITIES_RU)
    priorities_en = json.loads(config.JOBS_FTS_PUBLICATION_FIELDS_PRIORITIES_EN)
    publications = _get_publication_with_related_data_queryset(**publications_filter)

    fields_in_index = set(priorities_ru) | set(priorities_en)
    fields_allowing_synonyms = set(json.loads(config.JOBS_SEARCH_PUBLICATION_SYNONYM_FIELDS))
    fields_with_synonyms = fields_in_index & fields_allowing_synonyms

    words_by_publication = _get_words_by_publications(
        publications,
        from_fields=fields_with_synonyms,
    )
    synonyms_by_publication = _get_synonyms_by_publications(words_by_publication)

    vectors_by_id = {}
    for publication in publications:
        vectors_by_id[publication.id] = get_publication_search_vectors(
            publication,
            synonyms_by_publication[publication.id],
        )

    # Load plain (non-annotated) instances to write the vectors onto
    stored_publications = list(Publication.objects.filter(id__in=vectors_by_id))
    for stored in stored_publications:
        vectors = vectors_by_id[stored.id]
        stored.search_vector_en = vectors[PUBLICATION_LANGUAGES.en]
        stored.search_vector_ru = vectors[PUBLICATION_LANGUAGES.ru]

    Publication.objects.bulk_update(
        objs=stored_publications,
        fields=('search_vector_ru', 'search_vector_en'),
        batch_size=10,
    )


def _get_publication_with_related_data_queryset(**publications_filter):
    """
    Return publications matching the filter, annotated with the related
    service, profession, professional sphere, skill and city names.

    Unless ``status`` is passed explicitly, only published publications
    are selected.
    """
    if 'status' not in publications_filter:
        publications_filter['status'] = PUBLICATION_STATUSES.published

    related_data = {
        'service_name_ru': F('public_service__name_ru'),
        'service_name_en': F('public_service__name_en'),
        'prof_name_ru': F('vacancy__profession__name'),
        'prof_name_en': F('vacancy__profession__name_en'),
        'sphere_name_ru': F('vacancy__professional_sphere__name'),
        'sphere_name_en': F('vacancy__professional_sphere__name_en'),
        # Only public skills are aggregated
        'skills': ArrayAgg(
            'vacancy__skills__name',
            distinct=True,
            filter=Q(vacancy__skills__is_public=True),
        ),
        'cities_ru': ArrayAgg('vacancy__cities__name_ru', distinct=True),
        'cities_en': ArrayAgg('vacancy__cities__name_en', distinct=True),
    }
    return Publication.objects.filter(**publications_filter).annotate(**related_data)


def _extract_words(text):
    """
    Split the text into words for which looking up synonyms makes sense.

    Whitespace-separated tokens of one or two characters are dropped;
    the remaining ones are lower-cased and de-duplicated.
    """
    words = set()
    for token in text.split():
        if len(token) <= 2:
            continue
        words.add(token.lower())
    return words


def _get_words_by_publications(annotated_publications, from_fields) -> dict[int, dict]:
    """
    Collect, for every publication, the synonym-worthy words of each requested field.

    :param annotated_publications: iterable of publications exposing ``id``
        and every attribute named in ``from_fields``
    :param from_fields: names of the attributes to read; a value may be a
        string, a list of strings or empty
    :return: {pub_id: {field: {'word1', 'word2', ...}}}
    """
    words = {}  # {pub_id: {field: {'s1', 's2', ...}}}
    for publication in annotated_publications:
        field_words = {}
        for field in from_fields:
            text = getattr(publication, field) or ''
            if isinstance(text, list):
                # Aggregated arrays may carry None elements (e.g. when there
                # are no related rows to aggregate); str.join would raise
                # TypeError on them, so empty elements are skipped.
                text = ' '.join(item for item in text if item)
            field_words[field] = _extract_words(text)
        words[publication.id] = field_words
    return words


def _get_synonyms_by_publications(words_by_publications: dict[int, dict]) -> dict[int, dict]:
    """
    Map the words of every publication field to the set of their known synonyms.

    All words are sent to ``SynonymAPI.get_synonyms`` in a single call; words
    the API returns nothing for contribute no synonyms.

    :param words_by_publications: {pub_id: {field: {'word1', 'word2', ...}}}
    :return: {pub_id: {field: {'synonym1', 'synonym2', ...}}}
    """
    vocabulary = set()
    for field_to_words in words_by_publications.values():
        for words in field_to_words.values():
            vocabulary.update(words)

    synonyms_by_word: dict[str, list[str]] = SynonymAPI.get_synonyms(vocabulary)

    return {
        pub_id: {
            field: {
                synonym
                for word in words
                for synonym in synonyms_by_word.get(word, [])
            }
            for field, words in field_to_words.items()
        }
        for pub_id, field_to_words in words_by_publications.items()
    }


def get_publication_search_vectors(publication, synonyms: dict = None) -> dict[str, SearchVector]:
    """
    Build SearchVector objects from the FTS fields according to the settings,
    enriching them with the given synonyms.

    The result is keyed by publication language. A language whose priorities
    config is empty keeps an empty string instead of a vector.
    """
    if not synonyms:
        synonyms = {}
    config_by_lang = {
        PUBLICATION_LANGUAGES.ru: settings.PG_TEXT_SEARCH_CONFIG,
        PUBLICATION_LANGUAGES.en: settings.PG_TEXT_SEARCH_CONFIG_EN,
    }
    priorities_by_lang = {
        PUBLICATION_LANGUAGES.ru: json.loads(config.JOBS_FTS_PUBLICATION_FIELDS_PRIORITIES_RU),
        PUBLICATION_LANGUAGES.en: json.loads(config.JOBS_FTS_PUBLICATION_FIELDS_PRIORITIES_EN),
    }
    vectors_by_lang = {lang: '' for lang in priorities_by_lang}
    for lang in config_by_lang:
        field_weights = priorities_by_lang[lang]
        if field_weights:
            combined = None
            for field, weight in field_weights.items():
                text_value = _get_db_value_with_synonyms(
                    publication=publication,
                    field=field,
                    synonyms=synonyms.get(field),
                )
                vector = SearchVector(text_value, config=config_by_lang[lang], weight=weight)
                # Vectors are chained with `+` in the order the fields are configured
                combined = vector if combined is None else combined + vector
            vectors_by_lang[lang] = combined

    return vectors_by_lang


def _get_db_value_with_synonyms(publication, field, synonyms=None) -> Value:
    """
    Build the text to index for a publication field: the field value followed
    by its synonyms, separated by spaces.

    To use the value in a SearchVector, `output_field=models.TextField()`
    has to be set explicitly:
    https://code.djangoproject.com/ticket/30446

    :param publication: object to read the field from; a missing attribute
        counts as an empty value
    :param field: attribute name
    :param synonyms: iterable of synonym strings, may be None
    """
    field_value = getattr(publication, field, '')
    if field_value is None:
        # Otherwise the f-string below would put the literal word "None"
        # into the indexed text
        field_value = ''
    elif isinstance(field_value, (list, tuple)):
        # Aggregated values are joined into plain text rather than rendered
        # as a Python repr; None elements are skipped
        field_value = ' '.join(str(item) for item in field_value if item is not None)
    # Sorted so that the same set of synonyms always yields the same text
    synonyms_str = ' '.join(sorted(synonyms or ()))
    return Value(f'{field_value} {synonyms_str}', output_field=TextField())
