import logging
import os
from urllib.parse import quote

from django.utils import timezone
from django.utils.functional import cached_property

from django.conf import settings
from yt.wrapper import YtHttpResponseError, TablePath

from intranet.search.core import context
from intranet.search.core.ferryman_client import FerrymanClient
from intranet.search.core.yt_client import client as yt, yt_opts, create_table_and_insert_rows
from intranet.search.core.utils import serialize_snippet, get_kps, reraise_as_recoverable
from intranet.search.core.utils import http, json
from intranet.search.core.errors import UnrecoverableError
from intranet.search.core.storages import Storage, StorageUnavailable, FerrymanTableStorage


# Module-level logger.
log = logging.getLogger(__name__)

# Type markers written into the 'type' field of serialized document
# attributes (see DocumentSerializer.append_attribute for the mapping).
TYPE_PROPERTY = '#p'  # 'property' attributes
TYPE_SEARCH_ATTR_INT = '#i'  # 'search_int' attributes
TYPE_SEARCH_ATTR_LITERAL = '#l'  # 'search_literal' attributes
TYPE_GROUP_ATTR_INT = '#g'  # 'group_int' attributes
TYPE_GROUP_ATTR_LITERAL = '#h'  # 'group_literal' attributes
TYPE_FACTOR = '#f'  # 'factor' attributes
TYPE_FACET = '#pl'  # 'facet' attributes


class InvalidDocument(UnrecoverableError):
    """Error raised when SaaS refuses to accept our document.
    For example, when we have sent it the wrong factors.
    """


class DocumentSerializer:
    """Serializes a document into the message format accepted by the storage.

    Produces "modify" and "delete" messages for ``document`` within the index
    revision ``revision``. Attributes and factors are attached to the message
    and the document body is converted into a tree of zones.
    """

    def __init__(self, document, revision, body_format='json'):
        """
        :param document: the document to serialize.
        :param revision: mapping describing the revision; this class reads
            its ``id``, ``search`` and ``index`` keys.
        :param body_format: stored on the instance as ``body_format``.
        """
        self.document = document
        self.revision = revision
        self.body_format = body_format
        self.kps = get_kps(revision['id'])

        # Factor information is loaded lazily by _fill_factors_info().
        self._factors = None
        self._zone_factors = None

    @property
    def factors(self):
        """Names of static factors, keyed by search name (and ``'meta'``)."""
        if self._factors is None:
            self._fill_factors_info()
        return self._factors

    @property
    def zone_factors(self):
        """Names of zone factors under the keys ``'self'`` and ``'meta'``."""
        if self._zone_factors is None:
            self._fill_factors_info()
        return self._zone_factors

    def format_document_for_update(self, **kwargs):
        """Build the "modify" message for the document.

        Snippets are emitted as property attributes first, then the body is
        converted, and finally attributes and factors are appended.

        :param kwargs: extra entries merged over the default ``options``
            (for example ``realtime``).
        :return: the message as a dict.
        """
        self.append_snippets()
        body = self.change_body(self.document.body)

        options = {
            'mime_type': 'text/xml',
            'charset': 'utf8',
            'language': 'ru',
            'language2': 'ru',
            'language_default': 'ru',
            'language_default2': 'ru',
            'modification_timestamp': self.document.updated_ts,
            'check_only_before_reply': False,
            'realtime': True,
        }
        options.update(**kwargs)

        message = {
            'action': 'modify',
            'prefix': self.kps,
            'docs': [
                {
                    'options': options,
                    'url': self.document.url,
                    'body': body,
                }
            ],
        }

        for attr_type, attributes in self.document.attributes.items():
            for attribute in attributes:
                value = attribute['value']
                # Empty attributes are not stored, but 0 is kept.
                if value is None or value == '':
                    continue
                self.append_attribute(message, attribute['name'], value, attr_type)

        self.append_factors(self.document, message)
        return message

    def format_document_for_delete(self, **kwargs):
        """Build the "delete" message for the document.

        :param kwargs: accepted for signature parity with
            ``format_document_for_update``; not used.
        :return: the message as a dict.
        """
        message = {
            'prefix': self.kps,
            'action': 'delete',
            'docs': [{
                'url': self.document.url,
                'options': {'modification_timestamp': self.document.updated_ts},
            }]
        }
        return message

    def append_snippets(self):
        """Emit every non-empty snippet as a ``doc_snippet_<lang>`` property."""
        for lang, snippet in self.document.snippets.items():
            if snippet is None:
                continue
            body = serialize_snippet(snippet)
            # Tabs are replaced with spaces in the serialized snippet.
            body = body.replace('\t', ' ')
            self.document.emit_property_attr('doc_snippet_%s' % lang, body)

    def change_body(self, body):
        """Rename the zones of ``body`` and convert it into a zone tree."""
        renamed_body = self.rename_zones(body, full=True)
        return self.rebuild_dict(renamed_body)

    def append_attribute(self, message, name, value, attr_type='property'):
        """Append one attribute value to the first document of ``message``.

        :param message: message dict; ``message['docs'][0]`` is modified.
        :param name: attribute name; facets get an ``s_`` prefix if missing.
        :param value: attribute value.
        :param attr_type: one of the keys of the mapping below, or ``'group'``
            which is resolved to ``group_int`` / ``group_literal`` by the
            type of ``value``.
        :raises KeyError: if ``attr_type`` is unknown.
        """
        mapping = {
            'property': TYPE_PROPERTY,
            'search_int': TYPE_SEARCH_ATTR_INT,
            'search_literal': TYPE_SEARCH_ATTR_LITERAL,
            'group_int': TYPE_GROUP_ATTR_INT,
            'group_literal': TYPE_GROUP_ATTR_LITERAL,
            'factor': TYPE_FACTOR,
            'facet': TYPE_FACET,
        }

        if attr_type == 'group':
            if isinstance(value, int):
                value = int(value)
                attr_type = 'group_int'
            else:
                value = str(value)
                attr_type = 'group_literal'

        data = {'value': value, 'type': mapping[attr_type]}

        if attr_type == 'facet' and not name.startswith('s_'):
            name = 's_' + name

        # Several values of one attribute are accumulated in a list.
        message['docs'][0].setdefault(name, []).append(data)

    def append_factors(self, document, message):
        """Append the factors of ``document`` as ``STAT_<search>_<name>``.

        Factors stored under the ``'meta'`` key use ``meta`` as the search
        part of the name; every other group uses the revision's search name.
        """
        for key, factor_data in document.factors.items():
            factor_type = key if key == 'meta' else self.revision['search']
            for name, value in factor_data.items():
                self.append_attribute(message, f'STAT_{factor_type}_{name}', value, 'factor')

    def rename_zones(self, zone, full=False):
        """Rename the zones (keys) of a document body.

        - a non-dict ``zone`` is returned unchanged;
        - keys starting with ``z_`` are NOT renamed;
        - keys starting with ``!`` become ``z_<search_id>_<name>``;
        - a key listed in the zone factors of the search becomes
          ``z_<search_id>_<name>``, one listed in the meta zone factors
          becomes ``z_meta_<name>``;
        - any other key becomes ``z_<search_id>_<name>`` only when ``full``
          is true (needed for xml indexing), otherwise it is kept;
        - when the bare name is both a zone factor and a meta zone factor,
          the value is additionally wrapped into ``{z_meta_<name>: value}``.

        Nested dicts and items of lists/tuples/sets are processed recursively.

        NOTE(review): ``full`` is not passed down to the recursive calls, so
        it only affects the top level of ``zone`` — confirm this is intended.

        :param zone: document body (or a part of it).
        :param full: rename keys that do not form zone factors as well.
        :return: a new dict with renamed keys, or ``zone`` itself if it is
            not a dict.
        """
        new_body = {}
        search_id = self.revision['search']

        if not isinstance(zone, dict):
            return zone

        search_prefix = search_id + '_'

        for key, value in zone.items():
            if not isinstance(key, str):
                log.warning('Got invalid zone name. Zone: %s, Doc: %s', key, self.document.url)
                key = str(key)

            new_key = key

            if isinstance(value, dict):
                value = self.rename_zones(value)

            if isinstance(value, (tuple, list, set)):
                value = [self.rename_zones(z) for z in value]

            if key.startswith('z_'):
                new_key = key
                # Strip the leading "z_<search_id>_" to get the bare zone
                # name. Only the prefix is removed: the search id may also
                # occur inside the name itself and must stay there.
                key = key[2:]
                if key.startswith(search_prefix):
                    key = key[len(search_prefix):]
            elif key.startswith('!'):
                # For zones that are not zone factors but whose exact names
                # matter (e.g. they are listed in the platform config). For
                # instance, a zone that passages are not generated from must
                # always have the same name.
                new_key = f'z_{search_id}_{key[1:]}'
                key = key[1:]
            else:
                # Check whether the name is in the list of zone factors.
                if key in self.zone_factors['self']:
                    new_key = f'z_{search_id}_{key}'
                elif key in self.zone_factors['meta']:
                    new_key = 'z_meta_%s' % key
                elif full:
                    new_key = f'z_{search_id}_{key}'

            if (key in self.zone_factors['self'] and
                    key in self.zone_factors['meta']):
                meta_key = 'z_meta_%s' % key
                new_body[new_key] = {meta_key: value}
            else:
                new_body[new_key] = value
        return new_body

    def rebuild_dispatcher(self, body, *args, **kwargs):
        """Convert ``body`` with the rebuild_* method matching its type."""
        if isinstance(body, dict):
            return self.rebuild_dict(body, *args, **kwargs)
        if isinstance(body, (list, tuple, set)):
            return self.rebuild_list(body, *args, **kwargs)
        return self.rebuild_simple(body, *args, **kwargs)

    def rebuild_dict(self, body, *args, **kwargs):
        """Convert a dict into a zone whose children are its converted values."""
        children = {key: self.rebuild_dispatcher(value) for key, value in body.items()}
        return {'value': '', 'type': 'zone', 'children': children}

    def rebuild_list(self, body, prefs=(), *args, **kwargs):
        """Convert a list/tuple/set into a zone.

        A collection of simple values becomes a single zone with the values
        joined by ``' .\\n'``; otherwise every item becomes a child zone
        named ``item_<index>``.
        """
        if not any(isinstance(item, (dict, tuple, list, set)) for item in body):
            # Simple case: glue everything into one string.
            return {'value': ' .\n'.join(map(str, body)), 'type': 'zone'}

        # Otherwise process every item separately.
        res = {'value': '', 'children': {}, 'type': 'zone'}
        for i, item in enumerate(body):
            res['children']['item_%s' % i] = self.rebuild_dispatcher(item, prefs)
        return res

    def rebuild_simple(self, body, *args, **kwargs):
        """Convert a scalar into a zone holding its string representation."""
        return {'value': str(body), 'type': 'zone'}

    def _fill_factors_info(self):
        """Lazily load the information about factors.

        The result is assigned to the instance only after all lookups have
        succeeded, so a failed lookup does not leave half-filled state that
        the ``factors`` / ``zone_factors`` properties would then return.
        """
        settings_context = context.SettingsContext()
        search = self.revision['search']

        search_factors = settings_context.factors_for_search(search)
        zone_factors = {
            'self': set(search_factors['zone']),
            'meta': [],
        }
        factors = {search: list(search_factors['static'])}

        meta_factors = settings_context.meta_factors_for_search(search, self.revision['index'])
        if meta_factors:
            factors['meta'] = list(meta_factors['static'])
            zone_factors['meta'] = set(meta_factors['zone'])

        self._factors = factors
        self._zone_factors = zone_factors


class DocumentStorageBase(Storage):
    """Base class for storages of indexed documents.

    Subclasses implement the actual delivery of documents; this class keeps
    the revision data and knows how to serialize a document.
    """
    # Class used to turn a document into a storage message.
    serializer_class = DocumentSerializer
    # Storage identifier, set by subclasses.
    storage_type = None

    def __init__(self, revision, indexation_id=None):
        """
        :param revision: mapping describing the revision; its ``id`` and
            ``service`` keys are read by this class.
        :param indexation_id: stored on the instance as ``indexation_id``.
        """
        self.revision = revision
        self.indexation_id = indexation_id
        self.kps = get_kps(revision['id'])
        self.body_format = 'json'

    @property
    def service(self):
        """Service name taken from the revision."""
        return self.revision['service']

    def setup(self):
        """Prepare the storage for use; does nothing by default."""
        pass

    def serialize_document(self, document, realtime=False, delete=False):
        """Serialize ``document`` into an update or a delete message.

        :param document: the document to serialize.
        :param realtime: passed to the update message options.
        :param delete: build a delete message instead of an update one.
        :return: the serialized message.
        """
        serializer = self.serializer_class(document, self.revision, body_format=self.body_format)
        if delete:
            return serializer.format_document_for_delete()
        return serializer.format_document_for_update(realtime=realtime)

    def update(self, document, realtime=False):
        """Update the document in the storage."""
        raise NotImplementedError()

    def flush(self, delta=True, realtime=False):
        """Force documents that were sent in non-realtime mode into the index."""
        raise NotImplementedError()

    def purge(self):
        """Clear the index for the current revision."""
        raise NotImplementedError()

    def delete(self, document):
        """Delete the document from the storage."""
        raise NotImplementedError()

    def delete_revision(self):
        """Delete the whole revision from the index."""
        raise NotImplementedError()

    def delete_by_filter(self, query=None):
        """Delete a set of documents matching a filter.

        :param query: filter query selecting the documents to delete.
        :raises NotImplementedError: always; subclasses that support
            deletion by filter override this method.
        """
        # Raise like every other abstract operation of this class instead of
        # silently doing nothing when a storage lacks an implementation.
        raise NotImplementedError()


class SaasDocumentStorage(DocumentStorageBase):
    """Document storage in SaaS.

    Every operation is turned into a JSON message that is POSTed to the
    indexer URL configured for the service.
    """
    storage_type = 'saas'

    def __init__(self, *args, **kwarg):
        super().__init__(*args, **kwarg)

        self.session = http.create_session(max_retries=3)
        indexer_endpoint = settings.ISEARCH['api']['saas'][self.service]['indexer']
        self.url = indexer_endpoint.url()
        self.body_format = 'json'

    def update(self, document, serialized_document=None, realtime=False):
        """Send the document to SaaS, serializing it first if needed."""
        message = serialized_document or self.serialize_document(document, realtime=realtime)
        self.send_request(message)

    def delete(self, document):
        """Send a delete message for the document."""
        self.send_request(self.serialize_document(document, delete=True))

    def flush(self, delta=True, realtime=False):
        """Send a ``reopen`` action unless indexing is realtime."""
        # Nothing needs to be flushed for realtime indexing.
        if not realtime:
            self.send_request({'prefix': self.kps, 'action': 'reopen'})

    def purge(self):
        """Delete every document stored under the current kps."""
        self.send_request({
            'prefix': self.kps,
            'action': 'delete',
            'request': 'url:"*"&kps=%s' % self.kps,
        })

    def delete_revision(self):
        """Remove the whole kps of the revision."""
        self.send_request({
            'prefix': self.kps,
            'action': 'delete',
            'request': '$remove_kps$',
        })

    def delete_by_filter(self, query):
        """Delete the documents matching ``query`` (URL-quoted before sending)."""
        request = f'{quote(query)}&service={self.service}&kps={self.kps}'
        self.send_request({
            'prefix': self.kps,
            'action': 'delete',
            'request': request,
        })

    def send_request(self, message):
        """POST ``message`` to the indexer as JSON.

        :return: the response; ``None`` when an HTTP error response says the
            document is obsolete.
        :raises InvalidDocument: on a 4xx error response.
        :raises StorageUnavailable: on a 5xx error response.
        Any other request error is logged and re-raised as is.
        """
        def _is_obsolete(resp):
            return 'Incoming document is obsolete' in resp.text

        def _log_failure(error):
            failed_response = getattr(error, 'response', None)
            details = error if failed_response is None else failed_response.text
            log.exception('Failed to send document to SaaS: %s', details)

        request_kwargs = {
            '_retries': 0,
            'timeout': 60,
            'data': json.dumps(message),
            'headers': {'Content-Type': 'application/json'},
        }

        try:
            response = http.call_with_retry(self.session.post, self.url, **request_kwargs)
        except http.HTTPError as e:
            failed = e.response
            status_code = failed.status_code
            if _is_obsolete(failed):
                # An obsolete document is reported but not treated as an error.
                log.warning('Document is obsolete: %s', failed.text)
                return
            if 400 <= status_code < 500:
                raise InvalidDocument(e, failed.text)
            if 500 <= status_code < 600:
                raise StorageUnavailable(e, failed.text)
            _log_failure(e)
            raise
        except http.ERRORS as e:
            _log_failure(e)
            raise

        # Reached only when the request succeeded.
        if _is_obsolete(response):
            log.warning('Document is obsolete: %s', response.text)
        return response


class LogbrokerDocumentStorage(SaasDocumentStorage):
    """Document storage in SaaS with delivery through Logbroker.

    Differs from the parent only in the URL the messages are sent to.
    """
    storage_type = 'logbroker'

    def __init__(self, *args, **kwarg):
        super().__init__(*args, **kwarg)
        push_endpoint = settings.ISEARCH['api']['saas_logbroker']['push']
        # The endpoint URL is a template with a ``service`` placeholder.
        url_template = push_endpoint.url()
        self.url = url_template.format(service=self.service)


class YTDocumentStorage(DocumentStorageBase):
    """Document storage in YT.

    Documents are written as rows of a YT table; ``flush`` converts the
    table to a static one and hands it over to Ferryman.
    """
    storage_type = 'yt'
    # Schema of the table the documents are written to.
    TABLE_SCHEMA = [
        {'name': 'url', 'type': 'string'},
        {'name': 'timestamp', 'type': 'int64'},
        {'name': 'JsonMessage', 'type': 'any'},
        {'name': 'raw', 'type': 'any'},
        {'name': 'deleted', 'type': 'boolean'},
    ]
    base_path = os.path.join(yt_opts['base_path'], 'data', settings.YENV_TYPE)
    name_sep = '-'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ferryman_table_storage = FerrymanTableStorage()

    def setup(self):
        """Create the table the documents will be written to."""
        self.create_table()

    def get_table_path(self):
        """Return the YT path of the table for the current revision.

        With an ``indexation_id`` it is used as the table suffix; otherwise
        the table is a "pushes" one and the suffix is the current hour.
        """
        pushes = not self.indexation_id
        if pushes:
            suffix = timezone.now().strftime('%Y-%m-%dT%H:00:00')
        else:
            suffix = self.indexation_id

        return self.ferryman_table_storage.get_yt_table_path(
            pushes=pushes,
            search=self.revision['search'],
            index=self.revision['index'],
            revision=self.revision['id'],
            suffix=suffix,
        )

    @cached_property
    def yt_table(self):
        """Return the table to write to (computed once per instance)."""
        return TablePath(self.get_table_path(), append=True)

    def create_table(self, dynamic=True):
        """Create the table; a dynamic table is also mounted."""
        table_path = self.get_table_path()
        attributes = {'schema': self.TABLE_SCHEMA, 'dynamic': dynamic}
        yt.create('table', table_path, recursive=True, attributes=attributes)
        if not dynamic:
            return
        # NOTE(review): YT documents the table attribute as
        # `tablet_cell_bundle` (singular) — confirm this name is intended.
        yt.set(f"{table_path}/@tablet_cell_bundles", yt_opts['bundle'])
        yt.mount_table(table_path, sync=True)

    def _format_row(self, document, message, deleted=False):
        """Build a table row from the document and its serialized message."""
        return {
            'url': document.url,
            'JsonMessage': json.dumps(message),
            'raw': document.raw_data,
            'timestamp': document.updated_ts,
            'deleted': deleted,
        }

    def update(self, document, serialized_document=None, realtime=False):
        """Write the document to the table, serializing it first if needed."""
        message = serialized_document or self.serialize_document(document, realtime=realtime)
        self.write_row(self._format_row(document, message))

    def delete(self, document):
        """Write a row marking the document as deleted."""
        delete_message = self.serialize_document(document, delete=True)
        self.write_row(self._format_row(document, delete_message, deleted=True))

    @reraise_as_recoverable(YtHttpResponseError)
    def write_row(self, row):
        """Insert ``row`` into the table.

        When YT rejects the row with "Value is too long", ``row['raw']`` is
        emptied (the passed dict is modified) and the insert is repeated.
        """
        try:
            create_table_and_insert_rows(self.yt_table, [row], self.TABLE_SCHEMA)
        except YtHttpResponseError as e:
            if not e.contains_text('Value is too long'):
                raise
            # FIXME ISEARCH-5724 temporary fix that substantially reduces the
            # number of problems but does not cure them completely
            row['raw'] = ''
            yt.insert_rows(self.yt_table, [row])

    @classmethod
    def convert_to_static(cls, table):
        """Unmount a dynamic table and make it static; static ones are left alone."""
        is_dynamic = yt.get(f'{table}/@dynamic')
        if not is_dynamic:
            return
        yt.unmount_table(table, sync=True)
        yt.alter_table(table, dynamic=False)

    def flush(self, delta=True, table=None, realtime=False):
        """Make the table static and send it to Ferryman.

        :param table: table to flush; defaults to ``yt_table``.
        :return: the Ferryman batch id.
        """
        if not table:
            table = self.yt_table
        self.convert_to_static(table)
        return self.send_ferryman_request(str(table), delta)

    def purge(self):
        """Clear the index of the revision; same as ``delete_revision``."""
        return self.delete_revision()

    def delete_revision(self):
        """Delete the revision through Ferryman. To do so an empty table is
        created and fed to Ferryman with the revision specified.
        """
        path = self.get_table_path()
        if yt.exists(path):
            yt.remove(path)
        # NOTE(review): create_table() computes the path again; for tables
        # with an hourly suffix the two values could differ — confirm.
        self.create_table(dynamic=False)

        return self.send_ferryman_request(path, delta=False)

    @cached_property
    def ferryman_client(self):
        """Ferryman client for the service (created once per instance)."""
        return FerrymanClient(service=self.service)

    def send_ferryman_request(self, table, delta):
        """Send an indexing request to Ferryman and record the batch.

        :return: the batch id returned by Ferryman.
        """
        batch_id = self.ferryman_client.add_table(self.kps, table, delta)
        self.ferryman_table_storage.create(
            batch_id=batch_id,
            path=table,
            revision_id=self.revision['id'],
        )
        return batch_id
