import time
from multiprocessing import Queue, Process
from typing import List

from ..services.search import Search
from ..settings import config_app, logger
from ..utils import get_attr_value_by_key
from ..utils.messages import (
    Document,
    IndexTime,
    LostDocsCount,
    IndexedDocsCount,
    CheckedDocsCount,
    CheckedNewDocsCount,
    NotFoundDocsByUrlCount,
    NotFoundDocsByTitleWithQuotesCount,
    NotFoundDocsByTitleWithoutQuotesCount,
    UpperInvalidResponseCount,
    IntInvalidResponseCount,
    BaseInvalidResponseCount,
    IntAnswerNotCompletedCount,
    IntErrorResponseCount,
    DocsNotContainsInResultPageCount,
    ExpectedDocsCount,
    InvalidModificationTimestampCount,
    TimedeltaBetweenModificationAndCurrentTime,
    RefreshMonDocsLifetime
)
from ..utils.exceptions import (
    InvalidResponseException,
    AnswerNotCompletedException,
    ErrorResponseException
)


class SearchDataExtractor(Process):
    def __init__(self,
                 search_data_extractor_queue: Queue,
                 unistat_queue: Queue):
        """Store the two queues and build the search service client.

        Args:
            search_data_extractor_queue: queue this process reads documents
                from and puts documents back onto when they must be retried.
            unistat_queue: queue that receives every metric message produced
                while documents are checked.
        """
        super().__init__()
        self.__search_data_extractor_queue = search_data_extractor_queue
        self.__unistat_queue = unistat_queue

        # All search endpoints and CGI parameters live in the 'search' config section.
        search_settings = config_app['search']
        self.__search_service = Search(
            upper_search_url=search_settings['upper_search_url'],
            int_search_url=search_settings['int_search_url'],
            upper_cgi_params=search_settings['upper_cgi_params'],
            int_cgi_params=search_settings['int_cgi_params'],
            base_cgi_params=search_settings['base_cgi_params'],
        )

    def __contains_document_in_result_page(self,
                                           document: Document,
                                           result_docs: List[dict]) -> bool:
        return any(document.doc_id == result_doc['docid'].split('-')[-1]
                   for result_doc in result_docs)

    def __find_document_on_upper_with_quotes(self,
                                             title: str,
                                             document: Document) -> bool:
        docs_from_upper = self.__search_service.request_to_upper(query=f'"{title}"', noreask=True)
        if docs_from_upper:
            if not self.__contains_document_in_result_page(document=document, result_docs=docs_from_upper):
                self.__unistat_queue.put(DocsNotContainsInResultPageCount(value=1, document=document))
        else:
            self.__unistat_queue.put(NotFoundDocsByTitleWithQuotesCount(value=1, document=document))
        return bool(docs_from_upper)

    def __find_document_on_upper_without_quotes(self,
                                                title: str,
                                                document: Document) -> bool:
        docs_from_upper = self.__search_service.request_to_upper(query=title)
        if docs_from_upper:
            if not self.__contains_document_in_result_page(document=document, result_docs=docs_from_upper):
                self.__unistat_queue.put(DocsNotContainsInResultPageCount(value=1, document=document))
        else:
            self.__unistat_queue.put(NotFoundDocsByTitleWithoutQuotesCount(value=1, document=document))
        return bool(docs_from_upper)

    def __find_document_on_int(self,
                               document: Document) -> bool:
        docs_from_int = None
        try:
            docs_from_int = list(self.__search_service.get_documents_from_int(query=f'url:{document.url}'))
        except (InvalidResponseException, KeyError):
            document.retry_count += 1
            self.__unistat_queue.put(IntInvalidResponseCount(value=1, document=document))
        except AnswerNotCompletedException:
            document.retry_count += 1
            self.__unistat_queue.put(IntAnswerNotCompletedCount(value=1, document=document))
        except ErrorResponseException:
            document.retry_count += 1
            self.__unistat_queue.put(IntErrorResponseCount(value=1, document=document))

        if docs_from_int:
            int_document = docs_from_int[0]
            # Подмешиваем DocId в объект документа, если это новый документ, который мы нашли на инте
            if document.doc_id is None:
                document.doc_id = int_document['DocId'].split('-')[-1]
            base_timestamp = int(get_attr_value_by_key(data=int_document['ArchiveInfo']['GtaRelatedAttribute'],
                                                       key='_BaseTimestamp'))
            self.__unistat_queue.put(IndexTime(value=(base_timestamp - document.mercury_timestamp),
                                               document=document))

            title = int_document['ArchiveInfo']['Title']
            try:
                if not self.__find_document_on_upper_with_quotes(title=title, document=document):
                    self.__find_document_on_upper_without_quotes(title=title, document=document)
            except (InvalidResponseException, KeyError):
                document.retry_count += 1
                self.__unistat_queue.put(UpperInvalidResponseCount(value=1, document=document))

        return bool(docs_from_int)

    def __find_document_on_base(self,
                                document: Document) -> bool:
        docs_from_base = None
        try:
            docs_from_base = self.__search_service.request_to_base(searcher_hostname=document.searcher_hostname,
                                                                   doc_id=document.doc_id)
        except (InvalidResponseException, KeyError):
            document.retry_count += 1
            self.__unistat_queue.put(BaseInvalidResponseCount(value=1, document=document))

        if docs_from_base:
            return any(base_document['DDK']['Timestamp'] >= document.modification_timestamp
                       for base_document in docs_from_base)
        return False

    def run(self):
        """Process documents from the extractor queue in an endless loop.

        For each document taken from the queue this method:
          1. reports a modification timestamp that lies in the future
             (the ``is_invalid_mtime`` flag keeps it from being reported
             again for the same document object);
          2. maintains the ``anchor_url`` / ``expected_docs`` counter and
             sends ``ExpectedDocsCount`` when the anchor URL is seen again;
          3. checks whether the document is indexed: a document without
             ``searcher_hostname`` is looked up on int only, otherwise it is
             checked on base and, if found there, additionally on int;
          4. sends the checked/indexed/lost metrics, or puts the document
             back onto the queue for another attempt while it is within the
             index timeout and retry limit.

        Any ``Exception`` is logged at critical level and re-raised, which
        ends the loop.
        """
        anchor_url = None
        expected_docs = 0

        try:
            while True:
                document: Document = self.__search_data_extractor_queue.get()
                now_ts = int(time.time())

                # A modification time ahead of "now" is flagged and reported.
                mtime_lead = document.modification_timestamp - now_ts
                if mtime_lead > 0 and not document.is_invalid_mtime:
                    document.is_invalid_mtime = True
                    self.__unistat_queue.put(InvalidModificationTimestampCount(value=1, document=document))
                    self.__unistat_queue.put(TimedeltaBetweenModificationAndCurrentTime(value=mtime_lead,
                                                                                        document=document))

                # Count documents seen since the anchor; the count restarts at 1
                # whenever the anchor comes round again or a new anchor is chosen.
                if anchor_url == document.url:
                    self.__unistat_queue.put(ExpectedDocsCount(value=expected_docs, document=None))
                    expected_docs = 1
                elif anchor_url is None:
                    anchor_url = document.url
                    expected_docs = 1
                else:
                    expected_docs += 1

                if document.searcher_hostname is None:
                    # New document
                    is_indexed = self.__find_document_on_int(document=document)
                else:
                    # Updated document
                    is_indexed = self.__find_document_on_base(document=document)
                    if is_indexed and not self.__find_document_on_int(document=document):
                        # The updated document was indexed by base but is no
                        # longer found on int by its url.
                        self.__unistat_queue.put(NotFoundDocsByUrlCount(value=1, document=document))

                self.__unistat_queue.put(CheckedDocsCount(value=1, document=document))
                if document.is_new:
                    self.__unistat_queue.put(CheckedNewDocsCount(value=1, document=document))

                lifetime = now_ts - int(document.received_at.timestamp())
                requeued = False
                if is_indexed:
                    self.__unistat_queue.put(RefreshMonDocsLifetime(value=lifetime,
                                                                    document=document))
                    self.__unistat_queue.put(IndexedDocsCount(value=1, document=document))
                elif (now_ts - document.mercury_timestamp) >= config_app.getint('search', 'index_timeout'):
                    # Not indexed within the configured timeout: count it as lost.
                    self.__unistat_queue.put(RefreshMonDocsLifetime(value=lifetime,
                                                                    document=document))
                    self.__unistat_queue.put(LostDocsCount(value=1, document=document))
                elif document.retry_count < config_app.getint('search', 'max_retry_count'):
                    # Still within limits: send the document round for another check.
                    document.is_new = False
                    requeued = True
                    self.__search_data_extractor_queue.put(document)

                # The anchor is kept only while the current document is being retried.
                if not requeued:
                    anchor_url = None
        except Exception as error:
            logger.critical(error, exc_info=True)
            raise error from None
