import logging
from urllib.parse import urlparse

import requests
from django.conf import settings

from intranet.search.core.models import ClickedMarkedUrl
from intranet.search.core.sources.utils import get_popularity_factors
from intranet.search.core.sources.wiki.base import BaseWikiSource
from intranet.search.core.utils.http import call_with_retry

log = logging.getLogger(__name__)


class Source(BaseWikiSource):
    """Wiki source that enriches pages with translation, factors and facets.

    On top of ``BaseWikiSource`` this class:

    * can walk only the URLs stored in ``ClickedMarkedUrl`` (enabled with the
      ``clicked_marked_urls`` option);
    * attaches a translation and popularity factors to the fetched page data;
    * emits the ``isHR``, ``isPrWaves``, ``isWiki`` and ``pageViews`` factors;
    * emits the ``page_type`` facet attribute for official pages.
    """

    # Base options extended with the ``clicked_marked_urls`` switch (off by default).
    default_options = dict(clicked_marked_urls=False, **BaseWikiSource.default_options)
    # NOTE(review): presumably makes the base source load page content —
    # confirm against BaseWikiSource.
    need_content = True

    def do_walk(self, url=None, **kwargs):
        """Schedule pages for fetching.

        With the ``clicked_marked_urls`` option enabled, a ``fetch`` step is
        queued for every distinct ``ClickedMarkedUrl.url`` that starts with
        the site endpoint URL; only the path component of each URL is passed
        on. Otherwise the walk is delegated to the parent class.

        :param url: forwarded to the parent ``do_walk`` in the default mode.
        :param kwargs: accepted for interface compatibility; not used here.
        """
        if not self.options['clicked_marked_urls']:
            super().do_walk(url)
            return

        site_pages = ClickedMarkedUrl.objects.filter(
            url__startswith=self.api['site_endpoint'].url(),
        )
        clicked_urls = site_pages.values_list('url', flat=True).distinct()
        for clicked_url in clicked_urls:
            self.next('fetch', uri=urlparse(clicked_url).path)

    @property
    def is_delta(self):
        """Truthy if the parent reports a delta run or ``clicked_marked_urls`` is set."""
        parent_is_delta = super().is_delta
        if parent_is_delta:
            return parent_is_delta
        return self.options['clicked_marked_urls']

    def fetch_translation(self, data):
        """Request a ``ru-en`` translation of the page title, text and parent titles.

        :param data: page data; ``title``, ``text_content`` and ``parents``
            (a list of mappings with an optional ``title``) are read.
        :return: a dict with the translated ``title``, ``text_content`` and
            ``parents``; an empty dict if the request failed or the detected
            source language is not ``ru``.
        """
        translate_url = settings.ISEARCH['api']['translate'].url(
            query={'lang': 'ru-en', 'format': 'plain'},
        )

        # The texts go in a fixed order (title, content, parent titles) so the
        # response can be unpacked positionally below.
        texts = [data['title'], data['text_content']]
        texts.extend(parent.get('title', '') for parent in data['parents'])
        payload = {'text': texts}

        response = call_with_retry(requests.post, translate_url, data=payload, timeout=60)
        if not response.ok:
            log.warning('Translate request failed %s, %s', response.status_code, response.content)
            return {}

        translation = response.json()
        # The document may have been written in English originally, in which
        # case no translation is needed.
        if translation['detected']['lang'] != 'ru':
            return {}

        title, text_content, *parents = translation['text']
        return {
            'title': title,
            'text_content': text_content,
            'parents': parents,
        }

    def extra_fetch(self, url, data):
        """Collect additional data for a page: popularity factors and translation.

        Translation is best-effort: any exception raised while fetching it is
        logged and an empty translation is used instead.

        :param url: page URL, passed to ``get_popularity_factors``.
        :param data: page data, passed to ``fetch_translation``.
        :return: the result of ``get_popularity_factors(url)`` with the
            translation stored under the ``translated`` key.
        """
        translation = {}
        try:
            translation = self.fetch_translation(data)
        except Exception:
            log.exception('Cannot fetch translation for url %s', url)

        extra = get_popularity_factors(url)
        extra['translated'] = translation
        return extra

    def emit_factors(self, doc, data):
        """Emit wiki-specific factors in addition to the parent's ones.

        :param doc: document the factors are emitted for.
        :param data: page data; ``wiki_page_url`` is read.
        """
        super().emit_factors(doc, data)

        page_url = data['wiki_page_url']

        # Flag: one of the "/"-separated URL segments is "hr" (case-insensitive).
        is_hr = int(any(segment.lower() == 'hr' for segment in page_url.split('/')))
        self.emit_factor(doc, 'isHR', is_hr)

        # Flag: the page belongs to the /pr/waves/ cluster.
        lowered_url = page_url.lower()
        is_pr_waves = int(lowered_url.startswith('/pr/waves/'))
        self.emit_factor(doc, 'isPrWaves', is_pr_waves)

        self.emit_factor(doc, 'isWiki', 1)

        # Page views come from the 'wiki' profile keyed by the lowercased URL.
        page_profile = self.profile_storage.get('wiki', lowered_url)
        self.emit_factor(doc, 'pageViews', page_profile.get('page_views', 0))

    def emit_facet_attrs(self, doc, data):
        """Emit the parent's facet attributes plus ``page_type`` for official pages.

        :param doc: document the facet attributes are emitted for.
        :param data: page data; ``is_official`` is read.
        """
        super().emit_facet_attrs(doc, data)

        if not data['is_official']:
            return
        doc.emit_facet_attr('page_type', 'official', label='Официальная', label_en='Official')
