import logging
from urllib.parse import urlparse
from django.conf import settings

from intranet.search.core.sources.utils import (
    find_one,
    get_breadcrumbs,
    get_text_content,
    get_short_desc,
    get_elements_content,
)
from intranet.search.core.swarm import ExtendedRobotIndexer

from .utils import Metrix, types

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


class Source(ExtendedRobotIndexer):
    """Indexer for the ``lego`` source.

    Reads page metadata from ``<meta>`` tags, then emits facets, factors,
    a grouping attribute, a body and a snippet for every indexed page.
    """

    index = 'lego'
    source = 'lego'

    # NOTE(review): presumably URL patterns the crawler must not follow;
    # confirm against ExtendedRobotIndexer.
    disallow = (
        r'/__example/?',
    )

    # NOTE(review): presumably HTTP statuses the base indexer accepts;
    # confirm against ExtendedRobotIndexer.
    allowed_statuses = [200, 404]

    # Names of the <meta> tags read by get_doc_info, in snippet order.
    _META_FIELDS = (
        'type',
        'library',
        'version',
        'level',
        'block',
        'status',
        'doc_type',
    )

    # Metadata fields emitted as facets whenever they hold a truthy value.
    _FACET_FIELDS = ('library', 'version', 'level', 'block')

    def __init__(self, options):
        """Initialise the indexer with the base URL and the metrics helper."""
        super().__init__(options)
        self.base_url = settings.ISEARCH['api']['lego'][self.index].base_url()
        self.metrix = Metrix(self.cache_storage, self.index)

    def do_setup(self, **kwargs):
        """Prepare the metrics and hand over to the ``walk`` step."""
        self.metrix.prepare_metrix()
        self.next('walk')

    def get_doc_info(self, parsed):
        """Return the page metadata read from its ``<meta>`` tags.

        :param parsed: parsed page passed on to ``find_one``.
        :return: dict keyed by every name in ``_META_FIELDS``; each value
            is whatever ``find_one`` yields for the matching
            ``//meta[@name=...]/@content`` query.
        """
        return {
            name: find_one(parsed, '//meta[@name="%s"]/@content' % name)
            for name in self._META_FIELDS
        }

    def get_title(self, parsed, data):
        """Return the document title.

        Block pages get a title synthesised from their metadata. Other
        pages use the text of the ``data-search="title"`` element, then
        of ``<title>``; an empty string is returned when neither exists.
        """
        if data['type'] == 'block':
            return 'Блок {} {} [{} {}]'.format(data['block'], data['level'], data['library'], data['version'])

        for xpath in ('//*[@data-search="title"]', '//title'):
            title_elem = find_one(parsed, xpath, None)
            if title_elem is not None:
                return get_text_content(title_elem)

        return ''

    def get_walk_urls(self, index_page):
        """Return the set of normalized URLs found on ``index_page``.

        Overrides the parent method: besides ``<a href>`` links it also
        collects ``data-lib-url`` attributes, which mark the elements the
        Lego frontend renders as buttons.
        """
        raw_urls = index_page.xpath('//a/@href') + index_page.xpath('//@data-lib-url')
        return {self.normalize_url(raw_url) for raw_url in raw_urls}

    def _get_group(self, data, doc_url):
        """Return ``(group_value, group_name, group_url)`` for a document.

        Blocks are grouped per library and block, libraries per library
        and version; any other page forms a group of its own keyed by its
        URL. ``group_url`` is set only for the page chosen to represent
        the group: the current desktop page of a block, or the
        description page of a library.
        """
        if data['type'] == 'block':
            value = 'block_{}_{}'.format(data['library'], data['block'])
            name = 'Блок {} [{}]'.format(data['block'], data['library'])
            is_main = data['level'] == 'desktop' and data['status'] == 'current'
            return value, name, doc_url if is_main else ''

        if data['type'] == 'library':
            value = 'lib_{}_{}'.format(data['library'], data['version'])
            name = 'Библиотека {} [{}]'.format(data['library'], data['version'])
            is_main = data['doc_type'] == 'description'
            return value, name, doc_url if is_main else ''

        return doc_url, '', ''

    def get_doc(self, parsed, url):
        """Build the search document for a parsed page.

        :param parsed: parsed page.
        :param url: URL the page was fetched from.
        :return: the populated document, or ``None`` when the page is a
            block page whose last URL path segment is not the block name
            (including the case of a path with no segments at all).
        :raises KeyError: if ``types`` has no entry for the page type.
        """
        doc = self.create_document(url)
        data = self.get_doc_info(parsed)

        parsed_url = urlparse(doc.url)

        if data['type'] == 'block':
            parts = [p for p in parsed_url.path.split('/') if p]
            # An empty path has no last segment; treat it like any other
            # non-matching block URL instead of failing with IndexError.
            if not parts or parts[-1] != data['block']:
                log.info('Omitting %s', doc.url)
                return None

        for field in self._FACET_FIELDS:
            if data[field]:
                doc.emit_facet_attr(field, data[field], data[field])

        if data['status'] == 'current':
            doc.emit_factor('isCurrent', 1)

        if data['doc_type'] == 'description':
            doc.emit_factor('isDescription', 1)

        type_labels = types[data['type']]
        doc.emit_facet_attr('type', data['type'],
                            label=type_labels['ru'],
                            label_en=type_labels['en'])

        title = self.get_title(parsed, data)
        group_value, group_name, group_url = self._get_group(data, doc.url)

        if data['type'] in ('block', 'library'):
            doc.emit_factor('libPopularity', self.metrix[data['library']] or 0)

        doc.emit_factor('pageViews', self.metrix[parsed_url.path] or 0)

        if group_value:
            doc.emit_group_attr('lego_grp', group_value, label=group_name, url=group_url)
        else:
            log.warning('No group value %s', url)

        # Fall back to the whole page when no dedicated content element exists.
        content = find_one(parsed, '//*[@data-search="content"]', parsed)
        # Extracted once and shared by the body and the snippet description.
        text = get_text_content(content)

        body = {
            'content': text,
            'z_ns_hidden': {
                'title': title,
                'subtitle': get_elements_content(content, ['h1', 'h2', 'h3']),
                'minortitle': get_elements_content(content, ['h4', 'h5', 'h6']),
                'library': data['library'],
                'block': data['block'],
            }
        }
        doc.emit_body(body)

        raw_bcs = find_one(parsed, '//meta[@name="breadcrumbs"]/@content')

        snippet = {
            'title': title or data['block'] or data['library'],
            'breadcrumbs': get_breadcrumbs(raw_bcs, self.base_url) if raw_bcs else [],
            'description': get_short_desc(text, 200),
        }

        # Expose the raw page metadata in the snippet as well.
        snippet.update(data)

        doc.emit_snippet(snippet, 'ru')
        return doc
