import logging
from collections import defaultdict
from itertools import chain


from intranet.search.core.snippets.mldescription import MlSnippet
from intranet.search.core.sources.mldescription.utils import get_ml_name, is_trash
from intranet.search.core.sources.mldescription import ml_sources
from intranet.search.core.swarm import Indexer
from intranet.search.core.utils import http, get_ids_repository
from intranet.search.core.utils.cache import CacheAdapter

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Repository handle obtained for ('abc', 'service_contacts') with API version 4;
# Services.prepare_services queries it for mailing-list contacts of services.
service_contacts_repo = get_ids_repository('abc', 'service_contacts', api_version=4)


class Services(CacheAdapter):
    """Cache-backed mapping from a mailing-list name to its services.

    The adapter is initialised with the 'mlserv' identifier (presumably the
    cache namespace/prefix used by ``CacheAdapter`` -- confirm there).
    """

    def __init__(self, cache):
        super().__init__(cache, 'mlserv')

    def prepare_services(self):
        """Load ``email_ml`` service contacts and cache services per list.

        Every contact whose content yields a mailing-list name contributes
        its service to that list. Services are de-duplicated by service id
        (the last contact seen for an id wins), and the resulting list of
        service dicts is stored under the mailing-list name via ``self.set``.
        """
        query = {
            'type__code': 'email_ml',
            'fields': 'content,service.id,service.name',
        }

        # list name -> {service id -> service dict}
        services_by_list = defaultdict(dict)
        for contact in service_contacts_repo.get(query):
            list_name = get_ml_name(contact['content'])
            if not list_name:
                # No mailing-list name could be derived from this contact.
                continue
            service = contact['service']
            services_by_list[list_name][service['id']] = service

        for list_name, by_id in services_by_list.items():
            self.set(list_name, list(by_id.values()))


class Source(Indexer):
    """Indexer for mailing lists.

    The stages hand work to each other through ``self.next``:
    ``do_setup`` schedules 'walk', ``do_walk`` schedules 'create' for every
    unique mailing list, and ``do_create`` schedules 'store' for the built
    document (stage names are presumably dispatched to the matching
    ``do_<stage>`` methods by ``Indexer``).
    """

    def __init__(self, options):
        super().__init__(options)
        self.session = http.create_session()
        self.services = Services(self.cache_storage)

    def do_setup(self, **kwargs):
        """Fill the services cache, then schedule the 'walk' stage."""
        self.services.prepare_services()
        log.info('Services ok')
        self.next('walk')

    def do_walk(self, url=None, **kwargs):
        """Iterate over the mailing-list sources and schedule document creation.

        Lists are de-duplicated by their ``email``. When the ``limit`` option
        is set (truthy), at most ``limit`` unique lists are scheduled.

        :param url: not used by this stage; kept for interface compatibility.
        """
        source_names = ml_sources.__all__
        # The ``keys`` option may name exactly which sources to reindex:
        # ml, separator, yamoney. Unknown keys are ignored.
        if self.options['keys']:
            source_names = [k for k in self.options['keys'] if k in source_names]

        sources = [getattr(ml_sources, name)() for name in source_names]
        limit = self.options['limit']
        seen_emails = set()
        for ml in chain.from_iterable(sources):
            email = ml['email']
            if email in seen_emails:
                continue

            self.next('create', ml=ml)
            seen_emails.add(email)

            # Count only lists that were actually scheduled, so that exactly
            # ``limit`` documents are produced (the previous zero-based index
            # comparison let one extra document through).
            if limit and len(seen_emails) >= limit:
                break

    def do_create(self, ml=None, **kwargs):
        """Build the search document for one mailing list and schedule 'store'.

        :param ml: mailing-list record. ``name``, ``email`` and ``email_type``
            are read as required keys; ``url``, ``exchange`` and ``info`` are
            optional.
        """
        email = ml['email']
        ml_name = ml['name']
        # The document is identified by the list URL, falling back to its email.
        doc = self.create_document(ml.get('url', email))

        body = {
            'name': ml_name,
            # NOTE: the 'adresses' spelling is part of the emitted body and is
            # kept as-is; readers of the index presumably rely on this key.
            'adresses': {
                'exchange': ml.get('exchange', ''),
                'email': email,
            },
            'info': ml.get('info', ''),
            # Services attached to this list, looked up by list name in the
            # cache filled during setup; only the service name is exposed.
            'services': [{'name': service['name']}
                         for service in self.services[ml_name]],
        }

        # Ranking factors.
        doc.emit_factor('titleLength', len(ml_name))
        doc.emit_factor('titleSeparatorsCount',
                        ml_name.count('-') + ml_name.count('_'))
        doc.emit_factor('is%s' % ml['email_type'].title(), 1)
        # 'support' is looked for in the part of the email after the last '@'.
        doc.emit_factor('isSupport',
                        int('support' in email.rsplit('@', 1)[-1]))
        doc.emit_factor('isTrash', int(is_trash(ml_name)))

        doc.emit_body(body)

        # Search attributes.
        for attr in ('name', 'email', 'exchange', 'email_type'):
            doc.emit_search_attr('s_%s' % attr, ml.get(attr))

        doc.emit_suggest_attr(email)
        doc.emit_snippet(MlSnippet(ml))

        self.next('store', document=doc, body_format='json')
