import os
from collections import namedtuple
from logging import getLogger

import requests

from django.conf import settings
from django.core.management.base import BaseCommand
from yt.wrapper import TablePath

from intranet.search.abovemeta.search_response import SaaSSearchResponse
from intranet.search.core.tvm import tvm2_client
from intranet.search.core.utils.saas import get_all_factors
from intranet.search.core.yt_client import client as yt

# Module-level logger named after this module.
log = getLogger(__name__)

# One row of the input pool: query id, query text, document url and rating.
# Populated in Command.gen_search_results from the 'qid', 'query', 'url' and
# 'rating' columns of the input YT table.
SearchQuery = namedtuple('SearchQuery', ('id', 'text', 'url', 'rating'))


class Command(BaseCommand):
    """Collect SaaS ranking factors for a pool of search queries.

    Reads queries from the input YT table, asks SaaS for documents together
    with their factors, and appends the result to two tables under the output
    YT path: ``features`` (one row per returned document) and ``factor_names``
    (one row per factor index).
    """
    help = 'Collect factors for search pool'

    def add_arguments(self, parser):
        """Register command line options of the command."""
        parser.add_argument('--service', '-s', action='store', default='intrasearch-wiki',
                            help='saas service for scrapping')
        parser.add_argument('--kps', action='store', default='76585,76770,76769', help='saas kps')
        parser.add_argument('--input', '-i', action='store', default='', help='input yt table name')
        parser.add_argument('--output', '-o', action='store', default='', help='output yt path for pool')
        parser.add_argument('--params', '-p', action='store', default='', help='extra saas cgi parameters')

    def handle(self, **options):
        """Entry point: resolve factor names, then write features and names."""
        self.options = options
        # Factor names are needed by gen_search_results (through
        # _get_doc_factor_list), so they are resolved before any row is built.
        self.factor_names = self.get_factor_names()

        self.save_features(self.gen_search_results())
        self.save_factor_names()

    def gen_search_results(self):
        """Yield one feature row per document found for every input query.

        Each yielded row is a list:
        ``[query id, query rating, document url, 0, *factor values]``
        where factor values follow the order of ``self.factor_names``.
        Queries for which SaaS returned nothing are logged and skipped.
        """
        for row in yt.read_table(self.options['input']):
            query = SearchQuery(row['qid'], row['query'], row['url'], row['rating'])
            log.debug('Retrieve for: %s, [%s], url=%s', query.id, query.text, query.url)
            results = self.get_search_result(query)

            if not results:
                log.warning('Cannot find enough for query %s', query)
                continue

            for doc in results:
                yield [query.id, query.rating, doc.url, 0] + self._get_doc_factor_list(doc)

    def get_factor_names(self):
        """Return the names of all factors configured for the SaaS service.

        The result is a list in which the position of a factor equals its id
        in the SaaS config. Gaps between ids are filled with placeholder names
        ``factor_<index>``; without them the trained formula cannot be used in
        SaaS because the factor indexes would not match.

        For example, if SaaS has {'STAT_meta_obsolete': 1, 'STAT_meta_startpage': 3}
        the result is ['factor_0', 'STAT_meta_obsolete', 'factor_2', 'STAT_meta_startpage'].

        Raises:
            ValueError: if the service has no factors configured.
        """
        service = self.options['service']
        factors = get_all_factors(service)
        if not factors:
            # max() of an empty mapping raises an opaque ValueError; keep the
            # exception type but say what actually went wrong.
            raise ValueError(f'No factors configured for saas service {service!r}')

        index_to_name = {index: name for name, index in factors.items()}
        return [
            index_to_name.get(index) or f'factor_{index}'
            for index in range(max(index_to_name) + 1)
        ]

    def get_search_result(self, query, retries=10, timeout=60):
        """Request SaaS and return the list of documents with factors.

        Args:
            query: SearchQuery to look up.
            retries: maximum number of attempts before giving up.
            timeout: per-request timeout in seconds passed to ``requests.get``,
                so that a hung connection cannot block the command forever.

        Returns:
            List of documents from the SaaS response, or an empty list when
            every attempt failed. Failures are logged, never raised.
        """
        for _ in range(retries):
            try:
                headers = {settings.TVM2_SERVICE_HEADER: tvm2_client.get_service_ticket('saas')}
                response = requests.get(self._get_saas_url(query), headers=headers, timeout=timeout)
                response.raise_for_status()
                docs = list(SaaSSearchResponse(response.json()).get_docs())
                # A response where some document lacks factors is useless for
                # the pool, so it is treated as a failed attempt and retried.
                if not all(d.factors for d in docs):
                    raise Exception('SaaS did not return factors')
            except Exception:
                # Best effort by design: any failure is logged and retried.
                log.exception('Error while request %s', query)
            else:
                return docs
        return []

    def _get_saas_url(self, query):
        """Build the SaaS search url for the query.

        When the query has a url, the text is restricted with ``url:"..."``.
        Extra cgi parameters from ``--params`` are appended verbatim.
        """
        text = query.text
        if query.url:
            text += f' url:"{query.url}"'
        cgi_params = {
            'format': 'json',
            'robot': 1,
            'dbgrlv': 'da',
            'fsgta': '_JsonFactors',
            'relev': 'all_factors',
            'kps': self.options['kps'],
            'text': text,
        }
        url = settings.ISEARCH['api']['saas'][self.options['service']]['search'].url(query=cgi_params)
        if self.options['params']:
            url = '&'.join((url, self.options['params']))
        return url

    def _get_doc_factor_list(self, doc):
        """Return factor values of the document ordered as ``self.factor_names``.

        Factors that are missing (or falsy) in the document become 0.
        """
        factors = doc.factors
        return [factors.get(name) or 0 for name in self.factor_names]

    def _get_yt_table(self, name):
        """Return an append-mode TablePath for ``<output>/<name>``.

        The table is created (with parent nodes) when it does not exist yet.
        """
        full_name = os.path.join(self.options['output'], name)
        if not yt.exists(full_name):
            yt.create('table', full_name, recursive=True)
        return TablePath(full_name, append=True)

    def save_features(self, data, batch_size=1000):
        """Append feature rows to the ``features`` table in batches.

        Args:
            data: iterable of rows as produced by ``gen_search_results``.
            batch_size: number of rows written per ``yt.write_table`` call.

        Each row is stored as ``key`` (the first element, i.e. the query id)
        and ``value`` (the remaining elements joined with tabs).
        """
        table = self._get_yt_table('features')
        batch = []
        for row in data:
            batch.append({'key': str(row[0]), 'value': '\t'.join(str(v) for v in row[1:])})
            # ">=" so that a batch holds exactly batch_size rows, not one more.
            if len(batch) >= batch_size:
                yt.write_table(table, batch)
                batch = []
        if batch:
            yt.write_table(table, batch)

    def save_factor_names(self):
        """Append factor names to the ``factor_names`` table.

        Each row has ``key`` equal to the factor index (as a string) and
        ``value`` equal to the factor name.
        """
        table = self._get_yt_table('factor_names')
        factor_names = ({'key': str(i), 'value': v} for i, v in enumerate(self.factor_names))
        yt.write_table(table, factor_names)
