import typing
from logging import getLogger

from django.core.management.base import BaseCommand
from django.utils.encoding import force_text
from yt.wrapper import TablePath, yt_dataclass
from yt.wrapper import yson

from intranet.search.core.utils.saas import get_factor_names, get_revisions_for_meta, get_factors_for_url
from intranet.search.core.yt_client import client as yt

log = getLogger(__name__)


def get_doc_factor_list(factor_names, factors):
    """Return the values from ``factors`` ordered as ``factor_names``.

    A name that is missing from ``factors``, or whose value is falsy
    (``None``, ``0``, an empty string, ...), contributes ``0`` to the result.
    """
    return [factors.get(name) or 0 for name in factor_names]


@yt_dataclass
class FactorRow:
    # One row of the structured table written by Command.save_factors.
    # All columns are optional strings that default to None.

    # Query text copied from the input row's 'query' column.
    query: typing.Optional[str] = None
    # Document URL copied from the input row's 'url' column.
    url: typing.Optional[str] = None
    # Text form of yson.dumps(<factors>) for the (query, url) pair;
    # None when no factors were obtained for it.
    factors: typing.Optional[str] = None


class Command(BaseCommand):
    """Collect SaaS factors for (query, url) pairs.

    Two modes are supported:

    * single-pair mode (``--query`` given): factors for the ``--query`` /
      ``--url`` pair are fetched and printed;
    * table mode (default): every row of the ``--input`` YT table is
      processed and the results are appended to the ``--output`` YT table
      as ``FactorRow`` records.
    """

    help = 'Collect factors for search pool'

    def add_arguments(self, parser):
        """Register the command line options of the command."""
        parser.add_argument('--service', '-s', action='store', default='intrasearch-wiki',
                            help='saas service for scrapping')
        parser.add_argument('--input', '-i', action='store', default='', help='input yt table name')
        parser.add_argument('--output', '-o', action='store', default='', help='output yt path for pool')
        # NOTE(review): --params is parsed but not consumed anywhere in this class.
        parser.add_argument('--params', '-p', action='store', default='', help='extra saas cgi parameters')
        parser.add_argument('--query', '-q', action='store', default='', help='check just one query, this one')
        parser.add_argument('--url', '-u', action='store', default='', help='check just one query. this is the url')

    def handle(self, **options):
        """Entry point: run single-pair mode or table mode.

        :param options: parsed command options (see ``add_arguments``).
        """
        self.options = options
        # Honour --service instead of a hard-coded name; fall back to the
        # option's declared default when the key is absent or empty.
        service = options.get('service') or 'intrasearch-wiki'
        self.factor_names = get_factor_names(service)
        self.revisions = get_revisions_for_meta()

        if options['query']:
            result = get_factors_for_url(options['query'], options['url'], self.revisions)
            print(result)
            return

        self.save_factors(self.gen_search_results())

    def gen_search_results(self):
        """Lazily yield a ``FactorRow`` for each row of the input table.

        Each input row must provide ``query`` and ``url`` keys. When no
        factors are obtained for a pair, a warning is logged and the row is
        still yielded with ``factors=None``.
        """
        for row in yt.read_table(self.options['input']):
            query, url = row['query'], row['url']
            log.debug('Retrieve for: [%s], url=%s', query, url)
            factors = get_factors_for_url(query, url, self.revisions)
            if factors:
                serialized = force_text(yson.dumps(factors))
            else:
                log.warning('Cannot find enough for query %s', query)
                serialized = None
            yield FactorRow(query=query, url=url, factors=serialized)

    def _get_yt_table(self):
        """Return an append-mode ``TablePath`` for the output table.

        The table is created (with ``recursive=True``) when it does not exist.
        """
        full_name = self.options['output']
        if not yt.exists(full_name):
            yt.create('table', full_name, recursive=True)
        # append=True so that every batch is added after the previous ones.
        return TablePath(full_name, append=True)

    def save_factors(self, data, batch_size=1000):
        """Write ``data`` to the output table in batches.

        :param data: iterable of ``FactorRow`` objects; consumed lazily.
        :param batch_size: maximum number of rows sent per write call.
        """
        table = self._get_yt_table()
        batch = []
        for row in data:
            batch.append(row)
            # ``>=`` keeps a batch at exactly ``batch_size`` rows; the former
            # ``>`` check let every batch grow to ``batch_size + 1``.
            if len(batch) >= batch_size:
                yt.write_table_structured(table, FactorRow, batch)
                batch = []
        # Flush the trailing partial batch, if any.
        if batch:
            yt.write_table_structured(table, FactorRow, batch)
