# -*- coding: utf-8 -*-

import logging
import os.path

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask

from sandbox.projects import resource_types


class Params(object):
    """Namespace with the input parameter declarations of the task.

    Every nested class declares one parameter; ``params`` at the bottom lists
    them in the order in which they are handed to the task as its
    ``input_parameters``.
    """

    class QueriesLimit(parameters.SandboxIntegerParameter):
        # Number of queries after which writing to the result file stops.
        name = "queries_limit"
        description = "Queries limit for result resource"
        default_value = 5000

    class EnableGeowizardQueries(parameters.SandboxBoolParameter):
        # Switches the geowizard.data source on or off.
        name = "enable_geowizard_queries"
        group = "Geowizard queries"
        description = "Use geowizard queries"
        default_value = True

    class GeoWizardDomains(parameters.SandboxBoolGroupParameter):
        # Domains whose geowizard queries are kept. The consumer splits the
        # value on single spaces and compares each item with the third
        # tab-separated column of geowizard.data.
        name = "geo_wizard_domains"
        group = "Geowizard queries"
        description = "Use queries for domains"
        choices = [
            ("ru", "ru"),
            ("com.tr", "com.tr"),
            ("by", "by"),
            ("ua", "ua"),
        ]
        default_value = "ru"

    class EnableMetasearchQueries(parameters.SandboxBoolParameter):
        # Switches the yandex_ru.data source on or off.
        name = "enable_metasearch_queries"
        group = "Metasearch queries"
        description = "Use queries for testing metasearch (KUBR)"
        default_value = True

    class EnableTabloQueries(parameters.SandboxBoolParameter):
        # Switches the tablo.data source on or off.
        name = "enable_tablo_queries"
        group = "Tablo queries"
        description = "Use queries for testing tablo"
        default_value = True

    class TabloRegionTypes(parameters.SandboxBoolGroupParameter):
        # Row types of tablo.data to keep. The value is a space-separated
        # list (the consumer splits it on single spaces) that is matched
        # against the first column of each tablo.data row.
        name = "tablo_types"
        group = "Tablo queries"
        description = "Use queries for test tablo reqions"
        # Each choice uses the same text as both elements of its pair.
        choices = [
            (v, v)
            for v in (
                "Вертикаль", "Врезка", "Шапка", "Саджест", "Колдунщик", "Спецсниппет",
                "Выдача", "Настройки", "Общее", "Реклама", "Футер", "Морда",
            )
        ]
        default_value = "Вертикаль Врезка Колдунщик Спецсниппет Выдача Реклама"

    class TabloDomains(parameters.SandboxBoolGroupParameter):
        # Domains whose tablo queries are kept. The value is split on single
        # spaces and compared with the "<domain>:" prefix of each query
        # found in tablo.data.
        name = "tablo_domains"
        group = "Tablo queries"
        description = "Use queries for domains"
        choices = [
            ("ru", "ru"),
            ("com", "com"),
            ("com.tr", "com.tr"),
            ("by", "by"),
            ("ua", "ua"),
            ("kz", "kz"),
        ]
        default_value = "ru"

    class AdditionalUsersQueries(parameters.LastReleasedResource):
        # Optional resource with extra queries that are appended after the
        # built-in sources. A falsy value (such as the default 0) means that
        # no extra resource is synced.
        name = 'append_users_queries_resource_id'
        group = "Mix another users queries"
        description = 'Add users querues from another resource'
        resource_type = 'USERS_QUERIES'
        default_value = 0

    # All parameter declarations, in declaration order.
    params = (
        QueriesLimit,
        EnableGeowizardQueries,
        GeoWizardDomains,
        EnableMetasearchQueries,
        EnableTabloQueries,
        TabloRegionTypes,
        TabloDomains,
        AdditionalUsersQueries,
    )


class GenerateUsersQueries(SandboxTask):
    """
        Generation of user queries. Data sources with queries:
            Geowizards: https://wiki.yandex-team.ru/testirovanie/functesting/websearch/webreport/geowizards/geo -> geowizard.data
            Metasearch: https://wiki.yandex-team.ru/testirovanie/functesting/websearch/webreport/meta/yandsearch-ru -> yandex_ru.data
            Search results testing: https://wiki.yandex-team.ru/testirovanie/functesting/testwebsearch/testcases/tablo -> tablo.data

        Because these sources yield few queries, the list can be topped up with data from another resource.
        (The default settings are intended for noapacheupper tests on the yandex.ru domain.)
    """

    type = "GENERATE_USERS_QUERIES"

    input_parameters = Params.params

    # Context key under which the id of the output resource is stored.
    users_queries_res_id = "users_queries_res_id"
    # File name of the output resource.
    users_queries_file = "users_queries.txt"

    def on_enqueue(self):
        """Create the output resource and remember its id in the task context."""
        SandboxTask.on_enqueue(self)
        # Register this task on the shared channel object.
        channel.task = self
        output_resource = self.create_resource(
            self.descr,
            self.users_queries_file,
            resource_types.USERS_QUERIES,
            arch="any",
        )
        self.ctx[self.users_queries_res_id] = output_resource.id

    def on_execute(self):
        """Fill the output resource file with the generated queries."""
        resource_id = self.ctx[self.users_queries_res_id]
        output_path = channel.sandbox.get_resource(resource_id).path
        # The data files are looked up in the directory of this module.
        module_dir = os.path.dirname(__file__)
        worker = _ProcessQueriesImpl(output_path, data_dir=module_dir, ctx=self.ctx)
        worker.execute()


# Module-level alias of the task class.
# NOTE(review): presumably the name under which the Sandbox loader discovers
# the task -- confirm against the framework.
__Task__ = GenerateUsersQueries


class _ProcessQueriesImpl(object):
    """Collect queries from the data files and write them to the result file.

    A separate implementation class is used so that the SandboxTask subclass
    is not cluttered with these details.

    Every output line has the form "<query><TAB><region id>". A
    (query, region) pair is written at most once. Writing stops as soon as the
    number of written lines equals the configured limit; a limit of zero or
    below is therefore never reached and all collected queries are written.
    """

    # Region id used when a source line carries no region column.
    _DEFAULT_REGION = 213
    # UTF-8 byte sequence of U+00A0 (no-break space); replaced by a plain space.
    _NBSP = "\xc2\xa0"

    def __init__(self, result_file, data_dir, ctx):
        """Remember where to write, where the data files live and the task context.

        :param result_file: path of the file the queries are written to
        :param data_dir: directory with geowizard.data, yandex_ru.data and tablo.data
        :param ctx: mapping with the task parameters, keyed by parameter name
        """
        self.result_file = result_file
        self.data_dir = data_dir
        self.ctx = ctx
        # Working state; execute() re-initialises it on every run.
        self._f = None
        self._queries_limit = None
        self._count_queries = 0
        self._reqs = set()

    def execute(self):
        """Write queries from every enabled source until the limit is hit.

        Sources are processed in a fixed order: geowizard, metasearch, tablo,
        then the optional additional resource.
        """
        add_q = self.ctx[Params.AdditionalUsersQueries.name]
        additional_queries = channel.task.sync_resource(add_q) if add_q else None
        self._queries_limit = self.ctx[Params.QueriesLimit.name]
        self._count_queries = 0
        self._reqs = set()
        with open(self.result_file, 'w') as f:
            self._f = f
            # Each helper returns False once the limit has been reached.
            if self.ctx[Params.EnableGeowizardQueries.name]:
                geo_wizard_domains = self.ctx[Params.GeoWizardDomains.name].split(' ')
                if not self._add_geowizard_queries(geo_wizard_domains):
                    return

            if self.ctx[Params.EnableMetasearchQueries.name]:
                if not self._add_metasearch_queries():
                    return

            if self.ctx[Params.EnableTabloQueries.name]:
                tablo_reg_types = self.ctx[Params.TabloRegionTypes.name].split(' ')
                tablo_domains = self.ctx[Params.TabloDomains.name].split(' ')
                if not self._add_tablo_queries(tablo_reg_types, tablo_domains):
                    return

            if additional_queries:
                self._add_additional_queries(additional_queries)

    def _clean_line(self, line):
        """Return *line* with no-break spaces replaced and the newline removed."""
        return line.replace(self._NBSP, " ").rstrip('\n')

    def _add_geowizard_queries(self, geo_wizard_domains):
        """Add geowizard.data rows (query, region, domain) of the given domains.

        Returns False when the queries limit was reached, True otherwise.
        """
        with open(os.path.join(self.data_dir, "geowizard.data")) as fr:
            logging.info("use geowizard domains: " + str(geo_wizard_domains))
            for line in fr:
                tk = self._clean_line(line).split('\t')
                if tk[2] in geo_wizard_domains:
                    req = (tk[0].strip(' '), int(tk[1]))
                    if not self.add_req(req):
                        return False
        return True

    def _add_metasearch_queries(self):
        """Add yandex_ru.data rows (query and optional region).

        Returns False when the queries limit was reached, True otherwise.
        """
        with open(os.path.join(self.data_dir, "yandex_ru.data")) as fr:
            for line in fr:
                tk = self._clean_line(line).split('\t')
                # The fallback must be an int like every other region id,
                # otherwise the same query would not be recognised as a
                # duplicate of one with an explicit region 213.
                lr = int(tk[1]) if len(tk) >= 2 else self._DEFAULT_REGION
                req = (tk[0].strip(' '), lr)
                if not self.add_req(req):
                    return False
        return True

    def _add_tablo_queries(self, tablo_reg_types, tablo_domains):
        """Add "<domain>:<query>" entries from tablo.data.

        Each row has two tab-separated cells: a row type and a cell with
        queries. A cell starting with an odd number of double quotes opens a
        quoted value that continues on the following lines until a line ends
        with an odd number of double quotes. Only rows whose type is in
        *tablo_reg_types* and entries whose domain is in *tablo_domains* are
        used; all of them get region 213.

        Returns False when the queries limit was reached, True otherwise.
        """
        with open(os.path.join(self.data_dir, "tablo.data")) as fr:
            logging.info("use tablo types: " + str(tablo_reg_types))
            logging.info("use table domains: " + str(tablo_domains))
            line_continue = False
            tk0 = None
            tk1 = None
            for line in fr:
                line = self._clean_line(line)
                logging.info('line: ' + line)
                if line_continue:
                    if (len(line) - len(line.rstrip('"'))) % 2 == 1:
                        # Closing line: drop the terminating quote.
                        line_continue = False
                        tk1 += '\n' + line[:-1]
                    else:
                        tk1 += '\n' + line
                else:
                    tk = line.split('\t')
                    if (len(tk[1]) - len(tk[1].lstrip('"'))) % 2 == 1:
                        # Opening line: drop the leading quote and wait for
                        # the rest of the cell.
                        tk0 = tk[0]
                        tk1 = tk[1][1:]
                        line_continue = True
                        continue
                    tk0, tk1 = tk
                # NOTE: while a quoted cell is still open, the part collected
                # so far is scanned after every continuation line; add_req
                # ignores the repeated entries.
                if tk0 in tablo_reg_types:
                    for dmreq in tk1.split('\n'):
                        if ":" not in dmreq:
                            continue
                        domain, reqtxt = dmreq.split(":", 1)
                        domain = unicode(domain, "UTF-8")
                        if domain in tablo_domains:
                            # A doubled quote inside the cell stands for one quote.
                            req = (reqtxt.replace('""', '"').strip(' '), 213)
                            if not self.add_req(req):
                                return False
        return True

    def _add_additional_queries(self, additional_queries):
        """Add rows (query and optional region) from the file *additional_queries*.

        Returns False when the queries limit was reached, True otherwise.
        """
        with open(additional_queries) as fr:
            for line in fr:
                tk = line.rstrip('\n').split('\t')
                region = int(tk[1]) if len(tk) >= 2 else self._DEFAULT_REGION
                if not self.add_req((tk[0], region)):
                    return False
        return True

    def add_req(self, req):
        """Write *req* (a ``(query, region)`` tuple) unless it was written before.

        Returns False when this call wrote the line that reached the queries
        limit, True in every other case (including skipped duplicates).
        """
        if req in self._reqs:
            return True
        self._f.write("{}\t{}\n".format(req[0], req[1]))
        self._reqs.add(req)
        self._count_queries += 1
        if self._count_queries == self._queries_limit:
            return False
        return True
