# -*- coding: utf-8 -*-
import logging
import datetime
from sandbox import sdk2
import sandbox.common.types.client as ctc
from sandbox.projects import resource_types
from sandbox.projects.common import network
import sandbox.projects.websearch.middlesearch.resources as ms_resources
from sandbox.projects.websearch.models_proxy import resources


class GetModelsProxyQueries(sdk2.Task):
    """
    Shoots input queries at the middlesearch and saves the subrequests and
    responses of models_proxy and of the services beneath it.

    Up to three generator runs are performed, each publishing its own set of
    gzipped output resources:

    * "large" output (``large_output_count`` queries);
    * "autobuild" output (``small_output_count`` queries);
    * "bigrt cache" output (``small_output_count`` queries, generator started
      with ``--with-bigrt-caches``).
    """

    class Parameters(sdk2.Task.Parameters):

        large_output_count = sdk2.parameters.Integer(
            "queries count for large output",
            description="large output is used for models_proxy priemka tests; 0 to disable",
            default=5000)

        small_output_count = sdk2.parameters.Integer(
            "queries count for small output",
            description="small output is used for small autotests; 0 to disable",
            default=1000)

        mark_results = sdk2.parameters.Bool(
            "mark results to use in regular processes",
            default=False,
            do_not_copy=True)

        generator_binary = sdk2.parameters.Resource(
            "models_proxy tests generator executable",
            description="if not set, find something ready",
            resource_type=resources.ModelsProxyQueriesGenerator)

        mmeta_queries = sdk2.parameters.Resource(
            "input mmeta queries",
            description="if not set, use latest with testenv autoupdate attribute",
            resource_type=ms_resources.WebMiddlesearchPlainTextQueries)

        models_proxy_binary = sdk2.parameters.Resource(
            "models_proxy executable",
            description="if not set, use last released binary",
            resource_type=resources.ModelsProxyExecutable)

        evlogdump_binary = sdk2.parameters.Resource(
            "evlogdump executable",
            description="if not set, use last released binary",
            resource_type=resource_types.EVLOGDUMP_EXECUTABLE)

        with sdk2.parameters.Output():
            # with sdk2.parameters.Group("Large output for perf tests"):
            large_requests = sdk2.parameters.Resource("(large) models_proxy requests", resource_type=resources.ModelsProxyRequests)
            large_requests_responses = sdk2.parameters.Resource("(large) models_proxy requests and responses", resource_type=resources.ModelsProxyRequestsResponses)
            large_fetchdocdata = sdk2.parameters.Resource("(large) FetchDocData results", resource_type=resources.ModelsProxyFetchDocDataResults)
            large_dyntable = sdk2.parameters.Resource("(large) DynTable requests and responses", resource_type=resources.ModelsProxySubsourceResults)
            large_all_subsources = sdk2.parameters.Resource("(large) subsources requests and responses", resource_type=resources.ModelsProxySubsourceResults)

            # with sdk2.parameters.Group("Small output without bigrt cache"):
            autobuild_normal_requests_responses = sdk2.parameters.Resource("(autobuild) models_proxy requests and responses", resource_type=resources.ModelsProxyRequestsResponses)
            autobuild_normal_fetchdocdata = sdk2.parameters.Resource("(autobuild) FetchDocData results", resource_type=resources.ModelsProxyFetchDocDataResults)
            autobuild_normal_subsources = sdk2.parameters.Resource("(autobuild) subsources requests and responses", resource_type=resources.ModelsProxySubsourceResults)

            # with sdk2.parameters.Group("Small output with bigrt cache"):
            autobuild_bigrt_requests_responses = sdk2.parameters.Resource("(bigrt cache) models_proxy requests and responses", resource_type=resources.ModelsProxyRequestsResponses)
            autobuild_bigrt_fetchdocdata = sdk2.parameters.Resource("(bigrt cache) FetchDocData results", resource_type=resources.ModelsProxyFetchDocDataResults)
            autobuild_bigrt_subsources = sdk2.parameters.Resource("(bigrt cache) subsources requests and responses", resource_type=resources.ModelsProxySubsourceResults)

    class Requirements(sdk2.Requirements):
        cores = 2
        ram = 4096  # 4G
        disk_space = 30 * 1024  # 30G
        client_tags = ctc.Tag.VLA

        # NOTE(review): an empty Caches subclass presumably declares that the
        # task needs no shared caches -- confirm against the Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    def _gzip_to_path(self, input, output, stderr):
        """
        Compress the local file `input` with the external ``gzip`` tool.

        :param input: name of the local file to compress
        :param output: path-like object with an ``open`` method; receives the gzipped data
        :param stderr: file object that receives gzip's stderr
        :raises subprocess.CalledProcessError: if gzip exits with a non-zero code
        """
        # The handle is only handed to the child process as its stdin, so the
        # data is never decoded here; binary mode states that explicitly.
        with open(input, 'rb') as fi, output.open('wb') as fo:
            sdk2.helpers.subprocess.check_call(
                ["gzip"],
                stdin=fi,
                stdout=fo,
                stderr=stderr
            )

    def _resolve_path(self, explicit_resource, resource_class, type_name, purpose, **find_filters):
        """
        Return the local path of an input resource as a string.

        The explicitly passed resource is used when given; otherwise the first
        READY resource of `resource_class` matching `find_filters` is taken.

        :param explicit_resource: resource chosen in the task parameters, or None
        :param resource_class: resource class to search when nothing was chosen
        :param type_name: resource type name used in the "Cannot find ..." message
        :param purpose: human-readable role used in the log message
        :param find_filters: extra keyword arguments forwarded to ``find``
        :raises AssertionError: if nothing suitable was found
        """
        resource = explicit_resource
        if resource is None:
            resource = resource_class.find(state="READY", **find_filters).first()
            if resource is None:
                # Raised explicitly (rather than via `assert`) so the check
                # survives `python -O`; the exception type stays the same.
                raise AssertionError("Cannot find " + type_name)
            logging.info("Using resource {} for {}".format(resource.id, purpose))
        return str(sdk2.ResourceData(resource).path)

    @staticmethod
    def _run_generator(process_log, common_args, num_queries, extra_args):
        """
        Run the generator executable and fail on a non-zero exit code.

        :param process_log: ProcessLog whose stdout/stderr receive the output
        :param common_args: generator path followed by the options shared by all runs
        :param num_queries: value passed as ``--num-queries``
        :param extra_args: run-specific options appended at the end
        """
        sdk2.helpers.subprocess.check_call(
            common_args + ["--num-queries", str(num_queries)] + extra_args,
            stdout=process_log.stdout, stderr=process_log.stderr
        )

    def _declare_output(self, parameter_name, resource_class, description, file_name):
        """
        Create an output resource, store it in the output parameter
        `parameter_name` and return the ResourceData wrapper for it.
        """
        setattr(self.Parameters, parameter_name, resource_class(self, description, file_name))
        return sdk2.ResourceData(getattr(self.Parameters, parameter_name))

    def _publish_gzipped(self, parameter_name, resource_class, description, file_name, source_name, stderr):
        """
        Declare an output resource and fill it with the gzipped contents of
        the local file `source_name`.

        :return: ResourceData of the new resource (``ready()`` is not called here)
        """
        data = self._declare_output(parameter_name, resource_class, description, file_name)
        self._gzip_to_path(source_name, data.path, stderr)
        return data

    @staticmethod
    def _extract_dyntable(source_name, target_name):
        """
        Copy `source_name` to `target_name` line by line, keeping only the
        tab-separated fields that start with ``reqid=`` or
        ``DYNTABLE_HTTP_PROXY_SOURCE:``.
        """
        wanted_prefixes = ('reqid=', 'DYNTABLE_HTTP_PROXY_SOURCE:')
        with open(source_name, 'r') as fi, open(target_name, 'w') as fo:
            for line in fi:
                fields = line.rstrip('\r\n').split('\t')
                kept = [x for x in fields if x.startswith(wanted_prefixes)]
                fo.write('\t'.join(kept) + '\n')

    def _mark_output(self, parameter_name, ttl, attribute_name, value):
        """
        Set the ``ttl`` and one extra attribute on the resource stored in the
        output parameter `parameter_name`.
        """
        resource = getattr(self.Parameters, parameter_name)
        resource.ttl = ttl
        setattr(resource, attribute_name, value)

    def on_execute(self):
        """
        Resolve the input resources, run the generator for every enabled
        output kind and publish the generator's result files as gzipped
        resources.
        """
        generator_path = self._resolve_path(
            self.Parameters.generator_binary,
            resources.ModelsProxyQueriesGenerator,
            "MODELS_PROXY_QUERIES_GENERATOR",
            "generator"
        )
        input_queries_path = self._resolve_path(
            self.Parameters.mmeta_queries,
            ms_resources.WebMiddlesearchPlainTextQueries,
            "WEB_MIDDLESEARCH_PLAIN_TEXT_QUERIES",
            "input queries",
            attrs={"TE_web_production_mmeta_reqs": None}
        )
        models_proxy_path = self._resolve_path(
            self.Parameters.models_proxy_binary,
            resources.ModelsProxyExecutable,
            "MODELS_PROXY_EXECUTABLE",
            "models_proxy executable",
            attrs={"released": "stable"}
        )
        # The original check here tested models_proxy_resource by mistake, so
        # a missing evlogdump binary was never reported properly.
        evlogdump_path = self._resolve_path(
            self.Parameters.evlogdump_binary,
            resource_types.EVLOGDUMP_EXECUTABLE,
            "EVLOGDUMP_EXECUTABLE",
            "evlogdump executable",
            attrs={"released": "stable"}
        )

        output_name_prefix = datetime.date.today().isoformat()  # YYYY-MM-DD
        hostname = '[' + network.get_my_ipv6() + ']'

        # Options shared by every generator invocation.
        common_args = [
            generator_path,
            "--models-proxy", models_proxy_path,
            "--evlogdump", evlogdump_path,
        ]
        # Options of a run that is given the queries file and the hostname.
        fresh_run_args = [
            "--queries-file", input_queries_path,
            "--hostname", hostname,
        ]

        if self.Parameters.large_output_count:
            with sdk2.helpers.ProcessLog(self, logger="generator_large") as pl:
                self._run_generator(pl, common_args, self.Parameters.large_output_count, fresh_run_args)

                large_requests_responses = self._publish_gzipped(
                    "large_requests_responses",
                    resources.ModelsProxyRequestsResponses,
                    output_name_prefix + ' models_proxy requests and responses (large)',
                    output_name_prefix + '_large_models_proxy_requests_responses.tsv.gz',
                    'requests_responses.tsv',
                    pl.stderr
                )

                # The requests-only resource is the second tab-separated
                # column of requests_responses.tsv, gzipped.
                large_requests = self._declare_output(
                    "large_requests",
                    resources.ModelsProxyRequests,
                    output_name_prefix + ' models_proxy requests (large)',
                    output_name_prefix + '_large_models_proxy_requests.gz'
                )
                with open('requests_responses.tsv', 'r') as fi, large_requests.path.open('wb') as fo:
                    sdk2.helpers.subprocess.check_call(
                        "cut -f2 | gzip",
                        shell=True,
                        stdin=fi,
                        stdout=fo,
                        stderr=pl.stderr
                    )

                large_fetchdocdata = self._publish_gzipped(
                    "large_fetchdocdata",
                    resources.ModelsProxyFetchDocDataResults,
                    output_name_prefix + ' models_proxy mocked FetchDocData (large)',
                    output_name_prefix + '_large_fetched_doc_data.tsv.gz',
                    'fetched_doc_data.tsv',
                    pl.stderr
                )

                large_all_subsources = self._publish_gzipped(
                    "large_all_subsources",
                    resources.ModelsProxySubsourceResults,
                    output_name_prefix + ' models_proxy subsources requests and responses (large)',
                    output_name_prefix + '_large_models_proxy_subsources.tskv.gz',
                    'subsources.tskv',
                    pl.stderr
                )

                self._extract_dyntable('subsources.tskv', 'dyntable.tskv')
                large_dyntable = self._publish_gzipped(
                    "large_dyntable",
                    resources.ModelsProxySubsourceResults,
                    output_name_prefix + ' models_proxy mocked DynTable (large)',
                    output_name_prefix + '_large_dyntable_data.tskv.gz',
                    'dyntable.tskv',
                    pl.stderr
                )

                if self.Parameters.mark_results:
                    self._mark_output("large_requests", "90", "TE_models_proxy_requests_large", output_name_prefix)
                    self._mark_output("large_fetchdocdata", "90", "TE_models_proxy_fetchdocdata_large", output_name_prefix)
                    self._mark_output("large_dyntable", "90", "TE_models_proxy_dyntable_large", output_name_prefix)

                large_requests_responses.ready()
                large_requests.ready()
                large_fetchdocdata.ready()
                large_all_subsources.ready()
                large_dyntable.ready()

        if self.Parameters.small_output_count:
            with sdk2.helpers.ProcessLog(self, logger="generator_normal") as pl:
                if self.Parameters.large_output_count >= self.Parameters.small_output_count:
                    # The large run covered at least as many queries, so the
                    # generator is only asked to process the existing eventlog
                    # instead of being given the queries file again.
                    normal_run_args = ["--just-process-eventlog", "models_proxy_dir/fetch-eventlog"]
                else:
                    normal_run_args = fresh_run_args
                self._run_generator(pl, common_args, self.Parameters.small_output_count, normal_run_args)

                normal_requests_responses = self._publish_gzipped(
                    "autobuild_normal_requests_responses",
                    resources.ModelsProxyRequestsResponses,
                    output_name_prefix + ' models_proxy requests and responses (autobuild)',
                    'requests_responses.tsv.gz',
                    'requests_responses.tsv',
                    pl.stderr
                )

                normal_fetchdocdata = self._publish_gzipped(
                    "autobuild_normal_fetchdocdata",
                    resources.ModelsProxyFetchDocDataResults,
                    output_name_prefix + ' models_proxy mocked FetchDocData (autobuild)',
                    'fetched_doc_data.tsv.gz',
                    'fetched_doc_data.tsv',
                    pl.stderr
                )

                normal_subsources = self._publish_gzipped(
                    "autobuild_normal_subsources",
                    resources.ModelsProxySubsourceResults,
                    output_name_prefix + ' models_proxy subsources requests and responses (autobuild)',
                    'subsources.tskv.gz',
                    'subsources.tskv',
                    pl.stderr
                )

                if self.Parameters.mark_results:
                    self._mark_output("autobuild_normal_requests_responses", "inf", "TE_models_proxy_requests_autobuild", output_name_prefix)
                    self._mark_output("autobuild_normal_fetchdocdata", "inf", "TE_models_proxy_fetchdocdata_autobuild", output_name_prefix)
                    self._mark_output("autobuild_normal_subsources", "inf", "TE_models_proxy_subsources_autobuild", output_name_prefix)

                normal_requests_responses.ready()
                normal_fetchdocdata.ready()
                normal_subsources.ready()

            with sdk2.helpers.ProcessLog(self, logger="generator_bigrt_cache") as pl:
                self._run_generator(
                    pl,
                    common_args,
                    self.Parameters.small_output_count,
                    [
                        "--queries-file", input_queries_path,
                        "--with-bigrt-caches",
                        "--hostname", hostname,
                    ]
                )

                bigrt_requests_responses = self._publish_gzipped(
                    "autobuild_bigrt_requests_responses",
                    resources.ModelsProxyRequestsResponses,
                    output_name_prefix + ' models_proxy requests and responses (with bigrt cache)',
                    'bigrtcache_requests_responses.tsv.gz',
                    'requests_responses.tsv',
                    pl.stderr
                )

                bigrt_fetchdocdata = self._publish_gzipped(
                    "autobuild_bigrt_fetchdocdata",
                    resources.ModelsProxyFetchDocDataResults,
                    output_name_prefix + ' models_proxy mocked FetchDocData (with bigrt cache)',
                    'bigrtcache_fetched_doc_data.tsv.gz',
                    'fetched_doc_data.tsv',
                    pl.stderr
                )

                bigrt_subsources = self._publish_gzipped(
                    "autobuild_bigrt_subsources",
                    resources.ModelsProxySubsourceResults,
                    output_name_prefix + ' models_proxy subsources requests and responses (with bigrt cache)',
                    'bigrtcache_subsources.tskv.gz',
                    'subsources.tskv',
                    pl.stderr
                )

                if self.Parameters.mark_results:
                    self._mark_output("autobuild_bigrt_requests_responses", "inf", "TE_models_proxy_requests_bigrtcache", output_name_prefix)
                    self._mark_output("autobuild_bigrt_fetchdocdata", "inf", "TE_models_proxy_fetchdocdata_bigrtcache", output_name_prefix)
                    self._mark_output("autobuild_bigrt_subsources", "inf", "TE_models_proxy_subsources_bigrtcache", output_name_prefix)

                bigrt_requests_responses.ready()
                bigrt_fetchdocdata.ready()
                bigrt_subsources.ready()
