import logging
import os
import multiprocessing
import math
import shutil
import random
import six

from sandbox import sdk2

import sandbox.projects.fuzzing.resources as fuzz_res
from sandbox.projects.common import time_utils as tu
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.websearch.middlesearch import resources as ms_res
from sandbox.projects.websearch.upper import resources as upper_resources
from sandbox.projects.app_host import resources as ap_res

# Resource class to create for every (output type, source) pair.
# Outer keys are the values of the task's ``output_type`` parameter, inner keys
# are the values of its ``sources`` check group; the task looks up
# SOURCE_TO_RESOURCE[output_type][source] and instantiates the class found there.
# A source that is missing under a given output type cannot be produced in that format.
SOURCE_TO_RESOURCE = {
    "grpc-client-plan": {
        "NOAPACHE_ADJUSTER": upper_resources.BinaryRequestsForNoapacheAdjuster,
        "WEB_SRC_SETUP": upper_resources.BinaryRequestsForWebSrcSetup,
        "WEB_MISSPELL_SRC_SETUP": upper_resources.BinaryRequestsForWebMisspellSrcSetup,
        "BLENDER": upper_resources.BlenderGrpcClientPlan,
        "YANSWER": upper_resources.YanswerGrpcClientPlan,
        "ANTIOBJECT": upper_resources.AntiObjectGrpcClientPlan
    },
    "fuzzer": {
        "BLENDER": fuzz_res.WebBlenderQueriesFuzzy,
    },
    "dolbilo": {
        "WEB": ms_res.MiddlesearchPlan,
    },
    "app-host-json": {
        # BIATHLON and EDUCATION_BACKEND share the same generic resource class.
        "BIATHLON": ap_res.APP_HOST_JSON_CONTEXTS,
        "EDUCATION_BACKEND": ap_res.APP_HOST_JSON_CONTEXTS,
        "ENTITYSEARCH": upper_resources.EntitySearchJsonContexts,
        "INIT_AND_HEADERS": upper_resources.InitAndHeadersJsonContexts,
        "REPORT_INIT_AND_HEADERS": upper_resources.ReportInitAndHeadersJsonContexts,
        "HANDLER_OUTPUT": upper_resources.HandlerOutputJsonContexts,
        "HANDLER_OUTPUT_SLOW": upper_resources.HandlerOutputSlowJsonContexts,
    },
}


class RequestSampler(sdk2.Task):
    """
    Task for sampling requests from YT using search/tools/request_sampler.

    The sampler binary is first run once with the ``-o <output_tables_prefix>``
    option for all selected sources, then run again with ``--download`` once per
    (source, graph) pair to fetch ``<output_tables_prefix>_<graph>_<source>``
    into a local file. One resource per selected source is created; when several
    graphs are sampled, their files are concatenated into that resource.
    """
    class Requirements(sdk2.Task.Requirements):
        ram = 10 * 1024  # 10 Gb
        disk_space = 30 * 1024  # 30 Gb

    class Parameters(sdk2.Task.Parameters):
        kill_timeout = 10 * 60 * 60
        request_sampler = sdk2.parameters.Resource(
            "request_sampler executable",
            resource_type=upper_resources.REQUEST_SAMPLER_EXECUTABLE,
            required=True,
        )
        input_table = sdk2.parameters.String(
            "Input yt table (-i flag), empty means 1d for the day before yesterday",
            required=False,
        )
        # NOTE: the default below is evaluated once, when this class body is
        # executed, so the random part and the timestamp are fixed for the
        # lifetime of the importing process.
        # randrange() gets an int bound: float bounds are rejected by Python 3.12+.
        output_tables_prefix = sdk2.parameters.String(
            "Prefix for output yt tables",
            required=True,
            default="//tmp/request_sampler_output_table_{:07}_{}".format(
                random.randrange(10 ** 7), tu.date_ymdhm(sep="_")
            ),
        )
        number_of_requests = sdk2.parameters.Integer(
            "Number of requests to get (-d flag)",
            required=True,
            default=10000,
        )
        check_enough_requests = sdk2.parameters.Bool("Check that enough requests sampled", default=True)
        attribute_name = sdk2.parameters.String("Attribute name")
        parallel_download = sdk2.parameters.Bool("Download output tables in parallel", default=True)
        with sdk2.parameters.CheckGroup("Types of requests to get") as sources:
            sources.values.NOAPACHE_ADJUSTER = sources.Value("Adjuster", checked=True)
            sources.values.WEB_SRC_SETUP = sources.Value("Web src setup", checked=False)
            sources.values.WEB_MISSPELL_SRC_SETUP = sources.Value("Web misspell src setup", checked=False)
            sources.values.BLENDER = sources.Value("Blender", checked=False)
            sources.values.WEB = sources.Value("Middlesearch", checked=False)
            sources.values.BIATHLON = sources.Value("Biathlon", checked=False)
            sources.values.EDUCATION_BACKEND = sources.Value("Education backend", checked=False)
            sources.values.INIT_AND_HEADERS = sources.Value("INIT_AND_HEADERS", checked=False)
            sources.values.REPORT_INIT_AND_HEADERS = sources.Value("REPORT_INIT_AND_HEADERS", checked=False)
            sources.values.HANDLER_OUTPUT = sources.Value("HANDLER_OUTPUT", checked=False)
            sources.values.HANDLER_OUTPUT_SLOW = sources.Value("HANDLER_OUTPUT_SLOW", checked=False)
            sources.values.YANSWER = sources.Value("YANSWER", checked=False)
            sources.values.ANTIOBJECT = sources.Value("ANTIOBJECT", checked=False)
            sources.values.ENTITYSEARCH = sources.Value("ENTITYSEARCH", checked=False)
        with sdk2.parameters.RadioGroup('Graphs', required=False) as graphs:
            graphs.values.WEB = graphs.Value("Web", default=True)
            graphs.values.IMAGES = graphs.Value("Images")
            graphs.values.VIDEO = graphs.Value("Video")
            graphs.values.TUTOR = graphs.Value("Tutor")
            graphs.values.REPORT = graphs.Value("Report")
            graphs.values.HANDLER = graphs.Value("Handler")
            graphs.values.NONE = graphs.Value("Not specified")
        with sdk2.parameters.RadioGroup('Request type', required=True) as request_type:
            request_type.values['success'] = request_type.Value('success', default=True)
            request_type.values['error'] = 'error'
            request_type.values['both'] = 'both'
        with sdk2.parameters.RadioGroup('Output type', required=True) as output_type:
            output_type.values['grpc-client-plan'] = output_type.Value('grpc-client-plan', default=True)
            output_type.values['dolbilo'] = 'dolbilo'
            output_type.values['fuzzer'] = 'fuzzer'
            output_type.values['app-host-json'] = 'app-host-json'
        only_first_host = sdk2.parameters.Bool("Queries from only first host", default=False)  # SEARCH-4055
        yt_token_vault_owner = sdk2.parameters.String("Vault owner for YT token to use")
        yt_token_vault_key = sdk2.parameters.String("Vault key for YT token to use", default="yt_token_for_eventlogs")

    def on_execute(self):
        """
        Sample requests, download them and publish one resource per source.

        Steps:
          1. validate that every selected source has a resource type for the
             chosen output type;
          2. run the sampler once for all sources (and graphs, if any);
          3. build one ``--download`` command per (source, graph) pair and run
             them, in parallel if ``parallel_download`` is set;
          4. join per-graph files into the resource path and, if requested,
             check that at least 90% of ``number_of_requests`` were obtained;
          5. optionally set ``attribute_name`` on each resource to this task id.
        """
        # Fail fast with a readable message instead of a bare KeyError raised
        # only after the (long) sampling run has already finished.
        resource_types = SOURCE_TO_RESOURCE[self.Parameters.output_type]
        unsupported_sources = [source for source in self.Parameters.sources if source not in resource_types]
        eh.ensure(
            not unsupported_sources,
            "Sources {} are not supported for output type {}".format(
                ", ".join(unsupported_sources), self.Parameters.output_type
            )
        )

        request_sampler_executable = str(sdk2.ResourceData(self.Parameters.request_sampler).path)
        commands = []
        out_resources = []
        graph_filenames = {}
        # Map graph name -> share (in percent) of number_of_requests to download
        # for that graph. The commented-out entries are graphs that are
        # currently not sampled.
        if self.Parameters.graphs == "WEB":
            graphs = {'web': 100}
        elif self.Parameters.graphs == "IMAGES":
            graphs = {
                "images-like": 0.25,
                "images-serp-context": 99.5,
                "images-view": 0.25,
                # "images-alice": 1,
                # "images-api-cbir-market": 1,
                # "images-cbir": 1,
                # "images-dups": 1,
                # "images-xml-cbir": 1,
                # "images-xml-serp": 2,
            }
        elif self.Parameters.graphs == "VIDEO":
            graphs = {
                "video-search": 100,
                # "video-upper-related": 33,
                # "video-upper-no-fetcher": 33,
                # "video-xml-no-fetcher": 33,
                # "video-upper_": 6,
                # "video-index-new-no-fetcher": 4,
                # "video-index-xml": 1,
                # "video-index-upper": 9,
                # "video-index-upper-new": 9,
                # "video-index-upper-log": 9,
                # "video-index-upper-no-fetcher": 9,
                # "video-upper-vdv": 9,
            }
        elif self.Parameters.graphs == "TUTOR":
            graphs = {'tutor': 100}
        elif self.Parameters.graphs == "HANDLER":
            graphs = {'handler': 100}
        elif self.Parameters.graphs == "REPORT":
            graphs = {'report': 100}
        else:
            # "Not specified" (or no value): a single pseudo-graph without a name.
            graphs = {None: 100}

        if self.Parameters.yt_token_vault_owner:
            token = sdk2.Vault.data(self.Parameters.yt_token_vault_owner, self.Parameters.yt_token_vault_key)
        else:
            token = sdk2.Vault.data(self.Parameters.yt_token_vault_key)

        # Sampling run: one invocation for all sources at once.
        cmd = [
            request_sampler_executable,
            "-o", self.Parameters.output_tables_prefix,
            "-t", token,
            "--request-type", self.Parameters.request_type,
            "--grep-count", str(self.Parameters.number_of_requests),
        ]
        if self.Parameters.input_table:
            cmd.extend(["-i", self.Parameters.input_table])
        else:
            cmd.append("--fresh")
        # "-s" value is "<graph>.<graph>:<source>.<source>", or just the
        # dot-joined sources when no graph is specified.
        source_param = ".".join(self.Parameters.sources)
        if None not in graphs:
            source_param = ":".join((".".join(graphs), source_param))
        cmd.extend(['-s', source_param])
        run_request_sampler(cmd)

        # Download runs: one command per (source, graph) pair.
        for source in self.Parameters.sources:
            out_res = resource_types[source](
                self, "{} production requests in {} for {} graphs ({})".format(
                    source,
                    self.Parameters.output_type,
                    self.Parameters.graphs,
                    tu.date_ymdhm(sep="_"),
                ),
                "requests_{}".format(source)
            )
            out_res.ttl = 60
            out_resources.append(out_res)
            # Requests still to be distributed; whatever is left goes to the
            # last graph in iteration order.
            last_graph_requests_number = self.Parameters.number_of_requests
            for i, (graph, percentage) in enumerate(six.iteritems(graphs)):
                output_suffix = ""
                if graph:
                    output_suffix += "_{}".format(graph)
                output_suffix += "_{}".format(source)
                result_filename = "requests" + output_suffix
                graph_filenames.setdefault(out_res, []).append(result_filename)
                if i != len(graphs) - 1:
                    # Every graph gets at least one request, shares are rounded up.
                    number_of_requests_for_graph = max(1, int(
                        math.ceil(self.Parameters.number_of_requests * percentage / 100.0)
                    ))
                else:
                    # Rounding up the earlier shares may exceed the total for
                    # small number_of_requests; never pass a negative count.
                    number_of_requests_for_graph = max(0, last_graph_requests_number)
                last_graph_requests_number -= number_of_requests_for_graph
                cmd = [
                    request_sampler_executable,
                    "-i", self.Parameters.output_tables_prefix + output_suffix,
                    "--download",
                    "-f", result_filename,
                    "-d", str(number_of_requests_for_graph),
                    "-t", token,
                    "--output-format", self.Parameters.output_type,
                ]
                if self.Parameters.only_first_host:
                    cmd.append("--download-first-host")
                commands.append(cmd)

        if self.Parameters.parallel_download:
            pool = multiprocessing.Pool()
            try:
                map_results = pool.map(run_request_sampler, commands)
            finally:
                # Release worker processes even if one of the downloads raised.
                pool.close()
                pool.join()
            for map_result in map_results:
                logging.debug(map_result)
        else:
            for cmd in commands:
                run_request_sampler(cmd)

        if self.Parameters.check_enough_requests:
            enough_requests = True
            for out_res in out_resources:
                res_path = str(sdk2.ResourceData(out_res).path)
                if self.Parameters.graphs:
                    join_files(res_path, graph_filenames[out_res])

                # A directory result is counted by entries, a file result by lines.
                if os.path.isdir(res_path):
                    lines_count = len(os.listdir(res_path))
                else:
                    with open(res_path) as f:
                        lines_count = sum(1 for _ in f)
                expected_amount = .9 * self.Parameters.number_of_requests
                logging.debug("Expected at least %s request in %s, got %s", expected_amount, res_path, lines_count)
                if lines_count < expected_amount:
                    enough_requests = False
                    logging.debug("It's not enough!")
            eh.ensure(enough_requests, "Not enough requests sampled")
        elif self.Parameters.graphs:
            # No check requested: only join per-graph files into the resource path.
            for out_res in out_resources:
                res_path = str(sdk2.ResourceData(out_res).path)
                join_files(res_path, graph_filenames[out_res])

        if self.Parameters.attribute_name:
            for out_res in out_resources:
                logging.debug("Setting attr %s to %s at resource", self.Parameters.attribute_name, self.id)
                setattr(out_res, self.Parameters.attribute_name, self.id)


def join_files(output_path, input_filenames):
    """
    Concatenate ``input_filenames`` into one file and move it to ``output_path``.

    The largest input file (ties broken by name) is used as the accumulator:
    the contents of every other file are appended to it in the given order and
    those files are removed; the accumulator is then moved to ``output_path``.
    A single input is simply moved.

    Files are handled in binary mode and streamed, so arbitrary (non-text)
    request plans of any size are joined without decoding or loading them
    fully into memory.

    :param output_path: destination path for the joined file
    :param input_filenames: non-empty sequence of paths to join
    :raises ValueError: if ``input_filenames`` is empty
    """
    if not input_filenames:
        raise ValueError("join_files() needs at least one input file")

    if len(input_filenames) == 1:
        name_of_biggest = input_filenames[0]
    else:
        name_of_biggest = max(input_filenames, key=lambda name: (os.path.getsize(name), name))
        with open(name_of_biggest, 'ab') as output_file:
            for input_filename in input_filenames:
                if input_filename == name_of_biggest:
                    continue
                logging.debug('Append content of %s to %s', input_filename, name_of_biggest)
                with open(input_filename, 'rb') as input_file:
                    shutil.copyfileobj(input_file, output_file)
                os.remove(input_filename)
                logging.debug('Remove %s', input_filename)
    shutil.move(name_of_biggest, output_path)
    logging.debug('Move %s to %s', name_of_biggest, output_path)


def run_request_sampler(cmd):
    """
    Run one request_sampler command and forward its stderr to the debug log.

    The command is executed directly (without a shell), so arguments that
    contain spaces or shell metacharacters are passed to the binary verbatim.
    The value following a ``-t`` argument (the YT token) is masked in the log.

    :param cmd: command as a list of string arguments, executable first
    :return: exit code of the process (a non-zero code is logged as a warning)
    """
    import subprocess

    pid = os.getpid()

    # Build a copy of the command that is safe to log: hide the token value.
    printable_cmd = list(cmd)
    for position in range(len(printable_cmd) - 1):
        if printable_cmd[position] == "-t":
            printable_cmd[position + 1] = "***"
    logging.debug("pid %s: Running %s", pid, printable_cmd)

    process = subprocess.Popen(cmd, stderr=subprocess.PIPE)
    # readline() returns an empty string only at EOF, so this drains stderr
    # completely without spinning while the process is still alive.
    for line in iter(process.stderr.readline, b""):
        logging.debug("pid %s: %s", pid, line.strip())
    process.stderr.close()
    exitcode = process.wait()

    if exitcode:
        logging.warning("pid %s: %s exited with code %s", pid, printable_cmd, exitcode)
    logging.debug("pid %s: finished %s", pid, printable_cmd)
    return exitcode
