# -*- coding: utf-8 -*-
import attr
import json
import logging
import os
import random
import shutil

import sandbox.common.errors as errors
from sandbox import sdk2
from sandbox.sdk2 import svn
from sandbox.common.types.resource import State
from sandbox.projects.common import binary_task
from sandbox.projects.common import time_utils as tu
from sandbox.projects.websearch.upper import resources as upper_resources
from sandbox.projects.hollywood.common import resources
from sandbox.sdk2.helpers import subprocess as sp


# Module-level logger used by the task and by the graph-scanning helpers below.
logger = logging.getLogger(__name__)

# Values of the task's `output_format` parameter.
OUTPUT_FORMAT_JSON_WRAPPED_BASE64_LINES = "json-wrapped-base64-lines"
OUTPUT_FORMAT_GRPC_CLIENT_PLAN = "grpc-client-plan"
OUTPUT_FORMAT_APP_HOST_JSON = "app-host-json"
# Maps the task's `output_format` to the value passed to request_sampler's `--output-format` flag.
# "json-wrapped-base64-lines" is requested from request_sampler as "grpc-client-plan"; the task
# then runs its own converter over the downloaded files (see MakeHollywoodAmmo.on_execute).
REQUEST_SAMPLER_OUTPUT_FORMAT = {
    OUTPUT_FORMAT_JSON_WRAPPED_BASE64_LINES: OUTPUT_FORMAT_GRPC_CLIENT_PLAN,
    OUTPUT_FORMAT_GRPC_CLIENT_PLAN: OUTPUT_FORMAT_GRPC_CLIENT_PLAN,
    OUTPUT_FORMAT_APP_HOST_JSON: OUTPUT_FORMAT_APP_HOST_JSON,
}
# NOTE(review): not referenced anywhere in this file; presumably the names of Hollywood's
# apphost graphs, kept for importers of this module -- confirm before removing.
HOLLYWOOD_GRAPH_NAMES = ["run", "apply", "commit", "continue"]


class MakeHollywoodAmmo(binary_task.LastBinaryTaskRelease, sdk2.Task):
    """
    Task for making Hollywood ammo (grepping apphost service requests from YT) using search/tools/request_sampler.

    Execution outline (see `on_execute`):
      1. resolve the request_sampler executable (task parameter or the latest released:stable resource);
      2. take `graphs_sources` from the parameters or generate it from the apphost/conf checkout;
      3. run request_sampler;
      4. optionally merge/split, convert and shuffle the files it produced;
      5. move the resulting files into a new HollywoodAmmo resource.
    """
    class Requirements(sdk2.Task.Requirements):
        ram = 10 * 1024  # 10 Gb
        disk_space = 30 * 1024  # 30 Gb

    class Parameters(sdk2.Task.Parameters):
        kill_timeout = 10 * 60 * 60
        request_sampler = sdk2.parameters.Resource(
            "request_sampler executable. Leave it empty to use the latest released:stable.",
            resource_type=upper_resources.REQUEST_SAMPLER_EXECUTABLE,
            required=False,
        )
        input_table = sdk2.parameters.String(
            "Input yt table.",
            description="-i flag of request_sampler. For example (a Hollywood log on hahn):\n"
                        "//logs/apphost-alice-prod-event-log/1h/2020-06-26T15:00:00",
            required=True,
        )
        number_of_requests = sdk2.parameters.Integer(
            "Number of requests (of each kind) to get.",
            description="Both -d and -g flags of request_sampler. Value 0 - all.",
            required=False,
            default=10000,
        )
        graphs_sources = sdk2.parameters.String(
            "Graphs-sources pairs. Leave EMPTY for all Hollywood's graphs/sources.",
            description="-s param of request_sampler. Source-graph pairs in format [dot-separated-graphs:]dot-separated-"
                        "corresponding-sources[,same-structure]. For example: graph1:NODE1.NODE2,graph2.graph3:NODE3",
            required=False
        )
        frame_base_64_encoded = sdk2.parameters.Bool(
            "Is frame encoded in base64 in inpu table",
            description="--frame-base-64-encoded param of request sampler",
            default=False,
        )
        frame_column_name = sdk2.parameters.String(
            "Name of frame column in input table",
            description="--frame-column-name param of request_sample",
            required=False,
        )
        remote_temp_tables_directory = sdk2.parameters.String(
            "Directory for temp tables while running request sampler",
            required=False
        )
        remote_temp_files_directory = sdk2.parameters.String(
            "Directory for temp files while running request sampler",
            required=False
        )
        with sdk2.parameters.RadioGroup("Sources' types (ignored if `graphs_sources` param is NOT EMPTY).") as sources_types:
            sources_types.values.regular = sources_types.Value("regular", default=True)
            sources_types.values.http_proxy = sources_types.Value("http_proxy")
        yt_token_vault_key = sdk2.parameters.String(
            "Vault secret with YT token.",
            description="This must be a Sandbox Valt secret.",
            default="robot-voiceint_yt_token"
        )
        with sdk2.parameters.RadioGroup("Process lines.", per_line=3,
                                        description="Process lines of output files.\n"
                                                    "merge - All files will be merged into single file.\n"
                                                    "split - All files will be split line by line into multiple files.") as process_lines:
            process_lines.values.merge = process_lines.Value("merge", default=True)
            process_lines.values.split = process_lines.Value("split")
            process_lines.values.none = process_lines.Value("none")
        shuffle_results = sdk2.parameters.Bool(
            "Shuffle lines.",
            description="Shuffles lines in every output file if selected.",
            required=False,
            default=True,
        )
        with sdk2.parameters.RadioGroup("Output format.", required=True) as output_format:
            output_format.values[OUTPUT_FORMAT_JSON_WRAPPED_BASE64_LINES] = output_format.Value(OUTPUT_FORMAT_JSON_WRAPPED_BASE64_LINES, default=True)
            output_format.values[OUTPUT_FORMAT_GRPC_CLIENT_PLAN] = OUTPUT_FORMAT_GRPC_CLIENT_PLAN
            output_format.values[OUTPUT_FORMAT_APP_HOST_JSON] = OUTPUT_FORMAT_APP_HOST_JSON
        with sdk2.parameters.RadioGroup("Converter input format.",
                                        description="It is a converter inside the request_sampler tool. It has effect "
                                        "only if `output_format`=app-host-json. The converter input format should match "
                                        "the type of nodes, specified in `graphs_sources` param. If `graphs_sources` param is EMPTY, "
                                        "then converter input format is selected automatically.") as converter_input_format:
            converter_input_format.values.service_request = converter_input_format.Value("service_request", default=True)
            converter_input_format.values.http_request = converter_input_format.Value("http_request")
        ext_params = binary_task.LastBinaryReleaseParameters(stable=True)

    def on_save(self):
        """Delegate to the base classes' `on_save`; the task adds nothing of its own here."""
        super(MakeHollywoodAmmo, self).on_save()

    def on_execute(self):
        """
        Run request_sampler, post-process its output files and publish them as a HollywoodAmmo resource.

        Raises:
            errors.TaskError: if no request_sampler resource is available, if `sources_types` is unknown,
                if no sources could be generated, if request_sampler exits with a non-zero code or
                if none of the expected output files exists.
        """
        super(MakeHollywoodAmmo, self).on_execute()

        request_sampler_executable = self._get_request_sampler_executable()

        # randrange() needs an int: a float argument (the former `1e7`) is deprecated since
        # Python 3.10 and rejected with TypeError since 3.12.
        output_files_prefix = "make_hollywood_ammo_{}_{:07}".format(tu.date_ymdhm(sep="_"), random.randrange(10 ** 7))

        # Truthiness check instead of `== ''`, so an unset (None) parameter is also treated as empty.
        graphs_sources = self.Parameters.graphs_sources
        if graphs_sources:
            converter_input_format = self.Parameters.converter_input_format
        else:
            graphs_sources, converter_input_format = self._generate_graphs_sources()

        cmd = [
            request_sampler_executable,
            "-i", self.Parameters.input_table,
            "-s", graphs_sources,
            "-f", output_files_prefix,
            "-t", sdk2.Vault.data(self.Parameters.yt_token_vault_key),
            "--grep-count", str(self.Parameters.number_of_requests),
            "--download-count", str(self.Parameters.number_of_requests),
            "--output-format", REQUEST_SAMPLER_OUTPUT_FORMAT[self.Parameters.output_format],
            "--converter-input-format", converter_input_format,
        ]
        if self.Parameters.frame_base_64_encoded:
            cmd.append("--frame-base-64-encoded")
        optional_value_flags = (
            ("--frame-column-name", self.Parameters.frame_column_name),
            ("--remote-temp-tables-directory", self.Parameters.remote_temp_tables_directory),
            ("--remote-temp-files-directory", self.Parameters.remote_temp_files_directory),
        )
        for flag, value in optional_value_flags:
            if value:
                cmd.extend([flag, value])
        self._run_request_sampler(cmd)

        # The ammolib modules are imported at call time, not at module import time
        # (presumably they are only available inside the binary task build -- confirm).
        from alice.hollywood.library.python.ammolib.parser import parse_graphs_sources_list, make_output_filenames
        graphs_sources_dict = parse_graphs_sources_list(graphs_sources)
        filenames = make_output_filenames(output_files_prefix, graphs_sources_dict)
        filenames = [filename for filename in filenames if os.path.exists(filename)]
        if not filenames:
            raise errors.TaskError("request_sampler returned 0 non-empty files.")

        if self.Parameters.process_lines == "merge":
            logger.info("Merging the resulting files into single file...")
            merge_result_filename = "ammo.out"

            from alice.hollywood.library.python.ammolib.merger import merge_text_files_into_one
            merge_text_files_into_one(filenames, merge_result_filename)
            filenames = [merge_result_filename]
        elif self.Parameters.process_lines == "split":
            logger.info("Splitting the resulting files into multiple files...")
            from alice.hollywood.library.python.ammolib.merger import split_text_file_into_multiple_by_lines
            splitted_filenames = []
            for filename in filenames:
                splitted_filenames.extend(split_text_file_into_multiple_by_lines(filename, ""))
            filenames = splitted_filenames

        if self.Parameters.output_format == OUTPUT_FORMAT_JSON_WRAPPED_BASE64_LINES:
            logger.info("Converting the resulting files...")
            # The converter output goes next to the first resulting file.
            converter_out_path = os.path.join(os.path.dirname(filenames[0]), "converter_out")

            from alice.hollywood.library.python.ammolib.converter import convert
            filenames = convert(filenames, converter_out_path)

        if self.Parameters.shuffle_results:
            logger.info("Shuffling the lines in resulting file(s)...")
            for filename in filenames:
                self._shuffle_lines(filename)

        resource = resources.HollywoodAmmo(self, "Ammo for Hollywood", "hollywood_ammo", ttl=90)
        res_data = sdk2.ResourceData(resource)
        res_data.path.mkdir(0o755, parents=True, exist_ok=True)
        res_data_path = str(res_data.path)
        for filename in filenames:
            shutil.move(filename, res_data_path)

    def _get_request_sampler_executable(self):
        """Return the local path of the request_sampler executable.

        Uses the `request_sampler` parameter when it is set; otherwise looks up a READY
        REQUEST_SAMPLER_EXECUTABLE resource with the attribute released=stable.

        Raises:
            errors.TaskError: if the parameter is empty and the lookup yields nothing.
        """
        request_sampler = self.Parameters.request_sampler
        if request_sampler is None:
            request_sampler = upper_resources.REQUEST_SAMPLER_EXECUTABLE.find(
                state=State.READY,
                attrs=dict(
                    released='stable',
                ),
            ).first()
        # NOTE(review): assumes `.first()` yields None when nothing matches -- confirm against sdk2.
        if request_sampler is None:
            raise errors.TaskError("No released:stable REQUEST_SAMPLER_EXECUTABLE resource found.")
        return str(sdk2.ResourceData(request_sampler).path)

    def _generate_graphs_sources(self):
        """Build the `-s` argument of request_sampler from the apphost/conf checkout.

        Checks out apphost/conf, scans the ALICE graphs for nodes and keeps only the nodes
        selected by the `sources_types` parameter.

        Returns:
            A `(graphs_sources, converter_input_format)` tuple, where `graphs_sources` has the form
            "graph1:NODE1.NODE2,graph2:NODE3" and `converter_input_format` matches the selected node type.

        Raises:
            errors.TaskError: if `sources_types` is unknown or no matching node was found.
        """
        logger.info("Checking out apphost/conf dir...")
        clone_path = './apphost_conf'
        svn.Arcadia.checkout('arcadia:/arc/trunk/arcadia/apphost/conf', clone_path)

        logger.info("Scanning ALICE graphs for sources...")
        all_scenario_nodes_dict = _scan_for_all_scenario_nodes(clone_path)

        sources_types = self.Parameters.sources_types
        if sources_types == 'regular':
            logger.info("Filtering all but regular nodes...")
            filtered_scenario_nodes_dict = _filter_but_regular_nodes(clone_path, all_scenario_nodes_dict)
            converter_input_format = "service_request"
        elif sources_types == "http_proxy":
            logger.info("Filtering all but http_proxy nodes...")
            filtered_scenario_nodes_dict = _filter_but_http_proxy_nodes(clone_path, all_scenario_nodes_dict)
            converter_input_format = "http_request"
        else:
            raise errors.TaskError("Unknown sources_types value, {}".format(sources_types))

        pairs = []
        for graph_name, node_infos in filtered_scenario_nodes_dict.items():
            node_names = sorted(node_info.name for node_info in node_infos)
            if node_names:  # graphs without matching nodes are left out entirely
                pairs.append('{}:{}'.format(graph_name, '.'.join(node_names)))
        graphs_sources = ','.join(pairs)
        if not graphs_sources:
            # An empty `-s` value would only yield a malformed request_sampler invocation.
            raise errors.TaskError("No {} sources found in ALICE graphs.".format(sources_types))
        logger.info('Generated graphs_sources=%s', graphs_sources)
        return graphs_sources, converter_input_format

    @staticmethod
    def _mask_token(cmd):
        """Return a copy of `cmd` in which the value following the `-t` (YT token) flag is hidden."""
        masked = list(cmd)
        for index in range(1, len(masked)):
            if masked[index - 1] == "-t":
                masked[index] = "***"
        return masked

    def _run_request_sampler(self, cmd):
        """Run request_sampler and wait for it, sending its stdout/stderr to the task's process log.

        Args:
            cmd: the command as a list of string arguments; it is executed directly, without a shell,
                so arguments are passed to the tool exactly as given.

        Raises:
            errors.TaskError: if the process exits with a non-zero code.
        """
        # The command carries the YT token, so only a masked copy is ever logged.
        # NOTE: the token is still visible in the process arguments while the tool runs.
        printable_cmd = self._mask_token(cmd)
        logger.debug("pid %s: Running %s", os.getpid(), printable_cmd)

        with sdk2.helpers.ProcessLog(self, logger=logging.getLogger("request_sampler")) as pl:
            return_code = sp.Popen(cmd, stdout=pl.stdout, stderr=sp.STDOUT).wait()

        logger.debug("pid %s: finished %s with code %s", os.getpid(), printable_cmd, return_code)
        if return_code != 0:
            raise errors.TaskError("request_sampler exited with code {}".format(return_code))

    def _shuffle_lines(self, filename):
        """Shuffle the lines of the text file `filename` in place (the whole file is read into memory)."""
        with open(filename, "r") as in_f:
            lines = in_f.readlines()
        # Only the last line can lack a line terminator; add one so that it is not glued to
        # another line once it is moved away from the end of the file.
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"
        random.shuffle(lines)
        with open(filename, "w") as out_f:
            out_f.writelines(lines)


@attr.s
class NodeInfo(object):
    """A node found in an apphost graph config (built by `_scan_hollywood_graph_for_nodes`)."""
    # Node name: the key of the node in the graph's `settings.nodes` mapping.
    name = attr.ib()
    # Value of the node's `backend_name` setting, or None when the node has none.
    backend_name = attr.ib()


def _scan_for_all_scenario_nodes(apphost_conf_path):
    """Collect the nodes of every graph config under `<apphost_conf_path>/verticals/ALICE`.

    Only `*.json` entries of that directory are read. Returns a dict that maps the graph name
    (the file name without its extension) to the list of nodes found in that graph.
    """
    alice_dir = os.path.join(apphost_conf_path, 'verticals', 'ALICE')
    nodes_by_graph = {}
    for entry in os.listdir(alice_dir):
        if entry.endswith('.json'):
            graph_name, _ = os.path.splitext(os.path.basename(entry))
            logger.info('Scanning graph={} for nodes...'.format(graph_name))
            with open(os.path.join(alice_dir, entry), 'r') as graph_file:
                graph_json = json.load(graph_file)
                nodes_by_graph[graph_name] = _scan_hollywood_graph_for_nodes(graph_json)
    return nodes_by_graph


def _scan_hollywood_graph_for_nodes(graph_pyobj, source_filter_fun=None):
    """List the nodes declared under `settings.nodes` of a parsed graph config.

    When `source_filter_fun` is given, it is called as `source_filter_fun(node_name, node_settings)`
    and only the nodes for which it returns a truthy value are kept. Returns a list of `NodeInfo`;
    the list is empty when the graph has no `settings` or no `nodes` section.
    """
    settings = graph_pyobj.get('settings', {})
    node_settings_by_name = settings.get('nodes', {})
    return [
        NodeInfo(name=node_name, backend_name=node_settings.get('backend_name', None))
        for node_name, node_settings in node_settings_by_name.items()
        if not source_filter_fun or source_filter_fun(node_name, node_settings)
    ]


def _filter_but_regular_nodes(apphost_conf_path, nodes_dict):
    """Keep only the regular (non-http_proxy) nodes of every graph; see `_filter_nodes_but`."""
    return _filter_nodes_but(
        apphost_conf_path=apphost_conf_path,
        nodes_dict=nodes_dict,
        need_http_proxy=False,
    )


def _filter_but_http_proxy_nodes(apphost_conf_path, nodes_dict):
    """Keep only the http_proxy nodes of every graph; see `_filter_nodes_but`."""
    return _filter_nodes_but(
        apphost_conf_path=apphost_conf_path,
        nodes_dict=nodes_dict,
        need_http_proxy=True,
    )


def _filter_nodes_but(apphost_conf_path, nodes_dict, need_http_proxy):
    """Filter the nodes of every graph by backend kind.

    With `need_http_proxy` false, a node is kept only when its backend is 'HOLLYWOOD_COMMON'.
    With `need_http_proxy` true, a node is kept only when its backend config (looked up under
    `apphost_conf_path`) is marked as http_proxy.

    Returns a dict with the same graph names as `nodes_dict`; a graph whose nodes were all
    dropped maps to an empty list.
    """
    def _is_wanted(node_info):
        if need_http_proxy:
            # NOTE: In case you need http_proxy nodes ammo, this will collect them from all the ALICE graphs,
            # not just Hollywood graphs
            return _is_backend_http_proxy(apphost_conf_path, node_info.backend_name)
        return node_info.backend_name == 'HOLLYWOOD_COMMON'

    return {
        graph_name: [node_info for node_info in node_infos if _is_wanted(node_info)]
        for graph_name, node_infos in nodes_dict.items()
    }


def _is_backend_http_proxy(apphost_conf_path, backend_name):
    """Tell whether the backend `backend_name` is configured as an http_proxy.

    Reads `<apphost_conf_path>/backends/<backend_name>.json` and checks the `http_proxy`
    flag of its `default_settings` section.

    Returns:
        True if the flag is set to a truthy value. False if `backend_name` is empty, the backend
        config file does not exist, or the config has no `default_settings` / `http_proxy` entry.
    """
    if not backend_name:
        return False
    backend_filename = os.path.join(apphost_conf_path, 'backends', backend_name + '.json')
    if not os.path.exists(backend_filename):
        return False
    with open(backend_filename) as in_file:
        backend_pyobj = json.load(in_file)
    # A backend config without (or with a null) `default_settings` is simply not an http_proxy;
    # it must not abort the whole scan with a KeyError.
    default_settings = backend_pyobj.get('default_settings') or {}
    return bool(default_settings.get('http_proxy', False))
