# -*- coding: utf-8 -*-

import logging
import os
import tarfile
from shutil import copy, copytree, move, rmtree

from sandbox import sdk2
from sandbox.projects.common.search import components as sc
from sandbox.projects.common import dolbilka
from sandbox.projects.ydo.backend.YdoSaasSearchProxy.resource_types import YdoSaasSearchProxy, YdoSearchProxyConfig, YdoRubricEmbeddings, YdoProblemsList, YdoWorkersPuidList
from sandbox.projects.ydo.backend.YdoSaasSearchProxy.resource_types import YdoClinicsList, YdoRubricatorFeatures, YdoSaasSearchProxyModels, YdoSaasSearchProxyWord2Vec
from sandbox.projects import resource_types
from sandbox.projects.geobase.Geodata6BinStable.resource import GEODATA6BIN_STABLE
from sandbox.projects.ydo.rubrics_merger.YdoRubricsMerger import YdoMergedRubricsDump, YdoMergedGeo2WorkersCountDump
from sandbox.projects.ydo import YdoFeaturesJson, YdoCollectionsJson, YdoServicesPriceIntervalsJson, YdoCustomizerJson, YdoWizardCustomizerJson, YdoCallCenterCardSettingsJson
from sandbox.projects.ydo.resource_types import YdoDssmModel
from sandbox.projects.common.search.config import SearchConfig
from sandbox.projects.ParallelDumpEventlog.__init__ import _run_dolbilka_thread
from sandbox.projects.resource_types import EVENTLOG_DUMP
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.projects.ydo import execute_cmd
from sandbox.projects.common import resource_selectors


class YdoMetasearchConfig(SearchConfig):
    '''Search config class that is handed to YdoMetaSearch as its ``config_class``.'''


class YdoMetasearchWorkdir(sdk2.Resource):
    '''Work directory of one searchproxy instance (configs, ``data`` and ``logs`` subdirectories).'''


class YdoMetaseatchRequests(sdk2.Resource):
    '''Ydo requests, contains path; the task converts this resource into dolbilka plans.'''
    # NOTE(review): "Metaseatch" looks like a typo of "Metasearch"; the name is kept
    # because it is the public resource type name — confirm before renaming.
    releasable = True


class YdoMetaSearch(sc.SearchExecutableComponent):
    '''Searchproxy component that is run from a prepared Ydo work directory.

    The constructor patches ``backends.conf`` and ``searchproxy.conf`` inside
    ``work_dir`` before delegating to the base component, then exposes the TVM
    key to the process through the ``YDO_TVM_KEY`` environment variable.
    '''

    def patch_backend_conf(self, path, port):
        '''Replace every ``INPROC_PORT`` placeholder in the file with ``port + 1``.

        :param path: path to the backends config, rewritten in place
        :param port: numeric base port of the searchproxy
        '''
        with open(path, "r") as f:
            conf = f.read()
        conf = conf.replace("INPROC_PORT", str(port + 1))

        with open(path, "w") as f:
            f.write(conf)

    # TODO: use a proper config parser
    def patch_searchproxy_conf(self, path):
        '''Point the global log to ``${LOG_DIR}`` and make sure the eventlog is enabled.

        The file is treated as plain text and rewritten in place:

        * the last ``LoggerType`` line that does not already point to
          ``${LOG_DIR}/current-global-searchproxy`` is replaced;
        * if no ``EventLogEnabled: true`` line exists, one is inserted right
          after the last ``EventLogTemplate`` line (nothing is inserted when
          there is no such line).

        :param path: path to the searchproxy config
        '''
        indent = "    "
        global_log = "${LOG_DIR}/current-global-searchproxy"
        with open(path, "r") as f:
            lines = f.read().split('\n')

        logger_type = None
        eventlog_enabled = None
        eventlog_template = None
        for i, line in enumerate(lines):
            if "LoggerType" in line and global_log not in line:
                logger_type = i
            if "EventLogTemplate" in line:
                eventlog_template = i
            if "EventLogEnabled: true" in line:
                eventlog_enabled = i

        if logger_type is not None:
            lines[logger_type] = indent + "LoggerType : " + global_log
        if eventlog_enabled is None and eventlog_template is not None:
            lines.insert(eventlog_template + 1, indent + indent + "EventLogEnabled: true")

        with open(path, "w") as f:
            f.write('\n'.join(lines))

    def __init__(self, work_dir, binary, port, config_class, config_file, config_params, tvm_key):
        '''Patch the configs in ``work_dir`` and initialise the base component.

        :param work_dir: directory that contains ``backends.conf`` and ``searchproxy.conf``
        :param binary: path to the searchproxy executable
        :param port: numeric base port
        :param config_class: config class forwarded to the base component
        :param config_file: config file path forwarded to the base component
        :param config_params: config parameters forwarded to the base component
        :param tvm_key: value exported to the process as ``YDO_TVM_KEY``
        '''
        # The configs are patched before the base initialiser runs.
        self.patch_backend_conf(work_dir + "/backends.conf", port)
        self.patch_searchproxy_conf(work_dir + "/searchproxy.conf")
        super(YdoMetaSearch, self).__init__(
            work_dir=work_dir,
            binary=binary,
            port=port,
            config_class=config_class,
            config_file=config_file,
            config_params=config_params
        )
        self.work_dir = work_dir
        self.environment.update({"YDO_TVM_KEY": tvm_key})
        self.name = "searchproxy"

    def _get_run_cmd(self, config_path):
        '''Return the command-line arguments for the searchproxy binary.

        ``config_path`` is not used: the config is always taken from
        ``<work_dir>/searchproxy.conf``.
        '''
        return [
            "-V", "LOG_DIR=" + self.work_dir + "/logs",
            "-V", "WORK_DIR=" + self.work_dir,
            "-V", "CONFIG_DIR=" + self.work_dir,
            "-V", "REARRANGE_DATA_DIR=" + self.work_dir + "/data",
            # The port is handled as a number elsewhere (``port + 1``), so it must be
            # converted explicitly; "str + int" raises TypeError.
            "-V", "BASE_PORT=" + str(self.port),
            "-V", "LOG_LEVEL=6",
            "-t", self.work_dir + "/searchproxy.conf"
        ]


def create_resource_params():
    '''Build the ``sdk2.Task.Parameters`` subclass used by the shooting task.

    All parameters belong to the "Ydo shooting params" group. The class is
    created inside a function and returned to the caller.
    '''

    def optional_resource(label, rtype, group):
        # Shorthand for the many optional resource inputs that differ only
        # in their label and resource type.
        return sdk2.parameters.Resource(label, resource_type=rtype, required=False, group=group)

    def required_integer(label, default_value, group):
        # Shorthand for the mandatory integer knobs of the shooting.
        return sdk2.parameters.Integer(label, default=default_value, required=True, group=group)

    class Parameters(sdk2.Task.Parameters):

        group_name = "Ydo shooting params"

        # --- binaries under test ---
        binary_first = sdk2.parameters.Resource(
            'Metasearch binary 1', resource_type=YdoSaasSearchProxy, required=True, group=group_name
        )
        binary_second = optional_resource('Metasearch binary 2 (baseline)', YdoSaasSearchProxy, group_name)

        # --- shooting settings ---
        num_runs = required_integer("Number of runs", 5, group_name)
        max_rps = required_integer("Max rps for single dolbilka thread", 10, group_name)
        max_requests = required_integer("Max requests to metasearch", 1000, group_name)
        requests = optional_resource('Requests', YdoMetaseatchRequests, group_name)

        tvm_key = sdk2.parameters.YavSecret(
            "TVM_KEY for metasearch proxy", default="sec-01d7q0522wbtw8wene48dttsk3", group=group_name
        )

        # --- work directory source ---
        workdir_from_script = sdk2.parameters.Bool("Download resource from script", default=True, group=group_name)
        datascript_path = sdk2.parameters.String(
            "Arcadia path to script",
            default="arcadia:/arc/trunk/arcadia/ydo/tools/searchproxy/download_data.sh",
            group=group_name
        )

        # --- data resources ---
        search_proxy_config = optional_resource('Search proxy config', YdoSearchProxyConfig, group_name)
        geodata = optional_resource('Geodata6bin', GEODATA6BIN_STABLE, group_name)
        rubrics_dump = optional_resource('Rubrics dump', YdoMergedRubricsDump, group_name)
        features_json = optional_resource('Features json', YdoFeaturesJson, group_name)
        rubrics_embeddings = optional_resource('Rubrics embeddings', YdoRubricEmbeddings, group_name)
        merged_geo_2_workers_count_dump = optional_resource(
            'Merged geo to workers count dump', YdoMergedGeo2WorkersCountDump, group_name
        )
        problems = optional_resource('Problems list', YdoProblemsList, group_name)
        clinics = optional_resource('Clinics list', YdoClinicsList, group_name)
        rubricator_features = optional_resource('Rubricator features', YdoRubricatorFeatures, group_name)
        collections = optional_resource('Collections json', YdoCollectionsJson, group_name)
        puids = optional_resource('Workers puid list', YdoWorkersPuidList, group_name)
        services_price_intervals = optional_resource(
            'Service price intervals json', YdoServicesPriceIntervalsJson, group_name
        )
        saas_search_proxy_models = optional_resource('Saas search proxy models', YdoSaasSearchProxyModels, group_name)
        saas_search_proxy_word_2_vec = optional_resource(
            'Saas search proxy word2vec', YdoSaasSearchProxyWord2Vec, group_name
        )
        dssm_model = optional_resource('DSSM', YdoDssmModel, group_name)
        rubric_dssm = optional_resource('Rubric dssm', YdoDssmModel, group_name)
        customizer = optional_resource('Customizer json', YdoCustomizerJson, group_name)
        wizard_customizer = optional_resource('Wizard customizer json', YdoWizardCustomizerJson, group_name)
        call_center_settings = optional_resource(
            'Call center card settings', YdoCallCenterCardSettingsJson, group_name
        )

        middlesearch_evlogdump_resource_id = sdk2.parameters.Resource(
            'Evlogdump executable',
            resource_type=resource_types.EVLOGDUMP_EXECUTABLE,
            required=True,
            group=group_name
        )

        # NOTE(review): tvm_key is declared above but is not listed in this tuple
        # (its entry was commented out) — confirm whether that is intentional.
        params = (
            binary_first, binary_second,
            num_runs, max_rps, max_requests, requests,
            workdir_from_script, datascript_path,
            search_proxy_config, geodata, rubrics_dump, features_json,
            rubrics_embeddings, merged_geo_2_workers_count_dump,
            problems, rubricator_features, clinics, collections, puids,
            services_price_intervals,
            saas_search_proxy_models, saas_search_proxy_word_2_vec,
            dssm_model, rubric_dssm,
            customizer, wizard_customizer, call_center_settings,
            middlesearch_evlogdump_resource_id,
        )

    return Parameters


def get_datascript_path_and_name(path_to_script):
    '''Split a slash-separated script location into ``(directory, file name)``.

    Everything before the last ``/`` is the directory and everything after it
    is the name; a location without any ``/`` yields an empty directory.
    '''
    directory, _, script_name = path_to_script.rpartition('/')
    return directory, script_name


class YdoParallelDumpEventlog(sdk2.Task):
    '''Shoot two Ydo searchproxy binaries with the same requests and publish their eventlogs.

    Index 0 always refers to ``binary_first`` and index 1 to the baseline
    binary. A work directory and a dolbilka plan are prepared for each index;
    then, for every run, both searchproxies are started, shot one after
    another, their eventlogs are copied into the EVENTLOG_DUMP resources
    declared in ``on_enqueue`` and the proxies are stopped.
    '''

    Parameters = create_resource_params()

    @staticmethod
    def _base_port(index):
        '''Return the base port of the searchproxy with the given index.'''
        return 10000 + 100 * index

    def _eventlog_path(self, index):
        '''Return the path of the eventlog written inside work directory ``index``.'''
        return self._work_dir[index] + "/logs/current-searchproxy-eventlog-ydo_wizard"

    def _unpack_folder_to_directory(self, tar_path, dst, folder_name=".", tmp_path="./tmp", unpack=True):
        '''Copy the files of one folder of an archive (or plain directory) into ``dst``.

        :param tar_path: archive to extract, or a directory when ``unpack`` is False
        :param dst: destination the files are copied to
        :param folder_name: sub-folder to take the files from; empty means the root
        :param tmp_path: scratch directory for extraction; removed afterwards
        :param unpack: extract ``tar_path`` first when True, otherwise read it as a directory
        '''
        if not unpack:
            tmp_path = tar_path
        try:
            if unpack:
                # The context manager closes the archive even if extraction fails.
                with tarfile.open(tar_path) as tar:
                    tar.extractall(path=tmp_path)

            folder_dir = os.path.join(tmp_path, folder_name) if folder_name else tmp_path
            for entry in os.listdir(folder_dir):
                copy(os.path.join(folder_dir, entry), dst)
        finally:
            # Drop the scratch directory on failure too, so a retry starts clean.
            if unpack and os.path.isdir(tmp_path):
                rmtree(tmp_path)

    # TODO: check out the data-collecting script from Arcadia
    def prepare_workdir(self, index):
        '''Create work directory ``index`` and fill it with configs and data.

        The directory is registered as a ``YdoMetasearchWorkdir`` resource and its
        path is appended to ``self._work_dir``. When ``workdir_from_script`` is
        set, the data script is checked out and executed and nothing else is
        done; otherwise the directory is assembled from the resource parameters.
        '''
        name = "work_dir_" + str(index)
        resource = YdoMetasearchWorkdir(self, "Metasearch data dir", name)
        root = sdk2.ResourceData(resource).path
        for directory in (
            root,
            root / "data",
            root / "data" / "models_storage",
            root / "data" / "word2vec" / "word2vec_1kk",
            root / "logs",
        ):
            directory.mkdir(0o777, parents=True, exist_ok=True)
        work_dir = str(root)
        data_dir = work_dir + "/data"
        self._work_dir.append(work_dir)

        if self.Parameters.workdir_from_script:
            checkout_path, script_name = get_datascript_path_and_name(self.Parameters.datascript_path)
            Arcadia.checkout(checkout_path, work_dir + "/datascript")
            execute_cmd(
                [
                    work_dir + "/datascript/" + script_name,
                    work_dir,
                    str(self._base_port(index)),
                    "--use_hamster_backends"
                ],
                "datascript",
                "data download failed"
            )
            return

        parameters_to_data = [
            self.Parameters.geodata,
            self.Parameters.rubrics_dump,
            self.Parameters.features_json,
            self.Parameters.rubrics_embeddings,
            self.Parameters.merged_geo_2_workers_count_dump,
            self.Parameters.problems,
            self.Parameters.clinics,
            self.Parameters.rubricator_features,
            self.Parameters.collections,
            self.Parameters.puids,
            self.Parameters.services_price_intervals,
            self.Parameters.dssm_model,
            self.Parameters.customizer,
            self.Parameters.wizard_customizer,
            self.Parameters.call_center_settings,
        ]

        self._unpack_folder_to_directory(
            str(sdk2.ResourceData(self.Parameters.saas_search_proxy_word_2_vec).path),
            data_dir + "/word2vec/word2vec_1kk",
            folder_name="", tmp_path=work_dir + '/tmp0', unpack=False
        )
        self._unpack_folder_to_directory(
            str(sdk2.ResourceData(self.Parameters.search_proxy_config).path),
            work_dir,
            folder_name="searchproxy_configs", tmp_path=work_dir + '/tmp1'
        )
        self._unpack_folder_to_directory(
            str(sdk2.ResourceData(self.Parameters.saas_search_proxy_models).path),
            data_dir + "/models_storage",
            folder_name=".", tmp_path=work_dir + '/tmp2'
        )

        for data_resource in parameters_to_data:
            source = str(sdk2.ResourceData(data_resource).path)
            if os.path.isdir(source):
                # NOTE(review): data_dir was created above and shutil.copytree is
                # documented to require a missing destination, so this branch
                # presumably fails for directory resources — confirm the
                # intended layout before changing it.
                copytree(source, data_dir)
            else:
                copy(source, data_dir)

        # The rubric DSSM model comes from its own parameter; the previous code
        # reused the loop variable and copied the call center settings instead.
        copy(str(sdk2.ResourceData(self.Parameters.rubric_dssm).path), data_dir + "/rubric.dssm")
        move(work_dir + "/hamster_backends.conf", work_dir + "/backends.conf")
        move(work_dir + "/hamster_searchmap.json", work_dir + "/searchmap.json")
        move(work_dir + "/hamster_searchproxy-ydo.conf", work_dir + "/searchproxy-ydo.conf")
        move(data_dir + "/rubric_embeddings_filtered.json", data_dir + "/rubric_embeddings.json")

    def start_searchproxies(self):
        '''Create and start both searchproxies, storing them in ``self.searchproxies``.

        When ``binary_second`` is not set, the baseline is looked up through
        ``resource_selectors.by_last_released_task``.
        '''
        self.searchproxies = []
        test_binary = self.Parameters.binary_first
        baseline_binary = self.Parameters.binary_second
        if not baseline_binary:
            baseline_binary = sdk2.Resource.find(
                YdoSaasSearchProxy,
                id=resource_selectors.by_last_released_task(YdoSaasSearchProxy)[0]
            ).first()

        # The secret is the same for both binaries, so it is read only once.
        tvm_secret = self.Parameters.tvm_key.data()["TVM_SECRET"]
        for index, binary in enumerate((test_binary, baseline_binary)):
            binary_path = str(sdk2.ResourceData(binary).path)
            self.searchproxies.append(
                YdoMetaSearch(
                    self._work_dir[index], binary_path, self._base_port(index),
                    YdoMetasearchConfig, binary_path, "", tvm_secret
                )
            )

        for index, proxy in enumerate(self.searchproxies):
            proxy.start()
            proxy.wait()
            # Logged only after the process was really started and waited for.
            logging.info("Binary {} started".format(index))

    def stop_searchproxies(self):
        '''Stop every searchproxy created by ``start_searchproxies``.'''
        for proxy in self.searchproxies:
            proxy.stop()

    def publish_eventlogs(self, num_run):
        '''Copy the eventlog of each binary into its EVENTLOG_DUMP resource for run ``num_run``.'''
        for i in range(2):
            resource = sdk2.Resource.find(type="EVENTLOG_DUMP", task_id=self.id, attrs={"binary_number" : i, "run_number" : num_run}).first()
            resource_data = sdk2.ResourceData(resource)
            copy(self._eventlog_path(i), str(resource_data.path))
            resource_data.ready()

    def make_plans(self):
        '''Convert the requests resource into one dolbilka plan per work directory.

        When the ``requests`` parameter is empty, the most recently updated
        ``YdoMetaseatchRequests`` resource is used.
        '''
        self._plan_file = [
            os.path.join(self._work_dir[0], 'plan.bin.1st'),
            os.path.join(self._work_dir[1], 'plan.bin.2nd')
        ]
        requests_resource = self.Parameters.requests
        if not requests_resource:
            requests_resource = sdk2.Resource.find(
                YdoMetaseatchRequests
            ).order(-sdk2.Resource.updated).first()
        self._requests = str(sdk2.ResourceData(requests_resource).path)
        for plan_file in self._plan_file:
            dolbilka.convert_queries_to_plan(
                self._requests,
                plan_file,
                rps=self.Parameters.max_rps,
                max_queries=self.Parameters.max_requests
            )

    def do_shoot(self):
        '''Shoot each searchproxy with its plan, waiting for one shooting before the next.'''
        for i in range(2):
            dolbilka_thread = _run_dolbilka_thread(
                self._plan_file[i],
                self.searchproxies[i],
                'shot_{}_{}_{}'.format(1, 1, i),
                self.Parameters.max_requests
            )
            dolbilka_thread.join()

    def clear_logs(self):
        '''Remove the eventlog of both work directories so that the next run starts empty.'''
        for i in range(2):
            os.remove(self._eventlog_path(i))

    def on_enqueue(self):
        '''Declare one EVENTLOG_DUMP resource per (run, binary) pair.'''
        for run_number in range(self.Parameters.num_runs):
            for binary_number in range(2):
                resource = EVENTLOG_DUMP(self, "Ydo metasearch eventlog", "ydo_eventlog_{}_{}".format(binary_number, run_number))
                resource.run_number = run_number
                resource.binary_number = binary_number
                resource.key = "{},{}".format(run_number, 0)

    def on_execute(self):
        '''Prepare both work directories and plans, then perform ``num_runs`` shooting runs.'''
        self._work_dir = []
        for i in range(2):
            self.prepare_workdir(i)
        self.make_plans()
        for run_number in range(self.Parameters.num_runs):
            self.start_searchproxies()
            try:
                self.do_shoot()
                self.publish_eventlogs(run_number)
            finally:
                # Never leave searchproxy processes behind if a run fails.
                self.stop_searchproxies()
            self.clear_logs()
