# -*- coding: utf-8 -*-
import os
import logging

from sandbox.common.errors import TaskFailure
import sandbox.projects.yane.common as yane
from sandbox.projects.common.utils import check_subtasks_fails
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.parameters import SandboxStringParameter, SandboxBoolParameter
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk import ssh
from sandbox.projects.yane.BuildAndLearn import YaneBuildAndLearn
from sandbox.sandboxsdk import environments


class YaneBuildAndCommit(yane.YaneTaskBase):
    # Task type identifier of this task class.
    type = 'YANE_BUILD_AND_COMMIT'

    # NOTE(review): presumably disk space in megabytes (i.e. 50 GiB) — confirm against the Sandbox SDK.
    execution_space = 50 * 1024

    # Threshold parameters borrowed from YaneBuildAndLearn: every input parameter whose
    # name starts with 'fmeasure_threshold'. They are spliced into input_parameters, and
    # check_fmeasure_below_threshold reads ctx keys of the form
    # 'fmeasure_threshold_<lang>_<formula>'.
    thresholds = [p for p in YaneBuildAndLearn.input_parameters if p.name.startswith('fmeasure_threshold')]

    # Pip package 'yandex-yt'; upload_to_yt imports yt.wrapper.
    environment = [environments.PipEnvironment('yandex-yt')]

    class TotalLearnTask(SandboxStringParameter):
        # Id of the YANE_TOTAL_LEARN task; do_execute fetches that task by this id
        # and the other methods read its context.
        required = True
        name = 'total_learn_task'
        description = 'YANE_TOTAL_LEARN task id'

    class CheckFMeasure(SandboxBoolParameter):
        # Switch for the f-measure gate. The names of the 'fmeasure_threshold*'
        # parameters of YaneBuildAndLearn are declared as its sub-fields for 'true'.
        name = 'check_fmeasure'
        description = 'Check fmeasure from quality subtask'
        sub_fields = {
            'true': [
                p.name
                for p in YaneBuildAndLearn.input_parameters
                if p.name.startswith('fmeasure_threshold')
            ]
        }
        default_value = True

    class ArcadiaESTargetFolders(SandboxStringParameter):
        # Space-separated Arcadia folders; commit_to_arcadia splits the value on
        # whitespace and commits the entity-search (non-Begemot) ya.make files there.
        group = yane.GROUP_OUT
        name = 'svn_paths_es'
        description = 'Arcadia target folders for ES NER, space-separated'
        default_value = 'search/wizard/entitysearch/data/ner'

    class ArcadiaBEGTargetFolders(SandboxStringParameter):
        # Space-separated Arcadia folders; commit_to_arcadia splits the value on
        # whitespace and commits the Begemot flavour of the ya.make files there.
        # NOTE(review): the default points into a personal junk folder and, unlike the
        # ES default, starts with 'arcadia/' — looks like a temporary value; confirm.
        group = yane.GROUP_OUT
        name = 'svn_paths_begemot'
        description = 'Arcadia target folder for NER in Begemot, space-separated'
        default_value = 'arcadia/junk/yourmary/temp_ner/begemot'

    class Server(SandboxStringParameter):
        # Host name that upload_to_yt assigns to the YT proxy URL.
        required = True
        name = 'mr_server'
        description = 'Server'
        default_value = 'hahn.yt.yandex.net'

    class YtUploadPath(SandboxStringParameter):
        # Destination prefix in YT; upload_to_yt does nothing when the value is empty.
        group = yane.GROUP_OUT
        name = 'yt_upload_path'
        description = 'YT path to upload NER data'

    # Full parameter list of the task: the learn task id and the f-measure switch with its
    # thresholds, then the output targets, the shared base parameters from
    # yane.get_base_params() and finally the YT server.
    input_parameters = [TotalLearnTask, CheckFMeasure] + thresholds + [ArcadiaESTargetFolders, ArcadiaBEGTargetFolders, YtUploadPath] + yane.get_base_params().params + [Server]

    # File names listed in the generated ru/ya.make (passed to create_ya_make by commit_to_arcadia).
    NER_RU_FILES = ['relwords.bin', 'significancenewsmodel.info', 'significancenewsukrmodel.info',
                    'synonyms.bin', 'synonym.data.bin', 'docmodel.info', 'videoquerymodel.info',
                    'filterquerymodel.info', 'filterdocmodel.info', 'querymodel.info', 'phrase.model.rus.bin']
    # NOTE(review): not referenced anywhere in the visible part of this file — only the 'ru'
    # subdirectory is generated by commit_to_arcadia.
    NER_TR_FILES = ['relwords.bin', 'synonyms.bin', 'synonym.data.bin', 'docmodel.info',
                    'filterquerymodel.info', 'filterdocmodel.info', 'querymodel.info']

    def create_ya_make(self, resource_id, task_id, files, out, is_begemot):
        with open(out, "w") as f:
            print >> f, "OWNER(\n    robot-ontodb\n    g:entitysearch\n    g:yane"
            if is_begemot:
                print >> f, "    g:begemot"
            print >> f, ")\n\nUNION()\n"
            print >> f, "# read from https://sandbox.yandex-team.ru/task/{}/resources".format(task_id)
            print >> f, "FROM_SANDBOX({}".format(resource_id)
            if is_begemot:
                print >> f, "    RENAME wizdata/object.data.bin OUT object.data.bin\n    OUT"
            else:
                print >> f, "    OUT object.data.bin"
            for filename in files:
                print >> f, "    {}".format(filename)
            if not is_begemot:
                print >> f, "    ontodb_version.txt"
                print >> f, "    total_learn_task_id.txt"
            print >> f, ")\n\nEND()"

    def create_recurse_ya_make(self, out, path, dirs, is_begemot):
        with open(out, "w") as f:
            print >> f, "OWNER(\n    robot-ontodb\n    g:entitysearch\n    g:yane"
            if is_begemot:
                print >> f, "    g:begemot"
            print >> f, ")\n\nPACKAGE()\n\nPEERDIR("
            for dr in dirs:
                print >> f, "    {}/{}".format(path, dr)
            print >> f, ")\n\nEND()"

    def check_fmeasure_below_threshold(self):
        if self.ctx['check_fmeasure']:
            for quality_task_id in self.total_learn_task.ctx["child_tasks"]:
                quality_task = channel.sandbox.get_task(quality_task_id)
                logging.info(quality_task_id)
                logging.info(quality_task)
                lang = quality_task.ctx["extraction_languages"]
                formula = quality_task.ctx["formula"]
                fmeasure = quality_task.ctx["fmeasure"]
                self.ctx["{}_{}_fmeasure".format(lang, formula)] = fmeasure
                opt = '_'.join(['fmeasure_threshold', lang, formula])
                if opt in self.ctx and fmeasure < self.ctx[opt]:
                    raise TaskFailure("small f-measure: {} (lang = {}, formula = {})".format(fmeasure, lang, formula))

    def commit_to_arcadia(self):
        commit_paths = [(p.strip(), False) for p in self.ctx["svn_paths_es"].split()]
        commit_paths += [(p.strip(), True) for p in self.ctx["svn_paths_begemot"].split()]
        i = 0
        for svn_path, is_begemot in commit_paths:
            datapath = 'data' + str(i)
            Arcadia.checkout(Arcadia.trunk_url(svn_path), path=datapath)
            subdirs = ['ru']
            for subdir in subdirs:
                if not os.path.exists(datapath + "/" + subdir):
                    os.makedirs(datapath + "/" + subdir)
            self.create_recurse_ya_make(datapath + "/ya.make", svn_path, subdirs, is_begemot)
            self.create_ya_make(self.total_learn_task.ctx["russian_data_model"], self.ctx['total_learn_task'], YaneBuildAndCommit.NER_RU_FILES, datapath + "/ru/ya.make", is_begemot)
            run_process(["svn", "add", datapath, "--force"], log_prefix="svn_add")
            commit_msg = "Update NER data from https://sandbox.yandex-team.ru/task/{}/resources SKIP_CHECK".format(self.ctx['total_learn_task'])
            with ssh.Key(self, "robot-ontodb", "robot-ontodb-ssh-key"):
                Arcadia.commit(datapath, commit_msg, "robot-ontodb")
            i += 1

    def upload_to_yt(self):
        if self.ctx['yt_upload_path']:
            import yt.wrapper as yt
            yt.config['proxy']['url'] = self.ctx['mr_server']
            try:
                yt.config['token'] = self.get_vault_data('YANE', 'robot_yane_yt_token')
            except TaskFailure as exc:
                logging.error("couldn't use token: {}, skip uploading".format(exc))
                return
            logging.info("yt proxy is {}, token {}".format(yt.config['proxy']['url'], yt.config['token']))
            for model in {'russian', 'turkish'}:
                model_name = "{}_data_model".format(model)
                if not self.total_learn_task.ctx.get(model_name):
                    continue
                yt_dir_model = self.ctx['yt_upload_path'] + model_name
                if not yt.exists(yt_dir_model):
                    yt.mkdir(yt_dir_model)
                os.mkdir(model_name)
                run_process(["tar", "-C", model_name, "-zxf", self.sync_resource(self.total_learn_task.ctx[model_name])], log_prefix="extract_models_{}".format(model))
                for filename in os.listdir(model_name):
                    filepath = "{}/{}".format(model_name, filename)
                    logging.info("dir {} filename {} is_file {}".format(model_name, filename, os.path.isfile(filepath)))
                    if os.path.isfile(filepath):
                        logging.info("uploading {} to {}".format(filepath, yt_dir_model))
                        yt.smart_upload_file(
                            destination="{}/{}".format(yt_dir_model, filename),
                            filename=filepath,
                            placement_strategy="replace"
                        )

    @yane.run_once
    def upload_models(self):
        """Run the quality gate, then publish the models to Arcadia and YT.

        The steps run strictly in this order. check_fmeasure_below_threshold
        raises TaskFailure when a score is below its threshold, in which case
        nothing is committed or uploaded.
        """
        publish_steps = (
            self.check_fmeasure_below_threshold,
            self.commit_to_arcadia,
            self.upload_to_yt,
        )
        for step in publish_steps:
            step()

    def __init__(self, task_id=0):
        """Initialise the base task and store the kill timeout in the context.

        :param task_id: forwarded unchanged to yane.YaneTaskBase.__init__
        """
        yane.YaneTaskBase.__init__(self, task_id)
        # 4 * 60 * 60 — presumably seconds, i.e. four hours; confirm the unit in the SDK.
        timeout_hours = 4
        self.ctx['kill_timeout'] = timeout_hours * 60 * 60

    def do_execute(self):
        """Task entry point: wait for the learn task, publish its models, check subtasks.

        The learn task named by ctx['total_learn_task'] is fetched; if it has not
        finished, wait_task_completed is called on it (presumably suspending this
        task until then — confirm against the SDK). Afterwards the models are
        published through upload_models. On a run where ctx has no 'childs' key
        the key is set and the method returns; on later runs
        check_subtasks_fails() is called instead.
        """
        learn_task = channel.sandbox.get_task(self.ctx['total_learn_task'])
        if not learn_task.is_finished():
            self.wait_task_completed(learn_task)
        self.total_learn_task = learn_task
        self.upload_models()

        if 'childs' not in self.ctx:
            # First pass: only leave the marker. No wait on ctx['child_tasks'] is
            # performed here (the wait_all_tasks_stop_executing call is disabled).
            self.ctx['childs'] = True
            return
        check_subtasks_fails()


# NOTE(review): presumably the module-level hook by which the Sandbox loader finds the task class — confirm.
__Task__ = YaneBuildAndCommit
