# coding: utf-8
import os, re
import logging
from datetime import datetime
from sandbox.sandboxsdk.parameters import SandboxStringParameter, LastReleasedResource, SandboxSvnUrlParameter
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.paths import make_folder

from sandbox.projects import resource_types
import sandbox.projects.common.dynamic_models.download as models_download

from sandbox.sandboxsdk.task import SandboxTask


class MRServerParameter(SandboxStringParameter):
    """Required string parameter with the address of the YT server to read from."""
    group = 'params'
    required = True
    name = 'mrserver'
    default_value = 'aristotle.yt.yandex.net'
    description = 'Mapreduce yt server'


class YtUser(SandboxStringParameter):
    """Required string parameter with the user name exported as ``MR_USER``."""
    group = 'params'
    required = True
    name = 'yt_user'
    default_value = 'dict'
    description = 'Mapreduce yt user'


class MapreduceYTExecutableResourceParameter(LastReleasedResource):
    """Resource parameter of type ``YT_LOCAL`` that provides the mapreduce-yt archive."""
    group = 'params'
    resource_type = resource_types.YT_LOCAL
    name = 'mapreduce_yt_resource_id'
    description = 'mapreduce-yt executable resource'


class SourceTableParameter(SandboxStringParameter):
    """Required string parameter with the YT path of the query-ontoid pairs table."""
    group = 'params'
    required = True
    name = 'pairs_table'
    default_value = '//home/dict/belalex/testpairs'
    description = 'Table with query-ontoid pairs'


class DestinationSvnDir(SandboxSvnUrlParameter):
    """Required svn url parameter naming the directory that holds the stickies file."""
    group = 'params'
    required = True
    name = 'destination_arcadia_path'
    default_value = 'arcadia:/arc/trunk/arcadia/junk/belalex'
    description = 'Svn dir when we commit sticky objects'


class DestinationStickyFilename(SandboxStringParameter):
    """Required string parameter with the name of the stickies file inside the svn dir."""
    group = 'params'
    required = True
    name = 'destination_filename'
    default_value = 'ontodbfixes_stickies.gzt'
    description = 'File name for ontodbfixes stickies (file should be under version control)'


class BuildOntodbFixesSticky(SandboxTask):
    """
        Build the ontodbfixes stickies gazetteer from a YT table and commit it to svn.

        The task dumps the query-ontoid pairs table with ``mapreduce-yt``,
        validates the pairs, renders them as ``TEntitySearchFixedResponse``
        entries into the stickies file found in the destination svn directory
        and commits that file.
    """
    type = 'BUILD_ONTODB_FIXES_STICKY'

    input_parameters = [
        MRServerParameter,
        YtUser,
        MapreduceYTExecutableResourceParameter,
        SourceTableParameter,
        DestinationSvnDir,
        DestinationStickyFilename
    ]

    def prepare_svn_private_key(self):
        """
            Store the robot's private key from the vault in a local file.

            The file is created with mode 0600 and the mode is re-applied before
            the key is written, so the key is never readable by other users.

            :return: path to the private key file
        """
        private_key = self.get_vault_data('YANE', 'robot-srch-ontodb-rw').strip()
        robot_pk_filename = self.path('robot_private_key')
        fd = os.open(robot_pk_filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            # The creation mode is ignored when the file already exists, so
            # tighten the permissions explicitly before any key material lands.
            os.fchmod(f.fileno(), 0o600)
            f.write(private_key)
        return robot_pk_filename

    def get_yt_local(self):
        """
            Download the resource selected by the mapreduce-yt resource parameter.

            :return: whatever ``sync_resource`` returns for that resource; callers
                     in this class use it as the path of a gzipped tar archive
        """
        return self.sync_resource(
            self.ctx[MapreduceYTExecutableResourceParameter.name]
        )

    def get_mapreduce_yt_binary(self):
        """
            Unpack the YT archive and locate the ``mapreduce-yt`` binary in it.

            :return: tuple ``(path to mapreduce-yt, environment dict to run it with)``
        """
        mapreduce_yt_tar = self.get_yt_local()

        yt_dir_path = self.path('yt')
        make_folder(yt_dir_path)
        # NOTE(review): an argument list is combined with shell=True here;
        # presumably run_process joins the list into one shell string - confirm.
        run_process(
            ['tar', '-C', yt_dir_path, '-zxvf', mapreduce_yt_tar],
            shell=True
        )
        mapreduce_yt_bin = os.path.join(yt_dir_path, 'archive/bin/mapreduce-yt')
        return mapreduce_yt_bin, self.get_yt_env(yt_dir_path)

    def get_yt_token(self):
        """
            Fetch the YT token from the vault.

            :return: token string with surrounding whitespace removed
        """
        return self.get_vault_data('YANE', 'yt-token').strip()

    def get_yt_env(self, yt_path):
        """
            Build the process environment for running ``mapreduce-yt``.

            :param yt_path: directory the YT archive was unpacked into
            :return: copy of ``os.environ`` extended with the MR_*/YT_TOKEN
                     variables and with the archive's python dir prepended
                     to ``PYTHONPATH``
        """
        yt_token = self.get_yt_token()
        env = os.environ.copy()
        env["MR_USER"] = self.ctx[YtUser.name]
        env["MR_RUNTIME"] = 'YT'
        env["MR_CLUSTER"] = 'yt'
        env["YT_TOKEN"] = yt_token

        # PYTHONPATH may be absent from the task environment; prepend to it
        # only when there is something to prepend to.
        python_path = [os.path.join(yt_path, "archive/python")]
        if env.get("PYTHONPATH"):
            python_path.append(env["PYTHONPATH"])
        env["PYTHONPATH"] = ":".join(python_path)

        # NOTE(review): this writes the first 8 characters of a secret to the
        # task log; consider dropping it once it is no longer needed for debugging.
        logging.info('yt token: %s', yt_token[:8])
        return env

    def get_svn_env(self):
        """
            Build the process environment for svn over ssh as the robot user.

            :return: copy of ``os.environ`` with ``SVN_SSH`` pointing at the
                     robot's private key file
        """
        svn_private_key_filename = self.prepare_svn_private_key()
        env = os.environ.copy()
        env['SVN_SSH'] = 'ssh -v -l robot-srch-ontodb-rw -i {}'.format(svn_private_key_filename)
        return env

    def read_from_yt_to_file(self, filename, mapreduce_yt_bin, mapreduce_yt_env):
        """
            Dump the source table into a local file.

            :param filename: path of the file that receives the command's stdout
            :param mapreduce_yt_bin: path to the ``mapreduce-yt`` binary
            :param mapreduce_yt_env: environment dict to run the binary with
        """
        with open(filename, "w+") as f:
            run_process(
                [
                    mapreduce_yt_bin,
                    '-server', self.ctx[MRServerParameter.name],
                    '-read', self.ctx[SourceTableParameter.name]
                ],
                log_prefix='mr_read',
                environment=mapreduce_yt_env,
                stdout=f,
                shell=True
            )
            f.flush()

    def _read_pairs(self, yt_table_file):
        """
            Parse and validate the tab separated key-ontoid dump.

            :param yt_table_file: path to a file with one ``key<TAB>ontoid`` pair per line
            :return: dict mapping text key to ontoid
            :raises ValueError: on a line that is not exactly two tab separated
                                fields, a key that is empty or contains a forbidden
                                character, an ontoid with a space, or a repeated key
        """
        # A key must be non-empty and must not contain any character of this class.
        regex = re.compile(r"^[^\"\'\<\>\[\]\#\$\^\~\-]+$")
        pairs = {}
        with open(yt_table_file) as rf:
            for line in rf:
                pair = line.strip().split('\t')
                if len(pair) != 2:
                    raise ValueError('We want tab separated key-ontoid pairs.')
                key, ontoid = pair
                if not regex.match(key):
                    raise ValueError(
                        'Text key must be non-empty and cannot contain any of: " \' < > [ ] # $ ^ ~ -'
                    )
                if ' ' in ontoid:
                    raise ValueError('Ontoid cannot contain spaces.')
                if key in pairs:
                    raise ValueError('Duplicate key.')
                pairs[key] = ontoid
        return pairs

    def convert_to_gzt(self, yt_table_file, gzt_file):
        """
            Render the key-ontoid dump as a gazetteer file.

            Entries are written in sorted key order, one
            ``TEntitySearchFixedResponse`` per pair.

            :param yt_table_file: path to the tab separated key-ontoid dump
            :param gzt_file: path of the gazetteer file to (over)write
            :raises ValueError: when the dump fails validation (see ``_read_pairs``)
        """
        pairs = self._read_pairs(yt_table_file)
        # The destination is opened only after validation succeeded, so a bad
        # dump never truncates the existing gazetteer.
        with open(gzt_file, "w") as wf:
            wf.write("""encoding "utf8";\nimport "search/wizard/entitysearch/database/format/index.gztproto";\n""")
            for key in sorted(pairs):
                wf.write("""TEntitySearchFixedResponse { key="%s"; Tld=RU; BadgeId="%s" }\n""" % (
                    key, pairs[key]
                ))

    def commit_stickies(self, stickies_path):
        """
            Commit the stickies file to svn with a timestamped message.

            :param stickies_path: path of the file inside the svn working copy
        """
        now = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        run_process(
            [
                'svn', 'commit', '-m',
                # The message carries literal double quotes because the command
                # is run with shell=True (presumably as one joined string - confirm).
                '\"autocommit ontodbfixes stickies %s\"' % now,
                stickies_path
            ],
            log_prefix='svn_commit',
            environment=self.get_svn_env(),
            shell=True
        )

    def get_pairs_file_from_svn_pathes(self, pathes, filename):
        """
            Find the path whose trailing component(s) equal ``filename``.

            The match is made on a path component boundary, so
            ``old_<filename>`` is not mistaken for ``<filename>``.

            :param pathes: iterable of '/'-separated path strings
            :param filename: file name (optionally with leading directories) to look for
            :return: the first matching path, or None when nothing matches
        """
        suffix = '/' + filename
        for svnfilename in pathes:
            if svnfilename == filename or svnfilename.endswith(suffix):
                return svnfilename
        return None

    def on_execute(self):
        """
            Run the task: dump the table, regenerate the stickies file, commit it.

            :raises Exception: when the destination file is not among the paths
                               returned for the destination svn directory
        """
        mapreduce_yt_bin, mapreduce_yt_env = self.get_mapreduce_yt_binary()
        pairs_file = self.path("pairs.tsv")
        self.read_from_yt_to_file(pairs_file, mapreduce_yt_bin, mapreduce_yt_env)

        dst_filename = self.ctx[DestinationStickyFilename.name]
        dst_pairs_path = self.get_pairs_file_from_svn_pathes(
            models_download.svn(self.path('rep'), self.ctx[DestinationSvnDir.name]),
            dst_filename
        )
        if not dst_pairs_path:
            raise Exception('Cannot find file "%s" in svn' % dst_filename)
        self.convert_to_gzt(pairs_file, dst_pairs_path)
        self.commit_stickies(dst_pairs_path)


# Module-level alias bound to the task class; presumably this is the name the
# Sandbox task loader looks up to discover the task - confirm against the SDK.
__Task__ = BuildOntodbFixesSticky
