import os
import shutil
import logging

from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.process import run_process
import sandbox.sandboxsdk.parameters as sdk_parameters
from sandbox.projects import resource_types
from sandbox.projects.common.userdata import userdata_base_task, util


class GeoAParameter(sdk_parameters.ResourceSelector):
    # Optional task input: a selector restricted to GEOA_C2P resources,
    # placed into the data-files parameter group of the base userdata task.
    group = userdata_base_task.DATA_FILES_GROUP_NAME
    resource_type = resource_types.GEOA_C2P
    required = False

    name = 'geoa_resource_id'
    description = "Resource with geoa.c2p"


class IpregResourceParameter(sdk_parameters.LastReleasedResource):
    # Optional task input: an IPREG_SET resource, placed into the data-files
    # parameter group of the base userdata task.
    group = userdata_base_task.DATA_FILES_GROUP_NAME
    resource_type = resource_types.IPREG_SET
    required = False  # don't require it yet

    name = 'ipreg_resource_id'
    description = 'Resource with IPREG and co'


class UserdataPackage(sdk_parameters.LastReleasedResource):
    # Task input: a USERFEAT_USERDATA_PACKAGE resource, placed into the
    # packages parameter group of the base userdata task.
    group = userdata_base_task.PACKAGES_GROUP_NAME
    resource_type = resource_types.USERFEAT_USERDATA_PACKAGE

    name = 'userdata_package_resid'
    description = 'Resource with userdata package'


class UserdataPrepSingleRun(userdata_base_task.Task):
    """
        Runs userdata_prep on a stored state
    """

    type = 'USERDATA_PREP_SINGLE_RUN'

    execution_space = 60000
    # 25 * 2**30; presumably a byte count (i.e. 25 GiB) -- TODO confirm the unit.
    node_chunk_store_quota = 25 << 30
    forbid_chunk_storage_in_tmpfs = True
    yt_testable = True

    # Base task parameters extended with the package and data-file inputs
    # declared in this module.
    input_parameters = util.smart_join_params(
        userdata_base_task.Task.input_parameters,
        UserdataPackage,
        GeoAParameter,
        IpregResourceParameter,
    )

    def on_enqueue(self):
        """
            Register the output resource and remember its id in the context.

            The resource is created with the placeholder path 'YYY'; the real
            path is assigned at the end of process_mr_data().
        """
        super(UserdataPrepSingleRun, self).on_enqueue()
        res = self.create_resource(
            "userdata index part from %s" % self.descr,
            'YYY',
            resource_types.USERDATA_INDEX_FRAGMENT,
            arch='any',
        )
        self.ctx['dump_resource_id'] = res.id

    # XXX: tempfix
    def get_state_attrs(self):
        """
            Return the base task's state attributes, defaulting
            "browse_last_date" to "20160410" when it is absent.
        """
        res = userdata_base_task.Task.get_state_attrs(self)
        if "browse_last_date" not in res:
            res["browse_last_date"] = "20160410"
        return res

    def copy_external_data(self):
        """
            Copy the tables listed below under the task's tables prefix.

            Table names are templates filled from the state attributes merged
            with the task context (context values win on key collisions).
        """
        # XXX this is a hack around the fact that noone knows origins of some tables,
        # and there are some mismatches between periods processing and preparat aggregation

        # Work on a copy so that merging the task context never mutates the
        # mapping handed back by get_state_attrs().
        fmt_args = dict(self.get_state_attrs())
        fmt_args.update(self.ctx)

        tables = [
            (
                'queries_stats/PERIOD/web/norm/split30',
                'queries_stats/wustr.qnorm._{yandex_last_date}/web/norm/split30'
            ),
            (
                'clicks_shows/wustr.qnorm._{yandex_last_date}/filtered/web',
                'clicks_shows/wustr.qnorm._{yandex_last_date}/data/web'
            ),
        ]
        prefix = self.get_tables_prefix()
        for src, dst in tables:
            self.mr_client.copy_table(
                prefix + src.format(**fmt_args),
                prefix + dst.format(**fmt_args)
            )

    # TODO remove when there is no need to support older packages
    def set_subsystem_tags(self):
        """
            Set the REM tags built from the cluster name and state attributes.

            Raises KeyError if a state attribute referenced by a tag template
            is missing.
        """
        tags = [
            "cluster={mr_cluster}_user_search_factors_long_web__{yandex_last_date}",
            "cluster={mr_cluster}_user_search_features_long_web__{yandex_last_date}",
            "cluster={mr_cluster}_user_browse_features_long_web__{browse_last_date}",
            "cluster={mr_cluster}_user_counters_features_long_web__{counters_last_date}",
            "cluster={mr_cluster}_user_counters_features_long_web__{counters_first_date}_{counters_last_date}"  # XXX: remove
        ]
        conn = self.rem_client.connector()
        mr_cluster = self.ctx["mr_cluster"]
        state_attrs = self.get_state_attrs()
        for tag_tmpl in tags:
            tag = tag_tmpl.format(mr_cluster=mr_cluster, **state_attrs)
            logging.debug("setting tag %s", tag)
            conn.Tag(tag).Set()

    def set_subsystem_meta(self):
        """
            Publish "state_for_production" for each subsystem state.

            Depending on the package version check, the values are written
            either with the YT CLI ("yt set") or with the packaged
            state_meta_tool binary.
        """
        # (state attribute holding the date, subsystem state name)
        relations = [
            ("browse_last_date", "long_user_browse"),
            ("counters_last_date", "long_user_counters"),
            ("yandex_last_date", "long_user_search"),
        ]

        # We write (and read later) attrs straight into "//", which is wrong,
        # but it doesn't really make sense to fix because this task is going
        # to be removed soon.
        #
        # The updated state_meta_tool is unable to do that,
        # so we're using YT CLI tool as a workaround.
        unified_state_meta_tool = self.test_requirements(
            "USERFEAT_USERDATA_PACKAGE",
            trunk_revision=4249684,
            branch_num=34,
        )
        yt_bin = os.path.join(self.ctx["root"], "usr/bin/yt")
        yt_bin_command_prefix = (
            "{yt_bin}"
            " --proxy {yt_proxy}"
            " set".format(
                yt_bin=yt_bin,
                yt_proxy=self.ctx["real_mr_server"],
            )
        )

        def run_yt_set(key, value):
            # Runs "<yt> --proxy <proxy> set <key> <value>" through the shell.
            run_process(
                "{yt_bin_command_prefix} {key} {value}".format(
                    yt_bin_command_prefix=yt_bin_command_prefix,
                    key=key,
                    value=value,
                ),
                shell=True,
                check=True,
                wait=True,
            )

        if unified_state_meta_tool:
            # Create the root map node before writing nested keys into it.
            run_yt_set("//@userfeat_meta", "{}")

        state_attrs = self.get_state_attrs()
        for attr_name, state_name in relations:
            ds = state_attrs[attr_name]

            if unified_state_meta_tool:
                run_yt_set("//@userfeat_meta/{}_meta".format(state_name), "{}")
                run_yt_set(
                    "//@userfeat_meta/{}_meta/state_for_production".format(state_name),
                    ds,
                )
            else:
                command = (
                    "YT_PROXY={yt_proxy} YT_PREFIX=//"
                    " {bin_dir}/state_meta_tool "
                    " state_set"
                    " --state {state_name}"
                    " --field state_for_production"
                    " --int-value {ds}".format(
                        yt_proxy=self.ctx["real_mr_server"],
                        bin_dir=self.get_project_bin_dir(),
                        state_name=state_name,
                        ds=ds,
                    )
                )
                logging.info("Setting %s subsystem date to %s", state_name, ds)
                run_process(
                    command,
                    shell=True,
                    check=True,
                    wait=True,
                    log_prefix="set_subsystem_meta.{}".format(state_name),
                )

    def patch_config(self, state_attrs):
        """
            Patch userdata_config.py under the berkanavt root.

            state_attrs is a mapping of state attributes; its
            "data_format_version" (default 12) becomes webQSIndexVersion.
        """
        berkanavt = self.ctx["berkanavt"]
        self.config_patcher.patch(
            os.path.join(berkanavt, "userdata/scripts/userdata/userdata_config.py"),
            {
                "webQSIndexVersion": state_attrs.get("data_format_version", 12),  # XXX: tempfix
                "mrOpt": "stderrlevel=5,failonemptysrctable=1",
                "projectRoot": os.path.join(berkanavt, "userdata")
            }
        )

    def get_project_bin_dir(self):
        """
            Return the path of userdata/bin under the berkanavt root.
        """
        return os.path.join(self.ctx["berkanavt"], "userdata/bin")

    def arcadia_url_for(self, part):
        """
            Build "<scripts_arcadia_url>/<part>@<revision>".

            The revision is taken from a trailing "@<revision>" suffix of
            ctx['scripts_arcadia_url'] and defaults to HEAD. Only the text
            after the last '@' is considered, and only when it contains no
            '/', so an '@' inside the authority part of the URL
            (e.g. svn+ssh://user@host/...) is not mistaken for a revision.
        """
        base_url = self.ctx['scripts_arcadia_url']
        revision = 'HEAD'
        head, sep, tail = base_url.rpartition('@')
        if sep and '/' not in tail:
            base_url, revision = head, tail
        return os.path.join(base_url, part) + '@' + revision

    def init_files(self):
        """
            Prepare the working tree: run the base initialisation, make sure
            the userdata tmp directory exists and export host.cfg into the
            testing and production config directories.
        """
        userdata_base_task.Task.init_files(self)

        # XXX should be the problem of userdata_prep
        tmp_dir = os.path.join(self.ctx["berkanavt"], 'userdata', 'tmp')
        if not os.path.isdir(tmp_dir):
            # os.makedirs raises when the leaf already exists, so only
            # create the directory when it is missing.
            os.makedirs(tmp_dir)

        hcfg_url = self.arcadia_url_for('yweb/common/roboconf/conf-staging/host.cfg')
        for c in "testing", "production":
            hcfg_dir = os.path.join(self.ctx["berkanavt"], 'config/conf-' + c)
            hcfg_fname = os.path.join(hcfg_dir, 'host.cfg')
            make_folder(hcfg_dir)
            Arcadia.export(hcfg_url, hcfg_fname, force=True)  # overwrite

    def prepare_mr(self):
        """
            Run the base MR preparation, then copy the extra tables.
        """
        userdata_base_task.Task.prepare_mr(self)
        self.copy_external_data()

    def process_mr_data(self):
        """
            Run comp_long.py on the stored state and publish the dump resource.

            Steps: patch the config, set subsystem tags and meta, run the
            script through the shell, wait for all REM packets, remove the
            directories that should not end up in the dump and mark the
            output resource ready.
        """
        self.patch_config(self.get_state_attrs())
        self.set_subsystem_tags()
        self.set_subsystem_meta()

        paths = self.get_common_pythonpaths()
        for p in "", "/common":
            paths.append(os.path.join(self.ctx["berkanavt"], "userdata/scripts" + p))

        # Only the --state_ts argument is a template. Formatting the whole
        # command would misinterpret any '{' or '}' coming from paths or
        # environment values.
        cmd = (
            "PYTHONPATH={} ".format(":".join(paths)) +
            "MR_CLUSTER_INFO={} ".format(self.ctx["mr_cluster_info"]) +
            self.get_client_environ_str() + " " +
            'python {}/userdata/scripts/userdata/comp_long.py'.format(self.ctx["berkanavt"]) +
            ' --mr_table_prefix ' + self.get_tables_prefix() +
            ' --no_src_depends' +
            ' --state_ts {}'.format(self.get_state_attrs()["base_timestamp"])
        )

        if self.test_requirements(
            "USERFEAT_USERDATA_PACKAGE",
            trunk_revision=4020785,
            branch_num=28,
        ):
            cmd += " --dont_publish_to_reactor "

        run_process(cmd, shell=True, check=True, wait=True, log_prefix="userdata_prep.full_comp_long")
        self.rem_client.wait_all_packets()

        # drop dirs with binaries and scripts to avoid unnecessary diffs
        dump_dst_dir = os.path.relpath(os.path.join(self.ctx["berkanavt"], 'userdata'), self.abs_path())
        droppable = [
            "bin",
            "scripts",
            "data/arcadia_tests_data",
            "data/geodata",
            "data/quality_data",
            "data/urlrules",
            "data/urlrules.old",
        ]
        for tgt in droppable:
            tgt_path = os.path.join(dump_dst_dir, tgt)
            if os.path.exists(tgt_path):
                logging.debug("deleting %s", tgt_path)
                shutil.rmtree(tgt_path)

        self.change_resource_basename(self.ctx['dump_resource_id'], os.path.join(self.abs_path(), dump_dst_dir))
        self.mark_resource_ready(self.ctx['dump_resource_id'])


# NOTE(review): presumably the module-level name the task loader looks up to
# find this module's task class -- confirm against the loader.
__Task__ = UserdataPrepSingleRun
