import os
import os.path
import re
from datetime import timedelta

from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.paths import make_folder
import sandbox.sandboxsdk.parameters as sdk_parameters
from sandbox.projects import resource_types
from sandbox.projects.common.userdata import userdata_base_task, util
from sandbox.projects.logs import resources as us_resources


class UserSearchPackage(sdk_parameters.LastReleasedResource):
    # Task input that selects the USERFEAT_USER_SEARCH_PACKAGE resource;
    # shown together with the other package parameters.
    group = userdata_base_task.PACKAGES_GROUP_NAME
    resource_type = resource_types.USERFEAT_USER_SEARCH_PACKAGE

    name = 'user_search_package_resid'
    description = 'Resource with user_search package'


class IpregResourceParameter(sdk_parameters.LastReleasedResource):
    # Optional task input that selects an IPREG_SET resource; shown together
    # with the other data-file parameters.
    group = userdata_base_task.DATA_FILES_GROUP_NAME
    resource_type = resource_types.IPREG_SET

    name = 'ipreg_resource_id'
    description = 'Resource with IPREG and co'

    # don't require it yet
    required = False


class BlockstatDictResourceParameter(sdk_parameters.ResourceSelector):
    # Optional task input that selects a SESSIONS_BLOCKSTAT resource; shown
    # together with the other data-file parameters.
    group = userdata_base_task.DATA_FILES_GROUP_NAME
    resource_type = us_resources.SESSIONS_BLOCKSTAT

    name = 'blockstat_dict_resource_id'
    description = 'Resource with blockstat.dict'

    required = False


class UserSearchUpdateDataRun(userdata_base_task.Task):
    """
        Runs user_search/scripts/update_long.py on a stored sample of user_sessions
    """

    type = 'USER_SEARCH_UPDATE_DATA'

    execution_space = 80000
    node_chunk_store_quota = 45 << 30  # 45 * 2**30
    forbid_chunk_storage_in_tmpfs = True
    yt_testable = True

    input_parameters = util.smart_join_params(
        userdata_base_task.Task.input_parameters,
        userdata_base_task.RflForCanonizerParameter,
        UserSearchPackage,
        IpregResourceParameter,
        BlockstatDictResourceParameter
    )

    @util.lazy_property
    def dates_context(self):
        """
            Collect the date settings of the 'sc' and 'yandex' sources.

            For each source the ``<source>_days_per_period``, ``<source>_periods``
            and ``<source>_last_date`` values are copied from the state attributes,
            and ``<source>_first_date`` is derived from the last date and the total
            number of days (periods * days per period).

            Returns a dict keyed by those eight names.
        """
        attrs = self.get_state_attrs()
        res = {}

        for s in ['sc', 'yandex']:
            for attr in "_days_per_period", "_periods", "_last_date":
                res[s + attr] = attrs[s + attr]
            res[s + '_first_date'] = util.get_first_date(
                res[s + "_last_date"],
                int(res[s + "_periods"]) * int(res[s + "_days_per_period"])
            )
        return res

    def create_local_config(self):
        """
            Patch user_search_config.py for this run and write web_last_date files.

            The config is rewritten through ``self.config_patcher.patch`` with a
            line processor that replaces the arguments of ``DailyDataPartsSpec(...)``
            and ``ThreshSet(...)`` entries. Afterwards a ``web_last_date`` file is
            written into the ``data`` and ``base`` folders under user_search.
        """
        dates = dict(self.dates_context)
        update_delta = 1 if self.supports_update() else 0

        sc_last_date = dates["sc_last_date"]
        # sc_last_date is sliced as YYYYMMDD to build a date(...) literal below
        dates.update(
            sc_year=int(sc_last_date[:4]),
            sc_month=int(sc_last_date[4:6]),
            sc_day=int(sc_last_date[6:]),
            update_delta=update_delta
        )

        # Compiled once per call instead of being looked up for every config line.
        daily_spec_re = re.compile(r'([^:]+:.*)DailyDataPartsSpec\(([^,]+),.*')
        thresh_set_re = re.compile(r'([^:]+:.*)ThreshSet\(([^,]+),.*')

        def custom_line_processor(l):
            # Rewrite the period arguments of a DailyDataPartsSpec entry,
            # keeping the text before the call and its first argument.
            m = daily_spec_re.search(l)
            if m:
                k = m.group(1)
                prefix = m.group(2)
                ol = k + 'DailyDataPartsSpec(' + prefix + ', '
                if 'imgDaily' in k or 'vidDaily' in k:
                    # image/video daily specs are left untouched
                    pass
                elif "'sc" in k:
                    l = ol + "{sc_days_per_period}, {sc_periods}, lastDate=date({sc_year}, {sc_month}, {sc_day})),".format(**dates)
                else:
                    l = ol + "{yandex_days_per_period}, {yandex_periods} - {update_delta}),".format(**dates)

            # Replace the thresholds of a ThreshSet entry; this runs on the
            # line as possibly rewritten above.
            m = thresh_set_re.search(l)
            if m:
                k = m.group(1)
                prefix = m.group(2)
                nl = k + 'ThreshSet(' + prefix + ','
                if '=' in k and 'lambda' in k:  # just a single property
                    if 'webPeriod' in k:
                        l = nl + '1,2,1,0))'
                    else:
                        l = nl + '1,1,1,0))'
                elif 'web.sc' in prefix or 'web.tr' in prefix:
                    l = nl + '1,1,1,0),'
                else:
                    l = nl + '2,2,2,0),'
            return l

        data = {
            "projectRoot": os.path.join(self.ctx['berkanavt'], 'user_search')
        }

        # The sp_* keys are only patched when the package satisfies this
        # revision requirement.
        if self.test_requirements(
            "USERFEAT_USER_SEARCH_PACKAGE",
            trunk_revision=2566501,
        ):
            data.update({
                "sp_first_date": util.str2date(dates["yandex_first_date"]),
                "sp_sampling_size": 0,
                "sp_max_rand": 3
            })

        self.config_patcher.patch(
            os.path.join(self.ctx['berkanavt'], 'user_search/scripts/user_search/user_search_config.py'),
            data,
            custom_line_processor
        )

        # One period before the last yandex date.
        last_comp_date = util.str2date(dates["yandex_last_date"]) - timedelta(int(dates["yandex_days_per_period"]))

        # for update
        for d in "data", "base":
            make_folder(os.path.join(self.ctx["berkanavt"], "user_search", d))
            with open(os.path.join(self.ctx["berkanavt"], "user_search", d, "web_last_date"), "w") as wld:
                # write() emits the same "<date>\n" as the former
                # ``print >> wld`` statement without relying on Python 2 syntax.
                wld.write("%s\n" % util.date2str(last_comp_date))

    def get_project_bin_dir(self):
        """
            Return the path of user_search/bin under the berkanavt directory.
        """
        return os.path.join(self.ctx["berkanavt"], "user_search/bin")

    def supports_update(self):
        """
            Return True when the user_search package version is at least 2546025.
        """
        return self.package_version("USERFEAT_USER_SEARCH_PACKAGE") >= 2546025

    def _get_periods(self):
        """
            Build the list of runs to perform, one dict per run.

            Each dict holds ``yandex_first_date``, ``yandex_last_date`` and
            ``comp``. The first run (``comp=True``) starts at the overall first
            date; when update is supported a second run (``comp=False``) ending
            at the overall last date is added.

            Raises SandboxTaskFailureError when fewer than two periods are
            configured.
        """
        dates = self.dates_context
        # Convert before comparing: the value is passed through int() everywhere
        # else, and a raw Python 2 str-vs-int comparison would never trip this check.
        periods = int(dates["yandex_periods"])
        if periods < 2:
            raise SandboxTaskFailureError("Number of periods should be 2 or more")

        with_update = self.supports_update()
        update_delta = 1 if with_update else 0
        total_days = (periods - update_delta) * int(dates["yandex_days_per_period"])

        res = [
            {
                "yandex_first_date": dates["yandex_first_date"],
                "yandex_last_date": util.get_last_date(dates["yandex_first_date"], total_days),
                "comp": True
            }
        ]
        if with_update:
            res.append({
                "yandex_first_date": util.get_first_date(dates["yandex_last_date"], total_days),
                "yandex_last_date": dates["yandex_last_date"],
                "comp": False
            })
        return res

    def process_mr_data(self):
        """
            Patch the local config, then run the update script once per period.

            After every script run the method waits on ``self.rem_client`` for
            all packets, dumping the REM status from the wait callback.
        """
        self.create_local_config()

        paths = self.get_common_pythonpaths()
        for suffix in "", "/common":
            paths.append(os.path.join(self.ctx["berkanavt"], "user_search/scripts" + suffix))

        cmd_template = (
            "cd {berkanavt}/user_search/scripts/user_search;"
            "PYTHONPATH={pythonpath} "
            "MR_OPT= "
            "MR_CLUSTER_INFO={mr_cluster_info} "
            "python {update_script} "
            "    --servs web "
            "    --last_date {yandex_last_date} "
            "    --sample_period_oldest_date {global_first_date} "
            "    {comp_args} "
            "    --emails kaa@yandex-team.ru "
            "    --no_src_depends "
            "    --mr_table_prefix {tables_prefix} "
        )

        ctx = self.ctx.copy()
        ctx["tables_prefix"] = self.get_tables_prefix()
        ctx["pythonpath"] = ":".join(paths)
        ctx["global_first_date"] = self.dates_context["yandex_first_date"]

        # Use update_long.py when the package ships it, update_data.py otherwise.
        ctx["update_script"] = (
            './update_long.py'
            if os.path.exists(
                os.path.join(
                    self.ctx["berkanavt"],
                    'user_search/scripts/user_search',
                    'update_long.py'
                )
            )
            else
            './update_data.py'
        )

        # Defined once for all periods; deliberately returns None whatever
        # dump_rem_status returns.
        def callback():
            self.dump_rem_status()

        for period in self._get_periods():
            ctx.update(**period)
            if period["comp"]:
                # specify comp_parts to exclude SurfCanyon, which is not supported by modern
                #  proc_period_mr (USERFEAT-923)
                # TODO : is it more appropriate to get comp_parts from
                #  user_search_config.webDailyDataPartsSpecs?
                ctx["comp_args"] = (
                    "--comp "
                    "--comp_parts yandex,yandex.comm,yandex.tr,yandex.tr.q,yandex.wcomm,tr "
                    "--preserve_prefix "
                )
            else:
                ctx["comp_args"] = "--preserve_prefix"
            cmd = cmd_template.format(**ctx)
            run_process(cmd, shell=True, check=True, wait=True, log_prefix='update_long.py')

            self.rem_client.wait_all_packets(callback=callback)

    def updated_result_attrs(self, attrs):
        """
            Copy the sc/yandex first and last dates into ``attrs`` and return it.
        """
        ctx = self.dates_context
        attrs['sc_last_date'] = ctx['sc_last_date']
        attrs['sc_first_date'] = ctx['sc_first_date']
        attrs['yandex_last_date'] = ctx['yandex_last_date']
        attrs['yandex_first_date'] = ctx['yandex_first_date']
        return attrs


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up — confirm.
__Task__ = UserSearchUpdateDataRun
