import os
import logging
from datetime import timedelta
import re

from sandbox.sandboxsdk.paths import make_folder
import sandbox.sandboxsdk.parameters as sdk_parameters

from sandbox.projects import resource_types
from sandbox.projects.common.userdata import userdata_base_task
from sandbox.projects.common.userdata import util


class UserCountersPackage(sdk_parameters.LastReleasedResource):
    """
        Task parameter that selects the resource holding the user_counters package.
    """

    # Resource type accepted by this parameter.
    resource_type = resources_type = resource_types.USERFEAT_USER_COUNTERS_PACKAGE
    del resources_type

    # Parameter key and its human-readable label.
    name = 'user_counters_package_resid'
    description = 'Resource with user_counters package'

    # Shown together with the other package parameters of the base task.
    group = userdata_base_task.PACKAGES_GROUP_NAME


class UserCountersUpdateDataRun(userdata_base_task.Task):
    """
        Runs user_counters/scripts/user_counters/daily.py on a stored sample of user_sessions.

        The script is launched twice: first for the intermediate date with
        --force-skip-updating, then for the last date with --force-updating.
    """

    type = 'USER_COUNTERS_UPDATE_DATA'

    # NOTE(review): units of execution_space are not visible here (presumably MB) - confirm.
    execution_space = 18000
    yt_testable = True

    # Base task parameters extended with the canonizer RFL and the package resource.
    input_parameters = util.smart_join_params(
        userdata_base_task.Task.input_parameters,
        userdata_base_task.RflForCanonizerParameter,
        UserCountersPackage,
    )

    def dates_context(self):
        """
            Return the state attributes that carry the date settings of the run.

            Callers in this class read the keys 'last_date', 'period',
            'real_period' and 'intermediate_date' from the result.
        """
        state_attrs = self.get_state_attrs()
        return state_attrs

    def create_local_config(self):
        """
            Patch user_counters_config.py and seed the 'last_date' marker files.

            A 'last_date' file is written into both the 'data' and the 'base'
            subdirectories of the user_counters project root.
        """
        state = self.dates_context()
        project_root = os.path.join(self.ctx['berkanavt'], 'user_counters')
        config_path = os.path.join(
            self.ctx['berkanavt'],
            "user_counters/scripts/user_counters/user_counters_config.py",
        )
        overrides = {
            "projectRoot": project_root,
            "mrOpts": "stderrlevel=5,failonemptysrctable=1",
            "userLogsRemHostname": "",  # XXX
            "collectPeriod": int(state['period'])
        }
        self.config_patcher.patch(config_path, overrides)

        for subdir in ('data', 'base'):
            target_dir = os.path.join(project_root, subdir)
            make_folder(target_dir)
            marker_path = os.path.join(target_dir, 'last_date')
            with open(marker_path, 'w') as marker:
                # One line: the last date followed by a newline.
                marker.write(str(state['last_date']) + '\n')

    def prepare_mr(self):
        """
            Run the base preparation, then duplicate every watch_log table
            under the corresponding watch_log_tskv name.
        """
        userdata_base_task.Task.prepare_mr(self)
        table_names = self.mr_client.get_tables_list(self.get_tables_prefix())
        for source_table in table_names:
            tskv_table = re.sub(r'\bwatch_log\b', 'watch_log_tskv', source_table)
            if tskv_table == source_table:
                # Name has no standalone 'watch_log' component - nothing to copy.
                continue
            self.mr_client.copy_table(source_table, tskv_table)

    def get_project_bin_dir(self):
        """
            Return the path of the user_counters binaries directory.
        """
        bin_dir = os.path.join(self.ctx["berkanavt"], "user_counters/bin")
        return bin_dir

    def process_mr_data(self):
        """
            Configure the project, set the source tags and run daily.py twice.

            Steps:
              1. write the local config and the last_date markers;
              2. set one REM tag per day from last_date - real_period up to
                 last_date inclusive;
              3. run the intermediate stage and wait for all packets;
              4. run the final stage and wait for all packets.
        """
        self.create_local_config()

        scripts_root = os.path.join(self.ctx["berkanavt"], "user_counters/scripts")
        python_paths = self.get_common_pythonpaths()
        python_paths.extend([scripts_root + tail for tail in ("", "/common")])

        # Placeholders in braces are presumably substituted by
        # util.run_shell_process from its keyword arguments - confirm there.
        command_template = (
            "cd {berkanavt}/user_counters/scripts/user_counters/; "
            "PYTHONPATH={pythonpath} "
            "MR_NET_TABLE= "
            "MR_USER= "
            "MR_OPT= "
            "MR_TABLE_PREFIX={tables_prefix} "
            "MR_CLUSTER_INFO={mr_cluster_info} "
            "./daily.py "
            "    --date {date} "
            "    -v "
            "    --force-{updating} "
            "    --comp "
        )

        # Task context overlaid with the date settings and two computed values.
        params = self.ctx.copy()
        params.update(self.dates_context())
        params["tables_prefix"] = self.get_tables_prefix()
        params["pythonpath"] = ":".join(python_paths)

        # set source tags
        final_day = util.str2date(params['last_date'])
        day = final_day - timedelta(int(params['real_period']))
        connector = self.rem_client.connector()
        one_day = timedelta(1)
        while day <= final_day:
            tag_name = "cluster={}_publish_watch_log_tskv_sessions_{}".format(
                params['mr_cluster'], util.date2str(day)
            )
            logging.debug("Setting source tag {}".format(tag_name))
            connector.Tag(tag_name).Set()
            day = day + one_day

        # (process name, context key holding the date, value for --force-...)
        stages = (
            ("update_data.py.intermediate", 'intermediate_date', 'skip-updating'),
            ("update_data.py.final", 'last_date', 'updating'),
        )
        for process_name, date_key, update_mode in stages:
            util.run_shell_process(
                process_name, command_template,
                rem_scripts_dir=self.rem_runner.get_scripts_dir(),
                date=params[date_key],
                updating=update_mode,
                **params
            )
            # Each stage must finish completely before anything else runs.
            self.rem_client.wait_all_packets()

    def updated_result_attrs(self, attrs):
        """
            Add the covered date range to the result attributes.

            Sets 'counters_last_date' to the last date and 'counters_first_date'
            to the day (period - 1) days earlier, then returns the same attrs object.
        """
        state = self.dates_context()
        attrs['counters_last_date'] = state['last_date']

        final_day = util.str2date(state['last_date'])
        days_back = int(state['period']) - 1
        attrs['counters_first_date'] = util.date2str(final_day - timedelta(days_back))
        return attrs


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = UserCountersUpdateDataRun
