from datetime import date, timedelta
import re

from sandbox.sandboxsdk.parameters import (
    SandboxStringParameter,
    SandboxIntegerParameter,
    SandboxFloatParameter,
)

from sandbox.sandboxsdk.errors import SandboxTaskFailureError

from sandbox.projects.common.userdata import sample_base_task, util, mr_base_task

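# NOTE: the commented-out lines below are leftover fragments of removed
# 'sc_*' parameters and result attributes (presumably the surfcanyon sampling
# mentioned in the task docstring -- TODO confirm). Nothing in this module
# uses them.
#    name = 'sc_last_date'
#    default_value = 20140228


#    name = 'sc_periods'
#    default_value = 2
#            'sc_last_date': self.ctx['_sc_end_date'],
#            'sc_periods': self.ctx['sc_periods'],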


class YandexLastDate(SandboxStringParameter):
    """
    Optional last day of the sampled Yandex sessions range (YYYYMMDD string).
    """
    # When the value is empty, SampleUserSearchSessions.do_get_dates_context
    # uses the date three days before today instead.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    required = False
    name = 'yandex_last_date'
    description = 'Last date of Yandex sessions period, YYYYMMDD (optional, will use last available period if empty):'


class DaysPerPeriod(SandboxIntegerParameter):
    """
    Required length of a single period, in days.
    """
    # Multiplied by the number of periods in
    # SampleUserSearchSessions.do_get_dates_context to get the total number
    # of sampled days.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    required = True
    default_value = 1
    name = 'days_per_period'
    description = 'Days per period:'


class YandexNumberOfPeriods(SandboxIntegerParameter):
    """
    Required number of periods to sample.
    """
    # Multiplied by the days-per-period value in
    # SampleUserSearchSessions.do_get_dates_context to get the total number
    # of sampled days.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    required = True
    default_value = 20
    name = 'yandex_periods'
    description = 'Number of Yandex periods to sample:'


class YandexBaseFrac(SandboxFloatParameter):
    """
    Required base sampling fraction for Yandex sessions.
    """
    # Passed as `frac` to the user_sessions by-uid sample command; the
    # user_intents sample uses this value multiplied by 5.
    group = mr_base_task.MISC_PARAMS_GROUP_NAME
    required = True
    default_value = 0.00001
    name = 'yandex_frac'
    description = 'Base fraction of Yandex sessions to collect:'


class WhiteUsersFrac(SandboxFloatParameter):
    """
    Optional fraction of users to mark as "white".

    The value is passed as ``--frac`` to the
    ``user_sessions_create_synthetic_white_users`` command.
    """
    name = 'white_users_frac'
    description = 'Make this fraction of users "white":'
    required = False
    # Every other parameter in this module declares its default through
    # `default_value`; this one used `default`, which presumably left the
    # parameter without an effective default (TODO confirm against the SDK).
    default_value = 1.0
    # Original spelling kept as an alias for anything that still reads it.
    default = default_value
    group = mr_base_task.MISC_PARAMS_GROUP_NAME


class QueriesStatsCount(SandboxIntegerParameter):
    """
    Optional number of rows to take from the queries_stats table.

    The value is passed to ``get_mr_sample_command`` for both queries_stats
    samples.
    """
    name = 'queries_stats_count'
    description = 'Number of rows from queries_stats table:'
    required = False
    # Every other parameter in this module declares its default through
    # `default_value`; this one used `default`, which presumably left the
    # parameter without an effective default (TODO confirm against the SDK).
    default_value = 5000000
    # Original spelling kept as an alias for anything that still reads it.
    default = default_value
    group = mr_base_task.MISC_PARAMS_GROUP_NAME


class SampleUserSearchSessions(sample_base_task.Task):
    """
    Sample user_sessions web and surfcanyon, to test user_search/update.py

    For every day of the requested range the task registers by-uid sample
    commands for the user_sessions and user_intents tables and a command that
    creates a synthetic white_users table; it also samples the newest
    queries_stats table twice.
    """

    type = 'SAMPLE_USER_SEARCH_SESSIONS'

    input_parameters = util.smart_join_params(
        sample_base_task.Task.input_parameters,
        DaysPerPeriod,
        YandexLastDate,
        YandexNumberOfPeriods,
        YandexBaseFrac,
        WhiteUsersFrac,
        QueriesStatsCount
    )

    def do_get_dates_context(self):
        """
        Compute the sampled date range from the task parameters.

        The range covers ``yandex_periods * days_per_period`` days and ends on
        ``yandex_last_date`` or, when that is empty, three days before today.

        Returns a dict with 'yandex_start_date', 'yandex_end_date' (both
        formatted by util.date2str) and 'descr'.

        Raises SandboxTaskFailureError when the range would be empty or when
        the end date is today or later.
        """
        num_days = self.ctx[YandexNumberOfPeriods.name] * self.ctx[DaysPerPeriod.name]
        if num_days < 1:
            # A non-positive day count would put the start date after the end
            # date and silently sample no sessions at all.
            raise SandboxTaskFailureError(
                "Sampling range must cover at least one day, got {} "
                "(yandex_periods * days_per_period)".format(num_days)
            )

        requested_end = self.ctx.get(YandexLastDate.name)
        if requested_end:
            end_date = util.str2date(requested_end)
        else:
            end_date = date.today() - timedelta(3)
        # Validate the end date before deriving anything from it.
        if end_date >= date.today():
            raise SandboxTaskFailureError("{} is in the future".format(util.date2str(end_date)))

        # The range is inclusive on both ends, hence num_days - 1.
        start_str = util.date2str(end_date - timedelta(num_days - 1))
        end_str = util.date2str(end_date)
        return {
            'yandex_start_date': start_str,
            'yandex_end_date': end_str,
            'descr': "yandex-" + start_str + '-' + end_str
        }

    def get_strat_data_dir(self):
        """Return the directory path used for stratification data."""
        return 'quality/user_search/scripts/testing'

    def do_mr_sample(self):
        """
        Register and run all sampling commands for the requested date range.

        Per-day user_sessions / user_intents samples and the two queries_stats
        samples go to one ProcessRunner; the white_users commands go to a
        second runner that is run afterwards (presumably because their
        --source is the sampled user_sessions table under the tables prefix
        -- TODO confirm).

        Raises SandboxTaskFailureError when no queries_stats table exists.
        """
        pr = util.ProcessRunner()
        pr_white = util.ProcessRunner()
        dates = self.get_dates_context()

        first_date = util.str2date(dates['yandex_start_date'])
        last_date = util.str2date(dates['yandex_end_date'])
        # Inclusive walk over [first_date, last_date]; an inverted range
        # yields no iterations.
        for offset in range((last_date - first_date).days + 1):
            self._add_daily_commands(pr, pr_white, first_date + timedelta(offset))
        self._add_queries_stats_commands(pr, last_date)

        pr.run()
        pr_white.run()

    def _add_daily_commands(self, pr, pr_white, day):
        """Register the user_sessions, user_intents and white_users commands for one day."""
        date_str = util.date2str(day)
        date_str_yt = util.date2str_yt(day)
        base_frac = self.ctx[YandexBaseFrac.name]
        pr.add(
            "user_sessions." + date_str,
            self.get_sample_by_uid_command(
                table="user_sessions/pub/search/daily/{}/clean".format(date_str_yt),
                frac=base_frac,
                strat_config="strat-yandex.json"
            )
        )
        pr.add(
            "user_intents." + date_str,
            self.get_sample_by_uid_command(
                table="home/abt/user_intents/{}".format(date_str),
                frac=base_frac * 5
            )
        )
        pr_white.add(
            "white_users." + date_str,
            "{env} {bin_dir}/user_sessions_create_synthetic_white_users "
            "--server {server} --frac {frac} --salt '{salt}' "
            "--source {dst_prefix}user_sessions/pub/search/daily/{date_yt}/clean "
            "--dest {dst_prefix}home/antifraud/daily/cleaning/{date_yt}/white_users",
            env=self.get_client_environ_str(),
            bin_dir=self.ctx["bin_dir"],
            salt=self.ctx["sampling_salt"],
            server=self.ctx["mr_server"],
            frac=self.ctx[WhiteUsersFrac.name],
            dst_prefix=self.get_tables_prefix(),
            date_yt=date_str_yt
        )

    def _add_queries_stats_commands(self, pr, last_date):
        """Register two samples of the newest queries_stats table, dated last_date and the day before."""
        qs_tables = sorted(self.mr_client.get_tables_list("userfeat/queries_stats/"))
        if not qs_tables:
            raise SandboxTaskFailureError("There are no tables at //userfeat/queries_stats")

        # Table names sort lexicographically, so the last one is taken as the
        # newest.
        last_qs_table = qs_tables[-1]
        row_count = self.ctx[QueriesStatsCount.name]
        samples = (
            ("queries_stats.sample1", last_date),
            ("queries_stats.sample2", last_date - timedelta(days=1)),
        )
        for proc_name, dst_date in samples:
            pr.add(
                proc_name,
                self.get_mr_sample_command(
                    last_qs_table,
                    row_count,
                    dst_table=self._queries_stats_dst_table(last_qs_table, util.date2str(dst_date)),
                    by_keys=True
                ),
            )

    @staticmethod
    def _queries_stats_dst_table(src_table, date_str):
        """
        Derive a destination table path from a queries_stats source path.

        Every 8-digit run in src_table is replaced by date_str and the
        "userfeat/" component is dropped. The date is substituted directly
        instead of going through a str.format template, so paths that contain
        braces or more than one 8-digit run no longer break formatting.
        """
        # A callable replacement keeps date_str literal (no backslash or
        # group-reference processing).
        dst_table = re.sub(r"\d{8}", lambda _match: date_str, src_table)
        return dst_table.replace("userfeat/", "")

    def updated_result_attrs(self, attrs):
        """
        Extend the base task's result attributes with the sampled range.

        Adds 'yandex_last_date', 'yandex_periods' and 'yandex_days_per_period'
        and returns the updated attrs.
        """
        attrs = sample_base_task.Task.updated_result_attrs(self, attrs)
        dates = self.get_dates_context()
        attrs.update({
            'yandex_last_date': dates['yandex_end_date'],
            'yandex_periods': self.ctx[YandexNumberOfPeriods.name],
            'yandex_days_per_period': self.ctx[DaysPerPeriod.name]
        })
        return attrs


# Module-level alias for the task class defined above; presumably the name the
# Sandbox task loader looks up in this module -- TODO confirm.
__Task__ = SampleUserSearchSessions
