from datetime import date, timedelta

from sandbox.sandboxsdk.parameters import (
    SandboxStringParameter,
    SandboxIntegerParameter,
)
from sandbox.sandboxsdk.errors import SandboxTaskFailureError

from sandbox.projects.common.userdata import sample_base_task, util, mr_base_task


class LargeSampleSize(SandboxIntegerParameter):
    # Integer task input: row count requested when sampling the "large"
    # tables. Shown together with the other MR input parameters.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    name = 'large_sample_size'
    required = True
    default_value = 2000000
    description = 'Num rows for "large" tables sample:'


class SmallSampleSize(SandboxIntegerParameter):
    # Integer task input: row count requested when sampling the "small"
    # tables. Shown together with the other MR input parameters.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    name = 'small_sample_size'
    required = True
    default_value = 200000
    description = 'Num rows for "small" tables sample:'


class WikiCountries(SandboxStringParameter):
    # String task input: comma-separated codes of the wiki editions to sample.
    # Each code is interpolated into a "home/wikilinks/{}wiki" table path by
    # the task defined in this module.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    name = 'wiki_countries'
    required = True
    default_value = "be,ru,uk,en,tr,de"
    description = 'Countries to extract wiki data for (csv ISO alpha-2):'


class BeastCountries(SandboxStringParameter):
    # String task input: comma-separated country names for BEAST sampling.
    # The task upper-cases each name when building the source table path.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    name = 'beast_countries'
    required = True
    default_value = "turkey,indonesia"
    description = 'Countries to extract BEAST data for (csv names):'


class BeastLastDate(SandboxStringParameter):
    # Optional string task input: last day of the BEAST period, YYYYMMDD.
    # No default is declared; when the value is empty the task falls back to
    # "today minus three days".
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    name = 'beast_last_date'
    required = False
    description = 'Last date of beast period, YYYYMMDD (optional, will use last available period if empty):'


class DaysPerBeastPeriod(SandboxIntegerParameter):
    # Integer task input: length of the BEAST period in days. The period is
    # counted backwards from (and includes) the last date.
    group = mr_base_task.INPUT_PARAMS_GROUP_NAME
    name = 'days_per_beast_period'
    default_value = 3
    required = True
    description = 'Days per beast period:'


class SampleMiscdataSources(sample_base_task.Task):
    """
    Sample various tables that are used as inputs by md_prep
    """

    type = 'SAMPLE_MISCDATA_SOURCES'

    # Parameters of the base sampling task plus the ones specific to this task.
    input_parameters = util.smart_join_params(
        sample_base_task.Task.input_parameters,
        WikiCountries,
        BeastCountries,
        BeastLastDate,
        DaysPerBeastPeriod,
        LargeSampleSize,
        SmallSampleSize
    )

    @staticmethod
    def _split_csv(raw):
        """
        Split a comma-separated parameter value into a list of items.

        Surrounding whitespace is stripped and empty items are dropped, so
        values such as "be, ru" or "be,ru," do not produce malformed table
        paths.
        """
        stripped = (item.strip() for item in raw.split(','))
        return [item for item in stripped if item]

    def updated_result_attrs(self, attrs):
        """
        Extend the base task's result attributes with the sampled BEAST
        period bounds and the raw country lists taken from the task context.

        :param attrs: attributes mapping, passed to the base implementation
        :return: the mapping returned by the base implementation, updated
        """
        attrs = sample_base_task.Task.updated_result_attrs(self, attrs)
        dates = self.get_dates_context()
        attrs.update({
            'last_date': dates['beast_end_date'],
            'first_date': dates['beast_start_date'],
            'wiki_countries': self.ctx[WikiCountries.name],
            'beast_countries': self.ctx[BeastCountries.name],
        })
        return attrs

    def do_get_dates_context(self):
        """
        Compute the BEAST sampling period from the task parameters.

        The period ends on the configured last date (or three days before
        today when none is given) and spans `days_per_beast_period` days,
        both ends inclusive.

        :return: dict with "descr", "beast_start_date", "beast_end_date"
                 (date strings produced by util.date2str) and
                 "beast_num_days" (int)
        :raises SandboxTaskFailureError: if the period length is not positive
                 or the end date is today or later
        """
        # Coerce once so that the arithmetic below and the returned value
        # agree even if the context holds the number as a string.
        num_days = int(self.ctx[DaysPerBeastPeriod.name])
        if num_days < 1:
            raise SandboxTaskFailureError(
                "Days per beast period must be positive, got {}".format(num_days)
            )

        # Take "today" once so the default and the check below cannot
        # disagree if the call straddles midnight.
        today = date.today()
        last_date_str = self.ctx.get(BeastLastDate.name)
        if last_date_str:
            end_date = util.str2date(last_date_str)
        else:
            end_date = today - timedelta(3)

        # Note: today itself is rejected as well, not only later dates.
        if end_date >= today:
            raise SandboxTaskFailureError("End date {} is in the future".format(
                util.date2str(end_date)
            ))

        start_date = end_date - timedelta(num_days - 1)
        start_str = util.date2str(start_date)
        end_str = util.date2str(end_date)
        return {
            "descr": start_str + '-' + end_str,
            "beast_start_date": start_str,
            "beast_end_date": end_str,
            "beast_num_days": num_days,
        }

    def do_mr_sample(self):
        """
        Queue and run all sampling commands for this task.

        Two process runners are used: the first collects every table sampling
        command, the second collects the `mr_set_ops -filter` commands for
        wiki pages_ids. The second runner is started only after the first one
        returns; presumably because the filter reads the sampled
        external_links table under the destination prefix (verify against
        mr_set_ops semantics).

        Side effect: stores the dates context in self.ctx["dates_context"].
        """
        dates = self.ctx["dates_context"] = self.get_dates_context()
        large_count = self.ctx[LargeSampleSize.name]
        small_count = self.ctx[SmallSampleSize.name]

        sample_runner = util.ProcessRunner()
        filter_runner = util.ProcessRunner()

        # Substitution values for the command template passed to
        # filter_runner.add(); "yai_wiki" is filled in per country below.
        cmd_vars = {
            "src_prefix": self.get_mr_src_prefix(),
            "dst_prefix": self.get_tables_prefix(),
            "env": self.get_client_environ_str(),
            "bin_dir": self.ctx["bin_dir"],
            "mr_server": self.ctx["mr_server"],
        }
        for country in self._split_csv(self.ctx[WikiCountries.name]):
            wiki_dir = "home/wikilinks/{}wiki".format(country)
            cmd_vars["yai_wiki"] = wiki_dir
            sample_runner.add(
                "yai_external_links." + country,
                self.get_mr_sample_command(
                    table=wiki_dir + "/external_links",
                    count=large_count
                )
            )
            sample_runner.add(
                "yai_text." + country,
                self.get_mr_sample_command(
                    table=wiki_dir + "/text",
                    count=small_count
                )
            )
            filter_runner.add(
                "yai_pages_ids." + country,
                "{env} {bin_dir}/mr_set_ops -s {mr_server} -filter "
                "{src_prefix}{yai_wiki}/pages_ids "
                "{dst_prefix}{yai_wiki}/external_links "
                "{dst_prefix}{yai_wiki}/pages_ids ",
                **cmd_vars
            )

        # The list of days in the period does not depend on the country, so
        # build it once instead of re-parsing the bounds for every country.
        beast_days = []
        cur_date = util.str2date(dates['beast_start_date'])
        last_date = util.str2date(dates['beast_end_date'])
        while cur_date <= last_date:
            beast_days.append(util.date2str(cur_date))
            cur_date += timedelta(1)

        for country in self._split_csv(self.ctx[BeastCountries.name]):
            for date_str in beast_days:
                table = "home/freshness/beast/old/{}/{}/WEB/GOOGLE/JSON".format(
                    date_str, country.upper()
                )
                sample_runner.add(
                    "beast." + date_str + "." + country,
                    self.get_mr_sample_command(
                        table=table,
                        count=small_count
                    )
                )

        sample_runner.add(
            "videotop",
            self.get_mr_sample_command(
                table="videotop/bannedvideo",
                count=large_count,
                src_infix="userfeat/"
            )
        )

        sample_runner.add(
            "browser_bookmarks",
            self.get_mr_sample_command(
                table="extdata/browser_bookmarks",
                count=large_count,
                src_infix="userfeat/"
            )
        )
        sample_runner.run()
        filter_runner.run()


# Module-level alias for the task class defined above.
# NOTE(review): presumably the name the Sandbox task loader looks up to find
# this module's task class - confirm against the loader before renaming.
__Task__ = SampleMiscdataSources
