from __future__ import print_function
from sandbox.sdk2 import (
    Requirements,
    parameters,
)
from sandbox.projects.yabs.base_bin_task import BaseBinTask
import logging
import time


# Chunk size for row deletion: each delete_rows call issued by the task
# receives at most this many rows.
# NOTE(review): presumably chosen to stay under a YT per-request row limit -- confirm.
YT_MAX_ROWS = 50 * 1000


class YabsUrlMonitoringInputGenerator(BaseBinTask):
    """Merges all source tables for URL monitoring to a single one with deduplicated records.

    The task additionally removes from the states table replica the URLs that
    were selected as "old" (see ``do_generate_input``).
    """
    class Requirements(Requirements):
        cores = 1

        class Caches(Requirements.Caches):
            pass

    class Parameters(BaseBinTask.Parameters):
        max_restarts = 1
        description = "Generating input for URL monitoring"

        with BaseBinTask.Parameters.version_and_task_resource() as version_and_task_resource:
            resource_attrs = parameters.Dict("Filter resource by", default={"name": "YabsUrlMonitoring"})

        with parameters.Group("YT parameters") as yt_parameters:
            # Cluster whose client selects old URLs, runs the generator and sorts the result.
            with parameters.String("Cluster", default="hahn") as cluster:
                cluster.values.hahn = "hahn"
                # NOTE(review): `banach` is assigned "arnold" while the other entries
                # map a name to itself -- confirm this is intentional.
                cluster.values.banach = "arnold"
                cluster.values.freud = "freud"

            # Cluster whose client deletes rows from the "<states_table>_replica" table.
            with parameters.String("Meta cluster", default="markov") as meta_cluster:
                meta_cluster.values.pythia = "pythia"
                meta_cluster.values.markov = "markov"

            yav_secret = parameters.YavSecret(
                label="Yav secret id",
                required=True,
                description='secret should contain keys: yabs-url-mon-yt-token, yabs-url-mon-solomon-token',
                default="sec-01ct3gky5hfxyjer8z00qaw31v",
            )

        with parameters.Group("Input parameters") as in_parameters:
            input_dir = parameters.String("Source dir", default="//home/yabs/url_monitoring/sources")
            trusted_domains_table = parameters.String("Trusted domains table", default="//home/direct/export/trusted_domain")
            states_table = parameters.String("States table", default="//home/yabs/url_monitoring/states")
            ignore_period = parameters.Integer("Ignore period, in seconds", description='URLs that were updated at least this period ago will be excluded from old input table', default=24 * 60 * 60)

        with parameters.Group("Output parameters") as out_parameters:
            target_table = parameters.String("Target table", default="//home/yabs/url_monitoring/input")
            error_table = parameters.String("Table to write incorrect URLs", default="//home/yabs/url_monitoring/incorrect_urls")

        with parameters.Group("Statistics parameters") as statistics:
            host = parameters.String("Host to label metrics", default="sandbox")
            project = parameters.String("Solomon project", default="yabs_url_monitoring")
            solomon_api_url = parameters.String("Solomon API URL", default="https://solomon.yandex.net/api/v2/push")

    def generate_config(self):
        """Build the url_monitoring2 ``Config`` from the statistics task parameters.

        Returns:
            A ``Config`` constructed from a dict with a single ``"statistics"``
            section holding ``host``, ``project`` and ``solomon_api_url``.
        """
        # Imported lazily: the module comes from the task binary, not the Sandbox runtime.
        from yabs.stat.url_monitoring2.lib.config import Config

        statistics = {
            "host": self.Parameters.host,
            "project": self.Parameters.project,
            "solomon_api_url": self.Parameters.solomon_api_url,
        }
        return Config({"statistics": statistics})

    def do_generate_input(self, ytc, ytc_meta, solomon_token, config, input_dir, trusted_domains_table, target_table, error_table, states_table, ignore_period):
        """Generate the sorted input table and drop old URLs from the states replica.

        Steps, in order:
          1. ("prepare" stage) select URLs from ``states_table`` whose ``updated``
             is older than ``ignore_period`` seconds and whose status is
             ``URLStatus.UNTRACKED``; run ``InputGenerator`` with that URL set,
             writing to ``target_table + "_unsorted"``.
          2. ("sort_input" stage) sort the unsorted table by ``url`` into
             ``target_table``.
          3. Delete the selected rows from ``states_table + "_replica"`` through
             ``ytc_meta``, at most ``YT_MAX_ROWS`` rows per call.

        Args:
            ytc: YT client used for the select query, the generator and the sort.
            ytc_meta: YT client used to delete rows from the states replica table.
            solomon_token: token passed to every ``stage(...)`` context.
            config: object passed as ``config=`` to every ``stage(...)`` context.
            input_dir: source directory handed to ``InputGenerator``.
            trusted_domains_table: trusted domains table handed to ``InputGenerator``.
            target_table: path of the final sorted table.
            error_table: table handed to ``InputGenerator`` for incorrect URLs.
            states_table: table queried for old URLs.
            ignore_period: age threshold in seconds for a URL to count as old.
        """
        from yabs.stat.url_monitoring2.lib.misc import stage
        from yabs.stat.url_monitoring2.lib.input_generator import InputGenerator
        from yabs.stat.url_monitoring2.lib.constant import URLStatus

        # Derived paths are computed up front so later stages do not depend on
        # names that happen to be bound inside an earlier `with` block.
        unsorted_table = target_table + "_unsorted"
        meta_table = states_table + "_replica"

        with stage("prepare", solomon_token, config=config):
            old_ts = int(time.time()) - ignore_period
            query = "url from [{states}] where updated < {old_ts} and status = {untracked}".format(
                states=states_table,
                old_ts=old_ts,
                untracked=URLStatus.UNTRACKED.value,
            )
            # The rows are kept as a list (for chunked deletion below) and as a
            # set of URLs (handed to the generator).
            old_urls = list(ytc.select_rows(query, input_row_limit=100000000, output_row_limit=100000000))
            old_urls_set = {row['url'] for row in old_urls}

            input_generator = InputGenerator(ytc, input_dir, trusted_domains_table, target_table, unsorted_table, error_table, old_urls_set)
            op = input_generator.schedule()

            # Publish a clickable link to the scheduled operation in the task info.
            operation_link = 'YT operation: <a href="%s">%s</a>' % (op.url, op.id)
            self.set_info(operation_link, do_escape=False)

            input_generator.wait()
            input_generator.add_stats()

        with stage("sort_input", solomon_token, config=config):
            ytc.run_sort(unsorted_table, destination_table=target_table, sort_by="url")

        logging.info("Deleting old urls from states table: %s", old_urls_set)
        # Delete in slices so a single request never carries more than YT_MAX_ROWS rows.
        for start in range(0, len(old_urls), YT_MAX_ROWS):
            ytc_meta.delete_rows(meta_table, old_urls[start:start + YT_MAX_ROWS])

    def on_execute(self):
        """Task entry point: read tokens, create YT clients and run the generation."""
        from yt.wrapper import YtClient

        # Fetch the secret once and read both tokens from the same payload
        # instead of requesting it separately for every key.
        secret_data = self.Parameters.yav_secret.data()
        yt_token = secret_data["yabs-url-mon-yt-token"]
        solomon_token = secret_data["yabs-url-mon-solomon-token"]

        ytc = YtClient(proxy=self.Parameters.cluster, token=yt_token)
        ytc_meta = YtClient(proxy=self.Parameters.meta_cluster, token=yt_token)

        config = self.generate_config()
        self.do_generate_input(
            ytc,
            ytc_meta,
            solomon_token,
            config,
            self.Parameters.input_dir,
            self.Parameters.trusted_domains_table,
            self.Parameters.target_table,
            self.Parameters.error_table,
            self.Parameters.states_table,
            self.Parameters.ignore_period,
        )
