# -*- coding: utf-8 -*-

import logging

from sandbox import sdk2
import sandbox.common.types.resource as ctr

from sandbox.projects.ads.common import YangConversionBannersBin

from sandbox.sdk2.helpers import subprocess as sp

from sandbox.projects.adv_machine.common import process_wrapper


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class PrepareBestMatchPool(sdk2.Task):
    """Prepare pool for best match model.

    Runs the ``prepare-data-for-best-match`` mode of the
    ``yang_conversion_banners`` binary resource, forwarding the task
    parameters as command-line flags and the YT settings as environment
    variables.
    """

    class Requirements(sdk2.Task.Requirements):
        cores = 1

        # Empty caches declaration.
        # NOTE(review): presumably tells Sandbox the task needs no shared
        # caches -- confirm against the Sandbox SDK documentation.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        # 24 * 60 * 60 = 86400; presumably seconds, i.e. one day.
        kill_timeout = 24 * 60 * 60
        # Each parameter in this group is passed to the binary as a CLI flag
        # in on_execute().
        with sdk2.parameters.Group('Parameters') as parameters:
            input_dir = sdk2.parameters.String("Input dir", required=True)
            output_dir = sdk2.parameters.String("Output dir", required=True)
            date = sdk2.parameters.String("YYYY-MM-DD day to grep (or regular)", required=True)
            best_match_model = sdk2.parameters.String("Best match model file", required=True)
            synonyms_model = sdk2.parameters.String("Synonyms model file", required=True)
            synonyms_treshold = sdk2.parameters.Float("Synonyms treshold", default_value=0.7)
            caesar_table = sdk2.parameters.String("Caesar table", default_value="//home/bigb/caesar/stable/AdGroups")
            rows_to_grep = sdk2.parameters.Integer("Rows to grep", default_value=100000)
            rows_to_score = sdk2.parameters.Integer("Rows to score", default_value=3000000)
            bins_count = sdk2.parameters.Integer("Bins count", default_value=20)
            rows_in_bin = sdk2.parameters.Integer("Rows count in one bin", default_value=1000)

        # These are exported to the binary through environment variables.
        with sdk2.parameters.Group('YT parameters') as yt_block:
            yt_proxy = sdk2.parameters.String('YT proxy', default='hahn')
            yt_token_vault = sdk2.parameters.String('YT_TOKEN vault name', default='adv_machine_yt_token')
            yt_pool = sdk2.parameters.String('YT_POOL', default='adv-machine-research')

        with sdk2.parameters.Group('Resources') as resources_block:
            yang_conversion_banners_res_id = sdk2.parameters.LastReleasedResource(
                'yang_conversion_banners binary resource',
                resource_type=YangConversionBannersBin,
                state=(ctr.State.READY, ),
                required=True,
            )

    def on_execute(self):
        """Run the binary with flags and environment built from the parameters.

        NOTE(review): ``sp`` presumably mirrors the stdlib ``subprocess``
        module, in which case ``check_call`` raises ``CalledProcessError``
        on a non-zero exit code -- confirm.
        """
        bin_res = sdk2.ResourceData(self.Parameters.yang_conversion_banners_res_id)

        # Numeric parameters are stringified because argv entries must be strings.
        cmd = [
            str(bin_res.path), 'prepare-data-for-best-match',
            '--i', self.Parameters.input_dir,
            '--o', self.Parameters.output_dir,
            '--d', self.Parameters.date,
            '--m', self.Parameters.best_match_model,
            '--s', self.Parameters.synonyms_model,
            '--st', str(self.Parameters.synonyms_treshold),
            '--c', self.Parameters.caesar_table,
            '--g', str(self.Parameters.rows_to_grep),
            '--t', str(self.Parameters.rows_to_score),
            '--b', str(self.Parameters.bins_count),
            '--r', str(self.Parameters.rows_in_bin),
        ]

        # This dict is passed as env= below, so (with stdlib subprocess
        # semantics) the child sees only these variables and nothing
        # inherited from the task process.
        env = {'MR_RUNTIME': 'YT'}
        # Previously the yt_proxy parameter was declared but never used, so
        # the cluster chosen in the task form was silently ignored.
        if self.Parameters.yt_proxy:
            env['YT_PROXY'] = self.Parameters.yt_proxy
        if self.Parameters.yt_token_vault:
            # The token is read from the vault at run time and is not a task parameter.
            env['YT_TOKEN'] = sdk2.Vault.data(self.Parameters.yt_token_vault)
        if self.Parameters.yt_pool:
            env['YT_POOL'] = self.Parameters.yt_pool

        # The binary's stdout/stderr go to the streams provided by process_wrapper.
        with process_wrapper(self, logger='prepare_best_match_pool') as pl:
            sp.check_call(cmd, stdout=pl.stdout, stderr=pl.stderr, env=env)
