# -*- coding: utf-8 -*-
from sandbox import sdk2

from sandbox.projects.modadvert.common import modadvert
from sandbox.projects.modadvert import resource_types


class ModadvertFindSpamNgram(modadvert.ModadvertBaseYtTask):
    """
    MODADVERT-50: Calculate ngram statistics.

    The task passes the binaries resource to ``untar_resource`` before
    execution and then runs ``./find_spam_ngrams`` with a command line built
    from the task parameters.
    """

    class Parameters(modadvert.ModadvertBaseYtTask.Parameters):
        # NOTE(review): yt_proxy_url and yt_worker_proxy_url are read in
        # on_execute_inner but are not declared here; presumably they are
        # inherited from the base Parameters class -- confirm.
        with sdk2.parameters.Group('Binary') as binary_group:
            binaries_resource = sdk2.parameters.Resource(
                'Resource with find_spam_ngrams binary',
                resource_type=[resource_types.MODADVERT_SPAM_NGRAMS_FINDER_BINARY],
                required=True
            )

        with sdk2.parameters.Group('Options') as options_group:
            # Input tables, forwarded to the binary as-is.
            banners_table = sdk2.parameters.String('Banners table', default='//home/direct-moderate/prod-banners')
            users_table = sdk2.parameters.String('Users table', default='//home/direct/db/users')
            bad_banners_table = sdk2.parameters.String('Joined abuses', default='//home/modadvert/AbuseRemoderationJoined')
            # When set, the bare ``--take-all`` flag is appended to the command.
            take_all = sdk2.parameters.Bool('Take all', default=False)
            # Output table, forwarded as ``--dst-table``.
            dst_table = sdk2.parameters.String('Destination', default='//home/modadvert/test/korneev/AbuseSpamNGrams')
            # Numeric limits/thresholds, forwarded to the binary unchanged
            # (their exact semantics are defined by the binary itself).
            max_ngram_length = sdk2.parameters.Integer('Max ngram length', default=3)
            max_ngram_count = sdk2.parameters.Integer('Max ngram count', default=20)
            min_rel_size = sdk2.parameters.Float('Minimum relative size', default=0.99)
            min_rel_common_size = sdk2.parameters.Float('Minimum relative common size', default=0.35)
            max_invalid_common_row_count = sdk2.parameters.Integer('Maximum invalid common row count', default=6000)

    def on_before_execute(self):
        """Run the base class hook, then unpack the binaries resource."""
        super(ModadvertFindSpamNgram, self).on_before_execute()
        self.untar_resource(self.Parameters.binaries_resource)

    def on_execute_inner(self):
        """Build the ``find_spam_ngrams`` command line and run it."""
        params = self.Parameters
        binary_cmd = [
            './find_spam_ngrams',
            '--yt-proxy-url', params.yt_proxy_url,
            '--yt-worker-proxy-url', params.yt_worker_proxy_url,
            '--banners-table', params.banners_table,
            '--users-table', params.users_table,
            '--bad-banners-table', params.bad_banners_table,
            '--dst-table', params.dst_table,
            # Integer/Float parameters are converted explicitly: every argv
            # entry must be a string, and relying on run_command to do the
            # conversion would be an unstated assumption about that helper.
            '--max-ngram-length', str(params.max_ngram_length),
            '--max-ngram-count', str(params.max_ngram_count),
            '--min-rel-size', str(params.min_rel_size),
            '--min-rel-common-size', str(params.min_rel_common_size),
            '--max-invalid-common-row-count', str(params.max_invalid_common_row_count),
        ]
        if params.take_all:
            binary_cmd.append('--take-all')
        self.run_command(binary_cmd)
