# -*- coding: utf-8 -*-
import datetime
from sandbox import sdk2

from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.modadvert.common import modadvert
from sandbox.projects.modadvert import resource_types
from sandbox.projects.modadvert.RunBinaryWithConfig import ModadvertRunBinaryWithConfig
from sandbox.projects.yql.RunYQL2 import RunYQL2


# strftime format applied to the task start time in on_before_execute(); the
# formatted value is used as the node name of the new brands table and, with a
# 'diff_' prefix, of the diff table.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# YQL query template handed to the RunYQL2 subtask. The %CLUSTER%,
# %DIFF_TABLE%, %NEW_BRANDS_TABLE% and %PREVIOUS_BRANDS_TABLE% markers are
# filled in through the subtask's 'custom_placeholders' parameter.
#
# The query inserts into the diff table every row of the new brands table that
# either has no row with the same (type, id) in the previous brands table
# (detected via second.brand_ids IS NULL after the LEFT JOIN) or whose
# brand_ids differ from the previous ones. brand_ids are compared after
# normalisation by $list_formatter: converted to an Int64 list, de-duplicated,
# sorted and joined with commas, so ordering and duplicates do not produce a diff.
DIFF_QUERY = '''
/* Code from sandbox/projects/modadvert/BrandsClassifier/__init__.py */
USE %CLUSTER%;
PRAGMA yt.InferSchema;

$list_formatter = ($l) -> {
    RETURN
        String::JoinFromList(
            ListMap(
                ListSort(ListUniq(Yson::ConvertToInt64List($l))),
                ($s) -> {RETURN CAST($s AS String);}
            ),
            ","
        );
};

INSERT INTO `%DIFF_TABLE%`
SELECT
    first.type AS type,
    first.id AS id,
    first.order_id AS order_id,
    first.brand_ids AS brand_ids
FROM
    `%NEW_BRANDS_TABLE%` AS first
LEFT JOIN
    `%PREVIOUS_BRANDS_TABLE%` AS second
ON
    first.type == second.type AND first.id == second.id
WHERE
    second.brand_ids IS NULL
OR
    $list_formatter(first.brand_ids) != $list_formatter(second.brand_ids)
ORDER BY
    type, id
;
'''


class ModadvertBrandsClassifier(modadvert.ModadvertBaseYtTask):
    """Run the brands classifier, optionally diff its output, and insert the result.

    The task creates up to three subtasks, waiting for all subtasks after each step:

    1. ``ModadvertRunBinaryWithConfig`` running ``./brands_classifier`` with the
       new brands table passed as ``--output-table``.
    2. Only when ``compute_diff`` is set: ``RunYQL2`` running ``DIFF_QUERY``
       against the new brands table and the ``latest`` node.
    3. ``ModadvertRunBinaryWithConfig`` running ``./dynamic_table_inserter`` with
       the diff table (or the full brands table when no diff is computed) as
       ``--source-table``.

    Finally the ``latest`` node in the working directory is linked to the new
    brands table.
    """

    class Requirements(modadvert.ModadvertBaseYtTask.Requirements):
        # The YT client is installed at run time, which is why ``yt.wrapper``
        # is imported inside methods rather than at module level.
        environments = (
            PipEnvironment('yandex-yt', '0.8.38a1', use_wheel=True),
            PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=True),
        )

    class Parameters(modadvert.ModadvertBaseYtTask.Parameters):
        # Directory that receives the timestamped brands table, the optional
        # 'diff_<timestamp>' table and the 'latest' link.
        working_directory = sdk2.parameters.String('Working directory')
        # When false, the diff step is skipped and the whole brands table is
        # passed to the inserter.
        compute_diff = sdk2.parameters.Bool('Compute diff with latest', default=True)
        with sdk2.parameters.Group('Classifier') as classifier_group:
            classifier_resource = sdk2.parameters.Resource(
                'Resource with classifier',
                resource_type=resource_types.MODADVERT_BRANDS_CLASSIFIER
            )
            brands_config_resource = sdk2.parameters.Resource(
                'Resource with brands config',
                resource_type=resource_types.BRANDS_CONFIG_TYPES
            )
            # Joined with spaces and passed to the classifier as '--types'.
            classification_types = sdk2.parameters.List('Types for classification')
        with sdk2.parameters.Group('Inserter') as inserter_group:
            inserter_resource = sdk2.parameters.Resource(
                'Resource with inserter',
                resource_type=resource_types.YA_PACKAGE
            )
            destination_table = sdk2.parameters.String('Destination table')
        with sdk2.parameters.Group('Timeouts') as timeouts_group:
            # All three values are in hours; they are converted to seconds for
            # the subtasks' 'kill_timeout'.
            timeout_classifier = sdk2.parameters.Integer('Classifier timeout', default=10)
            timeout_calc_diff = sdk2.parameters.Integer('Calc diff timeout', default=1)
            timeout_inserter = sdk2.parameters.Integer('Inserter timeout', default=15)

    @staticmethod
    def _hours_to_seconds(hours):
        """Return the length of ``hours`` hours in seconds (as a float)."""
        return datetime.timedelta(hours=hours).total_seconds()

    def on_before_execute(self):
        """Compute the table paths for this run once and store them in the context.

        The paths are derived from the start time, so they are only generated
        when ``Context.start_time`` is not set yet; later invocations reuse the
        stored values.
        """
        super(ModadvertBrandsClassifier, self).on_before_execute()
        if self.Context.start_time:
            return

        import yt.wrapper

        working_directory = self.Parameters.working_directory
        start_time = datetime.datetime.now().strftime(DATETIME_FORMAT)

        self.Context.start_time = start_time
        self.Context.brands_table = yt.wrapper.ypath_join(working_directory, start_time)
        self.Context.latest_brands_table = yt.wrapper.ypath_join(working_directory, 'latest')
        if self.Parameters.compute_diff:
            self.Context.diff_table = yt.wrapper.ypath_join(working_directory, 'diff_' + start_time)
        else:
            # Without a diff step the inserter consumes the full brands table.
            self.Context.diff_table = self.Context.brands_table

    def get_default_subtask_parameters(self):
        """Return parameters shared by subtasks: this task's owner and priority.

        NOTE(review): presumably consumed by the base class's ``create_subtask``
        -- confirm against ``ModadvertBaseYtTask``.
        """
        return {
            'owner': self.owner,
            'priority': self.Parameters.priority,
        }

    def on_execute_inner(self):
        """Run classifier, optional diff and inserter subtasks, then update 'latest'.

        Each subtask is created only if its handle is not already stored in the
        context, so a subtask that was already created is not created again.
        NOTE(review): this assumes ``wait_all_subtasks`` may interrupt the
        method and that it is re-entered from the top afterwards -- confirm
        against the base class.
        """
        if not self.Context.classifier_task:
            self.Context.classifier_task = self.create_subtask(
                ModadvertRunBinaryWithConfig,
                {
                    'kill_timeout': self._hours_to_seconds(self.Parameters.timeout_classifier),
                    'vault_user': self.author,
                    'binary_resource': self.Parameters.classifier_resource.id,
                    'config_resource': self.Parameters.brands_config_resource.id,
                    'base_cmd': './brands_classifier',
                    'cmd_options': {
                        '--working-cluster': self.Parameters.yt_proxy_url,
                        '--prefix': '//home/direct-moderate',
                        '--brands-conf': './config.yaml',
                        '--output-table': self.Context.brands_table,
                        # An unset list of types is passed as an empty string.
                        '--types': ' '.join(self.Parameters.classification_types or []),
                    },
                    'debug': False,
                },
                description='Brands classifier. Running for task {}'.format(self.id),
            )
        self.wait_all_subtasks()

        # NOTE(review): the diff query reads the 'latest' node, which this task
        # only (re)creates at the very end -- presumably it must already exist
        # before the first run with compute_diff enabled; confirm.
        if self.Parameters.compute_diff and not self.Context.differ_task:
            self.Context.differ_task = self.create_subtask(
                RunYQL2,
                {
                    'kill_timeout': self._hours_to_seconds(self.Parameters.timeout_calc_diff),
                    'query': DIFF_QUERY,
                    'custom_placeholders': {
                        '%CLUSTER%': self.Parameters.yt_proxy_url,
                        '%DIFF_TABLE%': self.Context.diff_table,
                        '%NEW_BRANDS_TABLE%': self.Context.brands_table,
                        '%PREVIOUS_BRANDS_TABLE%': self.Context.latest_brands_table,
                    },
                    'trace_query': True,
                    'use_v1_syntax': True,
                },
                description='Diff between two brands tables. Running for task {}'.format(self.id),
            )
        self.wait_all_subtasks()

        if not self.Context.inserter_task:
            self.Context.inserter_task = self.create_subtask(
                ModadvertRunBinaryWithConfig,
                {
                    'kill_timeout': self._hours_to_seconds(self.Parameters.timeout_inserter),
                    'vault_user': self.author,
                    'binary_resource': self.Parameters.inserter_resource.id,
                    'base_cmd': './dynamic_table_inserter',
                    'cmd_options': {
                        '--yt-proxy-url': self.Parameters.yt_proxy_url,
                        # Either the diff table or, without a diff step, the
                        # full brands table (see on_before_execute).
                        '--source-table': self.Context.diff_table,
                        '--destination-table': self.Parameters.destination_table,
                        '--destination-yt-proxy-url': 'markov',
                        '--user-slots': '3',
                        '--insertion-chunk-size': '10000',
                    },
                    'debug': False,
                },
                description='Dynamic table inserter. Running for task {}'.format(self.id),
            )
        self.wait_all_subtasks()

        # Point 'latest' at the table produced by this run; force=True is
        # passed so an existing 'latest' node does not make the call fail.
        import yt.wrapper
        yt_client = yt.wrapper.YtClient(self.Parameters.yt_proxy_url, self.get_yt_token())
        yt_client.link(self.Context.brands_table, self.Context.latest_brands_table, force=True)
