import os
import subprocess

from sandbox import sdk2
from sandbox.common.types.resource import State as ResourceState
from sandbox.projects.resource_types import ADDRS_BUSINESS_SHARD, ORGS_MAIN_RUBRICS_BIN
from sandbox.projects.yql.RunYQL2 import RunYQL2

from sandbox.projects.geoadv_ranking.lib import AbstractSimilarAdvertTask, CommonSimilarAdvertParameters
from sandbox.projects.geoadv_ranking.lib.subtasks import wait_subtasks, validate_subtask_statuses
from sandbox.projects.geoadv_ranking.lib.tokens import NANNY_ROBOT_EXTDATA_SECRET_ID, SECRET_KEY
from sandbox.projects.geoadv_ranking.resources import PrepareDataFromShardExecutable


# YQL query template, filled in with str.format() using the placeholders
# {input_table}, {output_table} and {rubric_table}.
#
# It extracts the "Id" of every entry of `data.Rubrics` from the input table,
# counts how many times each rubric id occurs (`frequency`), LEFT JOINs the
# result with the rubric table to pick up `serp_data`, `parent_rubric_id` and
# `features`, and writes everything to the output table, replacing its previous
# content (WITH TRUNCATE).
#
# NOTE(review): the joined columns are wrapped in Unwrap() although they come
# from the right side of a LEFT JOIN (and `features` is declared optional in the
# temporary rubric table schema) — presumably the query fails when a rubric id
# has no match or `features` is NULL; confirm this is the intended behaviour.
CALCULATE_RUBRIC_QUERY = """USE hahn;

INSERT INTO `{output_table}` WITH TRUNCATE
SELECT a.rubric_id AS rubric_id, a.frequency AS frequency,
       Unwrap(b.serp_data) AS serp_data, Unwrap(b.parent_rubric_id) AS parent_rubric_id,
       Unwrap(b.features) AS features
FROM (
    SELECT rubric_id, COUNT(*) AS frequency
    FROM (
        SELECT ListExtract(data.Rubrics, "Id") AS rubric_ids
        FROM `{input_table}`
    )
    FLATTEN LIST BY rubric_ids AS rubric_id
    GROUP BY rubric_id
) AS a
LEFT JOIN ANY `{rubric_table}` AS b
USING (rubric_id)
"""

# Value passed as `expiration_timeout` when creating temporary YT tables:
# 3 days (3 * 24 * 60 * 60 seconds) multiplied by 1000, i.e. presumably
# milliseconds — confirm the unit against the YT client documentation.
EXPIRATION_TIME = 3 * 24 * 60 * 60 * 1000


class CommonSimilarAdvertV2Parameters(CommonSimilarAdvertParameters):
    # Extends the common similar-advert parameters with the binary that
    # extracts data from a business shard. The parameter is optional and is
    # restricted to resources in the READY state.
    prepare_executable = sdk2.parameters.LastReleasedResource(
        'Executable for uploading data from shard',
        required=False,
        state=(ResourceState.READY,),
        resource_type=PrepareDataFromShardExecutable,
    )


class UpdateBusinessDataFromAddrsBaseShard(AbstractSimilarAdvertTask):
    """Export data from a single addrs business shard into YT tables.

    The task looks up the ADDRS_BUSINESS_SHARD resource produced by the task
    given in ``shard_task`` and runs the ``prepare_executable`` binary on it,
    passing the output table, the rubric table and the main-rubrics resource.
    """

    class Requirements(AbstractSimilarAdvertTask.Requirements):
        # NOTE(review): presumably MiB, i.e. 16 GiB of RAM — confirm the unit.
        ram = 16 * 1024

    class Parameters(CommonSimilarAdvertV2Parameters):
        # Passed to the binary as `-o`.
        output_table = sdk2.parameters.String(
            'Path to YT table to put required data from business shard',
            required=True,
        )
        # Passed to the binary as `-r`.
        rubric_table = sdk2.parameters.String('Path to YT table to put info about rubrics', required=True)

        # Id of the task whose ADDRS_BUSINESS_SHARD resource is processed.
        shard_task = sdk2.parameters.Integer('Task for required shard', required=True)

    def on_execute(self):
        """Run the export binary over the shard resource.

        Raises:
            TaskFailure: if the shard task has no ADDRS_BUSINESS_SHARD resource
                or the ``prepare_executable`` parameter is not set.
            subprocess.CalledProcessError: if the binary exits with a non-zero
                code.
        """
        from sandbox.common.errors import TaskFailure

        # The child process inherits the environment, so the token set here is
        # visible to the binary (presumably that is how it authenticates in YT).
        os.environ['YT_TOKEN'] = self._get_yt_token()

        shard = sdk2.Resource[ADDRS_BUSINESS_SHARD].find(task=sdk2.Task[self.Parameters.shard_task]).first()
        if shard is None:
            # Fail with a readable message instead of an obscure error from
            # ResourceData when the lookup finds nothing.
            raise TaskFailure(
                'No ADDRS_BUSINESS_SHARD resource found for task {}'.format(self.Parameters.shard_task)
            )

        # The parameter is declared with required=False, so it may be unset.
        if self.Parameters.prepare_executable is None:
            raise TaskFailure('Parameter prepare_executable is not set')

        shard_resource = sdk2.ResourceData(shard)
        binary = sdk2.ResourceData(self.Parameters.prepare_executable)

        cmd_params = [
            str(binary.path),
            '-i', str(shard_resource.path),
            '-o', self.Parameters.output_table,
            '-r', self.Parameters.rubric_table,
            '--main-rubrics', self._sync_stable_resource(ORGS_MAIN_RUBRICS_BIN),
        ]

        # Both stdout and stderr of the binary go to the same process log.
        with sdk2.helpers.ProcessLog(self, logger='index_build') as process_log:
            subprocess.check_call(cmd_params, stdout=process_log.stdout, stderr=subprocess.STDOUT)


class UpdateBusinessDataFromAddrsBase(AbstractSimilarAdvertTask):
    """Rebuild the business-data and rubric YT tables from the current addrs base.

    The task reads the shardmap task id from the runtime attributes of the
    ``addrs_base`` Nanny service, launches one
    ``UpdateBusinessDataFromAddrsBaseShard`` subtask per shard task, joins
    rubric frequencies with rubric info via a YQL subtask, sorts the exported
    data by ``permalink`` and finally moves the temporary tables to their
    destinations.
    """

    # NOTE(review): presumably consumed by the base class to limit concurrent
    # runs — not used in this class directly.
    DEFAULT_SEMAPHORE_NAME = 'similar_adverts_update_business_data'

    class Parameters(CommonSimilarAdvertV2Parameters):
        # Secret whose SECRET_KEY entry is used as the Nanny token.
        nanny_yav_secret = sdk2.parameters.YavSecret(
            'Yav secret with token for nanny',
            required=True,
            default_value=NANNY_ROBOT_EXTDATA_SECRET_ID,
        )

        # Final destination of the exported business data; its `_base_version`
        # attribute stores the shardmap task id the data was built from.
        output_table = sdk2.parameters.String(
            'Path to YT table to put required data from business index',
            required=True,
        )

        # Final destination of the rubric table joined with frequencies.
        rubric_table = sdk2.parameters.String(
            'Path on yt to put info on rubrics',
            required=True,
        )

        yql_token_vault_name = sdk2.parameters.String(
            'Name of vault item with token to run yql queries',
            required=True,
        )

        # Directory in which temporary tables are created.
        working_dir = sdk2.parameters.String('Yt working directory to put intermediate results', required=True)

    def _create_temp_table(self, yt_client, schema=None, attributes=None):
        """Create a temporary table in the working directory.

        Args:
            yt_client: YT client used to create the table.
            schema: optional list of column descriptions; when given it is
                stored in the ``schema`` attribute as a strict schema.
            attributes: optional mapping of extra table attributes. It is not
                modified.

        Returns:
            Whatever ``yt_client.create_temp_table`` returns (presumably the
            path of the created table).
        """
        from yt.yson import to_yson_type

        # Work on a copy so that adding the schema never mutates the mapping
        # owned by the caller.
        attributes = dict(attributes) if attributes else {}

        if schema is not None:
            attributes['schema'] = to_yson_type(schema, attributes={'strict': True})

        return yt_client.create_temp_table(
            self.Parameters.working_dir,
            expiration_timeout=EXPIRATION_TIME,
            attributes=attributes,
        )

    def on_execute(self):
        """Run the multi-stage update.

        NOTE(review): the stages are wrapped in ``memoize_stage`` and the
        ``wait_subtasks`` helper is called inside them — presumably the task is
        suspended while subtasks run and re-executed afterwards, with finished
        stages skipped; confirm against the Sandbox SDK.
        """
        from infra.nanny.nanny_services_rest.nanny_services_rest.client import ServiceRepoClient
        from yt.wrapper import YtClient
        from .proto import TBusinessIndexDataGenerateAttribute

        yt_client = YtClient(proxy='hahn', token=self._get_yt_token())
        # Attribute of the destination table holding the shardmap task id the
        # published data was built from.
        base_version_attribute = '{}/@_base_version'.format(self.Parameters.output_table)

        with self.memoize_stage.launch_shard_subtasks:
            nanny_client = ServiceRepoClient(
                'https://nanny.yandex-team.ru',
                self.Parameters.nanny_yav_secret.data()[SECRET_KEY]
            )
            runtime_attrs = nanny_client.get_runtime_attrs('addrs_base')['content']
            shardmap_task_id = int(runtime_attrs['resources']['sandbox_bsc_shard']['sandbox_shardmap']['task_id'])

            # The destination table was already built from this shardmap:
            # nothing to do.
            if yt_client.exists(base_version_attribute) and yt_client.get(base_version_attribute) == shardmap_task_id:
                return

            subtasks = []
            # Temporary table filled by the shard subtasks with exported data.
            self.Context.output_tmp_table = self._create_temp_table(
                yt_client,
                attributes={'_yql_proto_field_data': TBusinessIndexDataGenerateAttribute()},
                schema=[
                    {'name': 'data', 'required': True, 'type': 'string'},
                    {'name': 'permalink', 'required': True, 'type': 'int64'},
                ],
            )
            # Temporary table filled by the shard subtasks with rubric info.
            self.Context.rubric_tmp_table = self._create_temp_table(
                yt_client,
                schema=[
                    {'name': 'rubric_id', 'type': 'uint64', 'required': True},
                    {'name': 'serp_data', 'type': 'uint8', 'required': True},
                    {'name': 'parent_rubric_id', 'type': 'uint64', 'required': True},
                    {'name': 'features', 'type': 'any', 'required': False},
                ],
            )
            # One subtask per task id listed in the shardmap task context.
            for task_id in sdk2.Task[shardmap_task_id].Context.register_tasks:
                subtask = UpdateBusinessDataFromAddrsBaseShard(
                    self,
                    output_table=self.Context.output_tmp_table,
                    rubric_table=self.Context.rubric_tmp_table,
                    shard_task=task_id,
                    prepare_executable=self.Parameters.prepare_executable,
                    yt_token_yav=self.Parameters.yt_token_yav,
                    binary_executor_release_type='custom',
                )
                # Subtasks use the same tasks resource as this task.
                subtask.Requirements.tasks_resource = self.Requirements.tasks_resource
                subtask.save().enqueue()
                subtasks.append(subtask.id)

            # Saved in the context so that later stages can refer to them.
            self.Context.subtasks = subtasks
            self.Context.shardmap_task_id = shardmap_task_id

            wait_subtasks(subtasks)

        with self.memoize_stage.analyze_shard_subtasks:
            validate_subtask_statuses(self.Context.subtasks)

        with self.memoize_stage.calculate_rubric_joined:
            # Rubric frequencies joined with rubric info are written here by
            # the YQL subtask.
            self.Context.joined_rubric_tmp_table = self._create_temp_table(yt_client)
            query = CALCULATE_RUBRIC_QUERY.format(
                input_table=self.Context.output_tmp_table,
                output_table=self.Context.joined_rubric_tmp_table,
                rubric_table=self.Context.rubric_tmp_table,
            )
            yql_task = RunYQL2(
                self,
                query=query,
                yql_token_vault_name=self.Parameters.yql_token_vault_name,
                trace_query=True,
                publish_query=True,
                use_v1_syntax=True,
            )
            yql_task.enqueue()
            self.Context.yql_query_subtask_id = yql_task.id
            wait_subtasks([yql_task.id])

        with self.memoize_stage.sort_export_table:
            yt_client.run_sort(self.Context.output_tmp_table, sort_by='permalink')

        with self.memoize_stage.publish_results:
            # The YQL subtask status is checked only here, right before
            # publishing.
            validate_subtask_statuses([self.Context.yql_query_subtask_id])

            # Both tables and the version attribute are updated inside one YT
            # transaction.
            with yt_client.Transaction():
                yt_client.move(self.Context.output_tmp_table, self.Parameters.output_table, force=True)
                yt_client.move(self.Context.joined_rubric_tmp_table, self.Parameters.rubric_table, force=True)
                yt_client.set(base_version_attribute, self.Context.shardmap_task_id)
