from sandbox import sdk2 as sdk
import sandbox.sdk2.helpers as helpers
from sandbox.sdk2.helpers import subprocess as sp

from sandbox.projects.resource_types import NEWS_MR_INDEXER
from sandbox.projects.common import solomon

import sandbox.common.types.resource as ctr
import sandbox.common.types.client as ctc

import os
import time


# Default value for the YT_SPEC environment variable handed to mr_indexer.
# It is a compact JSON object that sets:
#   * reducer memory_limit to 32212254720 bytes (30 * 1024**3);
#   * table_writer max_row_weight to 134217728 bytes (128 * 1024**2) for each
#     of the job I/O sections listed below.
# Every job I/O section carries the same payload, so the sections are rendered
# from the list of their names instead of being spelled out one by one.
DEFAULT_YT_SPEC = '{"reducer":{"memory_limit":32212254720},%s}' % ','.join(
    '"%s":{"table_writer":{"max_row_weight":134217728}}' % section
    for section in (
        'job_io',
        'map_job_io',
        'reduce_job_io',
        'sort_job_io',
        'partition_job_io',
        'merge_job_io',
    )
)


class BuildNewsSearchShards(sdk.Task):
    """
    Meta-task for building search index tables in YT and preparing shards.

    The task runs the ``mr_indexer`` executable taken from the selected
    NEWS_MR_INDEXER resource, passing it the YT proxy, date range and shard
    settings from the task parameters. A timestamp sensor is pushed to Solomon
    right before the run ('start') and after the process has completed
    ('finish').
    """

    class Requirements(sdk.Task.Requirements):
        cores = 1
        disk_space = 10 * 1024
        ram = 20 * 1024
        client_tags = ctc.Tag.Group.LINUX

        class Caches(sdk.Requirements.Caches):
            pass

    class Parameters(sdk.Task.Parameters):
        max_restarts = 1
        kill_timeout = 5 * 3600
        description = "Runs YT process that builds index tables and then prepares news search shards"

        yt_proxy = sdk.parameters.String('YT proxy',
                                         default='hahn.yt.yandex.net',
                                         required=True)

        indexer_resource = sdk.parameters.LastReleasedResource(
            "News MR indexer executable",
            resource_type=NEWS_MR_INDEXER,
            state=ctr.State.READY,
            required=True)

        # The same vault owner is used for both the YT token and the TVM secret.
        vault_item_owner = sdk.parameters.String('YT token owner in vault',
                                                 default='NEWS',
                                                 required=True)

        vault_item_name = sdk.parameters.String('YT resource key in vault',
                                                default='yt_token',
                                                required=True)

        tvm_item_name = sdk.parameters.String('TVM resource key in vault',
                                              default='solomon_tvm',
                                              required=True)

        indexed_docs = sdk.parameters.String('Path to the table with indexed docs',
                                             default='//home/news-prod/archive/index/indexed_docs',
                                             required=True)

        since = sdk.parameters.Integer('Date to start from (inclusive) format: YYYYMMDD',
                                       default=0,
                                       required=True)

        # Optional: '--to' is passed to the indexer only when this value is truthy.
        to = sdk.parameters.Integer('Date to stop at (exclusive) format: YYYYMMDD',
                                    default=None,
                                    required=False)

        shard_root_path = sdk.parameters.String('Path to the Cypress node where shard root is located',
                                                default='//home/news-prod/archive/index/shards/',
                                                required=True)

        shard_count = sdk.parameters.Integer('Count of shards to split search index into',
                                             default=78,
                                             required=True)

        yt_spec = sdk.parameters.String('YT_SPEC environment variable value',
                                        default=DEFAULT_YT_SPEC,
                                        required=False)

        yt_pool = sdk.parameters.String('YT pool',
                                        default=None,
                                        required=False)

        rebuild = sdk.parameters.Bool('Rebuild indexed docs from info records',
                                      default=False,
                                      required=False)

        # 0 means "keep all": '--keep-last-shards' is omitted in that case.
        max_results = sdk.parameters.Integer('How many results will be kept. Use 0 to keep all',
                                             default=0,
                                             required=True)

        break_clusters = sdk.parameters.Bool('Break clusters into separate docs',
                                             default=False,
                                             required=False)

    class Context(sdk.Task.Context):
        pass

    def _push_timestamp_sensor(self, sensor_name, token):
        """
        Push the current Unix time to Solomon as an 'archive: indexing' sensor.

        :param sensor_name: value of the 'sensor' label ('start' or 'finish')
        :param token: secret forwarded to ``push_to_solomon_v2`` as ``token``
        """
        # Read the clock once so that 'ts' and 'value' can never disagree.
        now = int(time.time())
        common_labels = {
            'project': 'news',
            'cluster': 'main',
            'service': 'main',
        }
        sensors = [
            {
                'labels': {
                    'archive': 'indexing',
                    'sensor': sensor_name,
                },
                'ts': now,
                'value': now,
            }
        ]
        solomon.push_to_solomon_v2(params=common_labels, sensors=sensors, token=token)

    def _export_yt_environment(self):
        """
        Export YT settings into this process's environment.

        No explicit ``env`` is passed to the child process, so the indexer is
        expected to pick these variables up from the task's own environment.
        YT_SPEC and YT_POOL are set only when the matching parameter is
        non-empty.
        """
        params = self.Parameters
        os.environ['YT_LOG_LEVEL'] = 'INFO'
        os.environ['YT_TOKEN'] = sdk.Vault.data(params.vault_item_owner, params.vault_item_name)

        if params.yt_spec:
            os.environ['YT_SPEC'] = params.yt_spec

        if params.yt_pool:
            os.environ['YT_POOL'] = params.yt_pool

    def _build_indexer_command(self, indexer_exec):
        """
        Assemble the mr_indexer command line from the task parameters.

        :param indexer_exec: path to the mr_indexer executable
        :return: list of command-line arguments, executable first
        """
        params = self.Parameters
        cmd = [
            str(indexer_exec),
            '--proxy', str(params.yt_proxy),
            '--since', str(params.since),
            '--shard-count', str(params.shard_count),
            '--shard-root', str(params.shard_root_path),
            '--indexed-out', str(params.indexed_docs),
            '--latest',
        ]
        # Optional arguments are appended only for truthy parameter values.
        if params.to:
            cmd.extend(['--to', str(params.to)])
        if params.rebuild:
            cmd.append('--rebuild')
        if params.break_clusters:
            cmd.append('--break-clusters')
        if params.max_results:
            cmd.extend(['--keep-last-shards', str(params.max_results)])
        return cmd

    def on_execute(self):
        """
        Run the indexer and report start/finish timestamps to Solomon.

        The 'finish' sensor is pushed only if everything before it completes
        without raising; ``sp.check_call`` is expected to raise on a non-zero
        exit code.
        """
        params = self.Parameters

        solomon_tvm = sdk.Vault.data(params.vault_item_owner, params.tvm_item_name)
        self._push_timestamp_sensor('start', solomon_tvm)

        indexer_exec = sdk.ResourceData(params.indexer_resource).path.joinpath('mr_indexer')
        # Make sure the downloaded binary is executable by the task user.
        indexer_exec.chmod(0o744)

        self._export_yt_environment()
        cmd = self._build_indexer_command(indexer_exec)

        with helpers.ProcessLog(self, logger="mr_indexer") as pl:
            # Both streams go to the same log so the output stays interleaved.
            sp.check_call(cmd,
                          stderr=pl.stdout,
                          stdout=pl.stdout)

            pl.logger.info('YT process has finished successfully')

        self._push_timestamp_sensor('finish', solomon_tvm)
