import os
import time
import json
import logging
import requests

from sandbox import sdk2
from sandbox.sdk2.helpers import subprocess as sp
import sandbox.sandboxsdk.environments as sdk_environments

import sandbox.common.types.task as ctt
import sandbox.common.types.resource as ctr
from sandbox.common import errors
from sandbox.projects.common import yt_cleaner
from sandbox.projects.common import solomon

from sandbox.projects.MarketModelWizard import GetDocidsFromXmlSearchInputTsv, GetDocidsFromXmlSearchResultTsv
from sandbox.projects.MarketModelWizard.TimeBatcher import GetDocidsFromXmlSearchByChunk
from sandbox.projects.MarketModelWizard.GetTsvFromYt import GetTsvFromYt
from sandbox.projects.MarketModelWizard import YtTransferManagerExecutable
from sandbox.projects.MarketModelWizard.PrepareTables import MarketModelWizardPrepareSerpYtExport

# Ferryman endpoints selectable through the 'ferry man origin' task parameter below.
# The task appends '/add-full-tables' to the chosen endpoint when sending the request.
ferry_prod = 'http://rankingmid.ferryman.n.yandex-team.ru'  # offered as 'Stable'
ferry_prod_batch = 'http://rankingmid-batch.ferryman.n.yandex-team.ru'  # offered as 'Stable batch'
ferry_testing = 'http://saas-searchproxy-kv.yandex.net:17000/ranking_mid_priemka'  # offered as 'Testing' (default)


class MarketModelWizardSerpDataConveyor(sdk2.Task):
    """Conveyor serp data for market model wizard.

    The task chains the following stages (see ``on_execute``):

    1. ``GetTsvFromYt`` subtask dumps the input YT table to a TSV resource.
    2. ``GetDocidsFromXmlSearchByChunk`` subtask collects docids for that TSV.
    3. ``MarketModelWizardPrepareSerpYtExport`` subtask writes the result into
       ``<export_folder>/<unix timestamp>`` on the cooking YT host.
    4. Optional sanity checks of the produced table.
    5. Copy of the table to banach with the transfer manager binary
       (skipped when the cooking host already is banach).
    6. Optional Ferryman request announcing the table.
    7. Cleaning of the export folder history.
    8. Upload of table size metrics to Solomon.
    """

    class Parameters(sdk2.Task.Parameters):
        # Transfer manager binary used to copy the result table to banach.
        resource_id = sdk2.parameters.LastReleasedResource(
            'Transfer manager',
            resource_type=YtTransferManagerExecutable,
            state=(ctr.State.READY,),
            required=True,
        )

        with sdk2.parameters.Group('YT input parameters') as yt_parameters:
            path = sdk2.parameters.String(
                'Path with input table',
                required=True,
            )

            yt_vault_token = sdk2.parameters.String(
                'Your token name in vault',
                default='yt-token',
                required=True)

            with sdk2.parameters.RadioGroup('Yt host') as yt_host:
                yt_host.values['hahn'] = yt_host.Value(value='Hahn', default=True)
                yt_host.values['banach'] = yt_host.Value(value='Banach')

        # All parameters of this group are forwarded to GetDocidsFromXmlSearchByChunk.
        with sdk2.parameters.Group('Batcher parameters') as bather_parameters:
            with sdk2.parameters.RadioGroup('XML origin') as xml_origin:
                xml_origin.values['https://hamster.yandex.ru/search/xml'] = xml_origin.Value(value='Hamster',
                                                                                             default=True)
                xml_origin.values['https://yandex.ru/search/xml'] = xml_origin.Value(value='Yandex')

            numdoc = sdk2.parameters.Integer('Top documents count', default=10)

            # Also passed to the batcher as the Solomon sensor name.
            reqinfo = sdk2.parameters.String(
                'Your id from https://wiki.yandex-team.ru/jandeksxml/internal-users/',
                required=True,
            )

            task_count = sdk2.parameters.Integer('Task count', default=3)

            max_rps_per_task = sdk2.parameters.Integer('RPS per task', required=True, default=20)

            mean_task_time = sdk2.parameters.Integer('One chunk task time in minutes', required=True, default=15)

            run_only_at_night = sdk2.parameters.Bool('Only night requests (from 22 to 10 MSK)', default=True)

            rps_per_thread = sdk2.parameters.Integer('RPS per thread', default=1)

            solomon_project = sdk2.parameters.String(
                'Solomon project',
                required=True,
            )

            solomon_cluster = sdk2.parameters.String(
                'Solomon cluster',
                required=True,
            )

            solomon_service = sdk2.parameters.String(
                'Solomon service',
                required=True,
            )

        with sdk2.parameters.Group('YT cooking parameters') as yt_cooking_parameters:
            cooking_yt_vault_token = sdk2.parameters.String(
                'Your token name in vault',
                default='yt-token',
                required=True)

            cooking_folder = sdk2.parameters.String(
                'Common folder',
                required=True,
            )

            # Result tables are created as <export_folder>/<unix timestamp>.
            export_folder = sdk2.parameters.String(
                'Export folder',
                required=True,
            )

            with sdk2.parameters.RadioGroup('Yt host') as cooking_yt_host:
                cooking_yt_host.values['hahn'] = cooking_yt_host.Value(value='Hahn')
                cooking_yt_host.values['banach'] = cooking_yt_host.Value(value='Banach', default=True)

            # Sub-parameter attached to the 'hahn' choice: destination of the transfer to banach.
            # When empty, the table keeps the same path on banach as on the cooking host.
            with cooking_yt_host.value['hahn']:
                banach_path = sdk2.parameters.String(
                    'Result output table on banach',
                    default=None,
                )

        send_ferryman_request = sdk2.parameters.Bool('Send to ferryman', default=False)
        with send_ferryman_request.value[True]:
            with sdk2.parameters.Group('SaaS load parameters') as saas_parameters:
                with sdk2.parameters.RadioGroup('ferry man origin') as ferry_origin:
                    ferry_origin.values[ferry_testing] = ferry_origin.Value(
                        value='Testing',
                        default=True
                    )
                    ferry_origin.values[ferry_prod_batch] = ferry_origin.Value(value='Stable batch')
                    ferry_origin.values[ferry_prod] = ferry_origin.Value(value='Stable')
                ferry_namespace = sdk2.parameters.String(
                    'Namespace',
                    required=True,
                )

        check_results = sdk2.parameters.Bool('Check results', default=False)
        with check_results.value[True]:
            # Despite the label the value is compared as a fraction of the previous
            # row count (0.05 means 5%).
            check_percent = sdk2.parameters.Float('Differ in row_count in percent', default=0.05)

            # Compared against row_count as value * 10 ** 6.
            check_max_row_count = sdk2.parameters.Integer('Max mln key count', default=35)
            # Compared against 'uncompressed_data_size' as value * 2 ** 20.
            check_max_data_size = sdk2.parameters.Integer('Max MB data', default=1800)

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        # Provides the yt.wrapper module, which is therefore imported inside methods only.
        environments = [
            sdk_environments.PipEnvironment('yandex-yt'),
        ]

        class Caches(sdk2.Requirements.Caches):
            pass

    def on_execute(self):
        """Run the conveyor stages in order.

        Every stage lives in its own ``self.memoize_stage`` block. The first three
        stages enqueue a subtask and raise ``sdk2.WaitTask``, so the method is
        expected to be re-entered once the subtask reaches a FINISH status.

        :raises sdk2.WaitTask: to wait for an enqueued subtask.
        :raises errors.TaskError: when a subtask did not succeed, a sanity check
            fails, the transfer binary fails or Ferryman replies with non-JSON.
        """
        with self.memoize_stage.prepare_input:
            self._launch_input_export()

        with self.memoize_stage.get_docids:
            self._launch_docid_collection()

        with self.memoize_stage.prepare_ferryman:
            self._launch_export_preparation()

        with self.memoize_stage.check_result:
            if self.Parameters.check_results:
                self._validate_export()

        with self.memoize_stage.transfer_phase:
            if self.Parameters.cooking_yt_host != 'banach':
                self._transfer_to_banach()

        with self.memoize_stage.ferryman_request:
            if self.Parameters.send_ferryman_request:
                self._notify_ferryman()
            else:
                logging.info('no ferry man request')

        with self.memoize_stage.clean_phase:
            self._clean_history()

        with self.memoize_stage.monitoring_phase:
            self._report_table_metrics()

    def _require_successful_subtask(self, task_type):
        """Return the first SUCCESS subtask of ``task_type`` or fail the task explicitly."""
        subtask = self.find(task_type, status=ctt.Status.SUCCESS).first()
        if subtask is None:
            # Without this guard the resource lookup below would be issued with task=None.
            raise errors.TaskError('No successful {} subtask found'.format(task_type.__name__))
        return subtask

    @staticmethod
    def _require_resource_id(resource_type, **constraints):
        """Return the id of the first ``resource_type`` resource matching ``constraints``."""
        resource = resource_type.find(**constraints).first()
        if resource is None:
            raise errors.TaskError('No {} resource found'.format(resource_type.__name__))
        return resource.id

    def _launch_input_export(self):
        """Enqueue the subtask dumping the input YT table to TSV and wait for it."""
        task = GetTsvFromYt(
            self,
            description='prepare tsv',
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            path=self.Parameters.path,
            yt_vault_token=self.Parameters.yt_vault_token,
            yt_host=self.Parameters.yt_host,
            table_columns=['model_id', 'geo_id', 'text']
        )
        task.enqueue()

        raise sdk2.WaitTask(task.id, ctt.Status.Group.FINISH)

    def _launch_docid_collection(self):
        """Enqueue the docid collecting subtask on the prepared TSV and wait for it."""
        resource_id = self._require_resource_id(
            GetDocidsFromXmlSearchInputTsv,
            task=self._require_successful_subtask(GetTsvFromYt),
        )
        task = GetDocidsFromXmlSearchByChunk(
            self,
            description='grep docids',
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            input_tsv=resource_id,
            xml_origin=self.Parameters.xml_origin,
            numdoc=self.Parameters.numdoc,
            reqinfo=self.Parameters.reqinfo,
            task_count=self.Parameters.task_count,
            max_rps_per_task=self.Parameters.max_rps_per_task,
            mean_task_time=self.Parameters.mean_task_time,
            run_only_at_night=self.Parameters.run_only_at_night,
            rps_per_thread=self.Parameters.rps_per_thread,
            send_solomon_metrics=True,
            solomon_project=self.Parameters.solomon_project,
            solomon_cluster=self.Parameters.solomon_cluster,
            solomon_service=self.Parameters.solomon_service,
            solomon_sensor=self.Parameters.reqinfo,
        )
        task.enqueue()

        raise sdk2.WaitTask(task.id, ctt.Status.Group.FINISH)

    def _launch_export_preparation(self):
        """Enqueue the subtask writing the result YT table and wait for it.

        The destination ``<export_folder>/<unix timestamp>`` is stored in
        ``self.Context.result_path`` for the later stages.
        """
        resource_id = self._require_resource_id(
            GetDocidsFromXmlSearchResultTsv,
            task=self._require_successful_subtask(GetDocidsFromXmlSearchByChunk),
            state=ctr.State.READY,
        )
        result_path = os.path.join(self.Parameters.export_folder, str(int(time.time())))
        self.Context.result_path = result_path
        task = MarketModelWizardPrepareSerpYtExport(
            self,
            description='prepare yt tables for ferryman',
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            input_tsv=resource_id,
            common_folder=self.Parameters.cooking_folder,
            yt_vault_token=self.Parameters.cooking_yt_vault_token,
            yt_host=self.Parameters.cooking_yt_host,
            path=result_path,
        )
        task.enqueue()

        raise sdk2.WaitTask(task.id, ctt.Status.Group.FINISH)

    def _validate_export(self):
        """Sanity-check the freshly produced table on the cooking YT host.

        Checks, each one only when its parameter is not None:

        * relative row count change against the latest earlier export must not
          exceed ``check_percent``;
        * row count must not exceed ``check_max_row_count`` millions;
        * uncompressed data size must not exceed ``check_max_data_size`` MiB.

        :raises errors.TaskError: when any check fails.
        """
        import yt.wrapper as yt

        yt.config['token'] = sdk2.Vault.data(self.owner, self.Parameters.cooking_yt_vault_token)
        yt.config['proxy']['url'] = '{}.yt.yandex.net'.format(self.Parameters.cooking_yt_host)

        result_path = self.Context.result_path

        if self.Parameters.check_percent is not None:
            current_time = int(os.path.basename(result_path))

            previous_exports = []
            for name in yt.list(self.Parameters.export_folder):
                try:
                    timestamp = int(name)
                except ValueError:
                    # Not a timestamp-named export; it must not break the check.
                    continue
                if timestamp < current_time:
                    previous_exports.append((timestamp, name))

            if previous_exports:
                previous_name = max(previous_exports, key=lambda export: export[0])[1]
                previous_row_count = yt.row_count(
                    os.path.join(self.Parameters.export_folder, previous_name)
                )
                current_row_count = yt.row_count(result_path)
                if previous_row_count:
                    change = float(abs(previous_row_count - current_row_count)) / previous_row_count
                else:
                    # An empty previous export must not crash with ZeroDivisionError:
                    # any rows appearing after it count as an unbounded change.
                    change = float('inf') if current_row_count else 0.0
                if change > self.Parameters.check_percent:
                    raise errors.TaskError('Big change in results, please confirm it')
            else:
                logging.info('No history')

        if self.Parameters.check_max_row_count is not None:
            if yt.row_count(result_path) > self.Parameters.check_max_row_count * 10 ** 6:
                raise errors.TaskError('To much different docids')

        if self.Parameters.check_max_data_size is not None:
            if (yt.get_attribute(result_path, 'uncompressed_data_size')
                    > self.Parameters.check_max_data_size * 2 ** 20):
                raise errors.TaskError('To much data size')

    def _transfer_to_banach(self):
        """Copy the result table from the cooking host to banach with the transfer manager.

        On success ``self.Context.result_path`` is switched to the banach path.

        :raises errors.TaskError: when the binary is missing or exits with a non-zero code.
        """
        runner = self.Parameters.resource_id

        if runner is None:
            raise errors.TaskError('No executable founded')
        runner = sdk2.ResourceData(runner)

        logging.info('Start run')
        if self.Parameters.banach_path:
            banach_path = str(self.Parameters.banach_path)
        else:
            banach_path = str(self.Context.result_path)

        with sdk2.helpers.ProcessLog(self, logger='run binary') as pl:
            # The token is handed to the binary through its environment only.
            env = os.environ.copy()
            env['YT_TOKEN'] = sdk2.Vault.data(self.owner, self.Parameters.cooking_yt_vault_token)
            run = [
                str(runner.path), 'add-task', '--sync',
                '--src-cluster', str(self.Parameters.cooking_yt_host),
                '--dst-cluster', 'banach',
                '--src-table', str(self.Context.result_path),
                '--dst-table', banach_path,
            ]
            ret = sp.Popen(run, stdout=pl.stdout, stderr=sp.STDOUT, env=env).wait()
            if ret:
                raise errors.TaskError('run is failed')
            self.Context.result_path = banach_path

    def _notify_ferryman(self):
        """Announce the result table to the selected Ferryman endpoint.

        The reply is only required to be valid JSON; its content is logged, not inspected.

        :raises errors.TaskError: when the reply body is not JSON.
        """
        query = {
            'tables': json.dumps(
                [
                    {
                        'Path': self.Context.result_path,
                        'Namespace': str(self.Parameters.ferry_namespace),
                        'Timestamp': int(time.time() * 10 ** 6),
                    }
                ],
                separators=(',', ':')
            ),
        }
        response = requests.get('{}/add-full-tables'.format(self.Parameters.ferry_origin), query)
        logging.info(response.url)
        logging.info(response.text)
        try:
            response.json()
        except ValueError:
            # JSON decoding errors derive from ValueError; anything else
            # (e.g. KeyboardInterrupt) must propagate untouched.
            raise errors.TaskError('Fail upload to Ferryman')

    def _clean_history(self):
        """Clean the export folder history on the cooking host and/or banach.

        The cooking host is cleaned when it is not banach; banach is cleaned when
        no explicit ``banach_path`` was given.
        """
        import yt.wrapper as yt

        # NOTE(review): the input token (yt_vault_token) is used here although the folders
        # belong to the cooking side, where cooking_yt_vault_token is used elsewhere - confirm.
        yt_token = sdk2.Vault.data(self.owner, self.Parameters.yt_vault_token)
        if self.Parameters.cooking_yt_host != 'banach':
            yt_cleaner.clean_history_folder(
                yt.YtClient(token=yt_token, proxy=self.Parameters.cooking_yt_host, config=yt.config.config),
                self.Parameters.export_folder,
            )
        if not self.Parameters.banach_path:
            yt_cleaner.clean_history_folder(
                yt.YtClient(token=yt_token, proxy='banach', config=yt.config.config),
                self.Parameters.export_folder,
            )

    def _report_table_metrics(self):
        """Upload row count and uncompressed size of the banach result table to Solomon."""
        import yt.wrapper as yt

        common_labels = {
            'project': 'market_model_wizard',
            'cluster': 'sandbox_metrics',
            'service': 'yt_table_sizes',
        }

        # NOTE(review): same token question as in the cleaning phase - confirm.
        yt.config['token'] = sdk2.Vault.data(self.owner, self.Parameters.yt_vault_token)
        yt.config['proxy']['url'] = 'banach.yt.yandex.net'

        sensors = [
            {
                'labels': {'sensor': 'serp_table_row_count', },
                'ts': int(time.time()),
                'value': int(yt.row_count(self.Context.result_path)),
            },
            {
                'labels': {'sensor': 'serp_table_data_size', },
                'ts': int(time.time()),
                'value': int(yt.get_attribute(self.Context.result_path, 'uncompressed_data_size')),
            },
        ]

        solomon.upload_to_solomon(common_labels, sensors)
