import os
import time
import json
import logging
import requests

import sandbox.sdk2 as sdk2
from sandbox.sdk2.helpers import subprocess as sp
import sandbox.sandboxsdk.environments as sdk_environments

import sandbox.common.types.resource as ctr
import sandbox.common.types.task as ctt
from sandbox.common import errors

from sandbox.projects.common import yt_cleaner
from sandbox.projects.common import solomon

from sandbox.projects.resource_types import MR_GEMINICL
from sandbox.projects.MarketModelWizard import YtTransferManagerExecutable
from sandbox.projects.MarketModelWizard.PrepareOfferTables import MarketModelWizardPrepareOfferYtExport

# Ferryman base URLs. They are the selectable values of the `ferry_origin`
# task parameter below; '/add-full-tables' is appended to the chosen one when
# the upload request is sent.
ferry_prod = 'http://rankingmid.ferryman.n.yandex-team.ru'  # shown as 'Stable'
ferry_prod_batch = 'http://rankingmid-batch.ferryman.n.yandex-team.ru'  # shown as 'Stable batch'
ferry_testing = 'http://saas-searchproxy-kv.yandex.net:17000/ranking_mid_priemka'  # shown as 'Testing' (default)


class MarketModelWizardOfferDataConveyor(sdk2.Task):
    """Conveyor offer data for market model wizard

    ``on_execute`` runs the following stages, each inside its own
    ``memoize_stage``:

    1. ``prepare_ferryman`` -- enqueue ``MarketModelWizardPrepareOfferYtExport``
       with ``path=<export_folder>/<unix time>`` and wait for it to finish.
    2. ``check_result`` -- when ``check_results`` is enabled, compare the new
       table with the previous export and with the configured size limits.
    3. ``transfer_phase`` -- when ``yt_host`` is not ``banach``, run the
       transfer manager executable to copy the table to ``banach``.
    4. ``ferryman_request`` -- when ``send_ferryman_request`` is enabled, send
       the table to the selected Ferryman origin.
    5. ``clean_phase`` -- call ``yt_cleaner.clean_history_folder`` on the
       export folder.
    6. ``monitoring_phase`` -- upload row count and data size to Solomon.
    """

    class Parameters(sdk2.Task.Parameters):
        # Executable used by the transfer stage ('add-task --sync ...').
        resource_id = sdk2.parameters.LastReleasedResource(
            'Transfer manager',
            resource_type=YtTransferManagerExecutable,
            state=(ctr.State.READY,),
            required=True,
        )

        with sdk2.parameters.Group('YT parameters') as yt_parameters:
            # Passed through to the export subtask as `common_folder`.
            common_folder = sdk2.parameters.String(
                'Common folder',
                required=True,
            )

            # Each run writes its result to <export_folder>/<unix time>.
            export_folder = sdk2.parameters.String(
                'Export folder',
                required=True,
            )

            # Vault item names (not the tokens themselves); the YT token is
            # read with sdk2.Vault.data(self.owner, <name>).
            yt_vault_token = sdk2.parameters.String(
                'Your yt token name in vault',
                default='yt-token',
                required=True
            )

            yql_vault_token = sdk2.parameters.String(
                'Your yql token name in vault',
                default='yql-token',
                required=True
            )

            # Cluster on which the export subtask produces the table.
            with sdk2.parameters.RadioGroup('Yt host') as yt_host:
                yt_host.values['hahn'] = yt_host.Value(value='Hahn')
                yt_host.values['banach'] = yt_host.Value(value='Banach', default=True)

            # Destination table for the transfer to banach; when empty the
            # source path is reused as the destination.
            with yt_host.value['hahn']:
                banach_path = sdk2.parameters.String(
                    'Result output table on banach',
                    default=None,
                )

        # The gemini parameters are only forwarded to the export subtask.
        with sdk2.parameters.Group('Gemini parameters') as gemini_parameters:
            gemini_resource_id = sdk2.parameters.Resource(
                'MR Geminicl',
                resource_type=MR_GEMINICL,
                state=(ctr.State.READY,),
                required=True,
            )

            mr_gemini_user = sdk2.parameters.String(
                'MR gemini user',
                default='mr-any',
                required=True,
            )
            mr_gemini_job_count = sdk2.parameters.Integer(
                'MR gemini job count',
                default=1000,
                required=True,
            )
            mr_gemini_max_rps = sdk2.parameters.Integer(
                'MR gemini max rps',
                default=50000,
                required=True,
            )

        send_ferryman_request = sdk2.parameters.Bool('Send to ferryman', default=False)
        with send_ferryman_request.value[True]:
            with sdk2.parameters.Group('SaaS load parameters') as saas_parameters:
                # The radio-group keys are the Ferryman base URLs themselves.
                with sdk2.parameters.RadioGroup('ferry man origin') as ferry_origin:
                    ferry_origin.values[ferry_testing] = ferry_origin.Value(
                        value='Testing',
                        default=True
                    )
                    ferry_origin.values[ferry_prod_batch] = ferry_origin.Value(value='Stable batch')
                    ferry_origin.values[ferry_prod] = ferry_origin.Value(value='Stable')
                ferry_namespace = sdk2.parameters.String(
                    'Namespace',
                    required=True,
                )

        check_results = sdk2.parameters.Bool('Check results', default=False)
        with check_results.value[True]:
            # Maximum allowed relative change of the row count against the
            # previous export; compared as a plain fraction
            # (|previous - current| / previous > check_percent).
            check_percent = sdk2.parameters.Float('Differ in row_count in percent', default=0.05)

            # Limits are multiplied by 10 ** 6 rows and 2 ** 20 bytes respectively.
            check_max_row_count = sdk2.parameters.Integer('Max mln key count', default=35)
            check_max_data_size = sdk2.parameters.Integer('Max MB data', default=1400)

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        # Provides `yt.wrapper`, which is therefore imported inside methods
        # rather than at module level.
        environments = [
            sdk_environments.PipEnvironment('yandex-yt'),
        ]

        # NOTE(review): empty caches declaration kept as is; presumably it
        # states that the task needs no shared caches -- confirm.
        class Caches(sdk2.Requirements.Caches):
            pass

    def on_execute(self):
        """Run the conveyor stages in order; see the class docstring.

        Raises:
            sdk2.WaitTask: from the first stage, to wait for the export subtask.
            errors.TaskError: when a check, the transfer or the Ferryman
                upload fails.
        """
        with self.memoize_stage.prepare_ferryman:
            self._start_export()

        with self.memoize_stage.check_result:
            self._check_result()

        with self.memoize_stage.transfer_phase:
            self._transfer_to_banach()

        with self.memoize_stage.ferryman_request:
            self._send_to_ferryman()

        with self.memoize_stage.clean_phase:
            self._clean_history()

        with self.memoize_stage.monitoring_phase:
            self._upload_metrics()

    def _yt_token(self):
        """Return the YT token stored in the vault item `yt_vault_token`."""
        return sdk2.Vault.data(self.owner, self.Parameters.yt_vault_token)

    def _configured_yt(self, cluster):
        """Point the global `yt.wrapper` config at `cluster` and return the module."""
        import yt.wrapper as yt

        yt.config['token'] = self._yt_token()
        yt.config['proxy']['url'] = '{}.yt.yandex.net'.format(cluster)
        return yt

    @staticmethod
    def _as_timestamp(name):
        """Return `name` converted to int, or None when it is not an integer."""
        try:
            return int(name)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _relative_change(previous, current):
        """Return |previous - current| / previous as a float.

        An empty previous table would make the ratio undefined, so it is
        reported as 0.0 when both counts are equal and as infinity otherwise.
        """
        if previous == current:
            return 0.0
        if not previous:
            return float('inf')
        return float(abs(previous - current)) / previous

    def _start_export(self):
        """Enqueue the export subtask and wait until it reaches a FINISH status."""
        result_path = os.path.join(self.Parameters.export_folder, str(int(time.time())))
        # Later stages read the table location from the context.
        self.Context.result_path = result_path
        subtask = MarketModelWizardPrepareOfferYtExport(
            self,
            description='prepare yt tables for ferryman',
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            kill_timeout=3600 * 6,
            common_folder=self.Parameters.common_folder,
            yt_vault_token=self.Parameters.yt_vault_token,
            yql_vault_token=self.Parameters.yql_vault_token,
            yt_host=self.Parameters.yt_host,
            gemini_resource_id=self.Parameters.gemini_resource_id.id,
            mr_gemini_user=self.Parameters.mr_gemini_user,
            mr_gemini_job_count=self.Parameters.mr_gemini_job_count,
            mr_gemini_max_rps=self.Parameters.mr_gemini_max_rps,
            path=result_path,
        )
        subtask.enqueue()

        raise sdk2.WaitTask(subtask.id, ctt.Status.Group.FINISH)

    def _check_result(self):
        """Validate the exported table when `check_results` is enabled.

        Raises:
            errors.TaskError: when the row count changed by more than
                `check_percent` against the previous export, or the table
                exceeds `check_max_row_count` / `check_max_data_size`.
        """
        # The global yt config is set up unconditionally, as it always was,
        # so later stages of the same run see an unchanged configuration.
        yt = self._configured_yt(self.Parameters.yt_host)

        # The limits below have non-None defaults, so testing them for None
        # alone never turned the checks off; the explicit flag has to be
        # honoured to let a run confirm a big change.
        if not self.Parameters.check_results:
            logging.info('Result checks are disabled')
            return

        result_path = self.Context.result_path

        if self.Parameters.check_percent is not None:
            self._check_row_count_change(yt, result_path)

        if self.Parameters.check_max_row_count is not None:
            if yt.row_count(result_path) > self.Parameters.check_max_row_count * 10 ** 6:
                raise errors.TaskError('To much different docids')

        if self.Parameters.check_max_data_size is not None:
            data_size = yt.get_attribute(result_path, 'uncompressed_data_size')
            if data_size > self.Parameters.check_max_data_size * 2 ** 20:
                raise errors.TaskError('To much data size')

    def _check_row_count_change(self, yt, result_path):
        """Compare the row count with the most recent earlier export.

        Children of `export_folder` are expected to be named by unix
        timestamp (that is how `_start_export` names them); entries whose
        name is not an integer are ignored instead of failing the task.
        """
        export_folder = self.Parameters.export_folder
        current_time = int(os.path.basename(result_path))

        earlier = []
        for name in yt.list(export_folder):
            timestamp = self._as_timestamp(name)
            if timestamp is not None and timestamp < current_time:
                earlier.append((timestamp, name))

        if not earlier:
            logging.info('No history')
            return

        previous_name = max(earlier, key=lambda item: item[0])[1]
        previous_row_count = yt.row_count(os.path.join(export_folder, previous_name))
        current_row_count = yt.row_count(result_path)
        change = self._relative_change(previous_row_count, current_row_count)
        if change > self.Parameters.check_percent:
            raise errors.TaskError('Big change in results, please confirm it')

    def _transfer_to_banach(self):
        """Copy the result table to banach with the transfer manager binary.

        Does nothing when the table was already produced on banach. On
        success the context's `result_path` is switched to the banach table.

        Raises:
            errors.TaskError: when the executable resource is missing or the
                process exits with a non-zero code.
        """
        if self.Parameters.yt_host == 'banach':
            return

        runner = self.Parameters.resource_id
        if runner is None:
            raise errors.TaskError('No executable founded')
        runner = sdk2.ResourceData(runner)

        logging.info('Start run')
        if self.Parameters.banach_path:
            banach_path = str(self.Parameters.banach_path)
        else:
            banach_path = str(self.Context.result_path)

        with sdk2.helpers.ProcessLog(self, logger='run binary') as pl:
            # The token is handed to the child process through its environment.
            env = os.environ.copy()
            env['YT_TOKEN'] = self._yt_token()
            command = [
                str(runner.path), 'add-task', '--sync',
                '--src-cluster', str(self.Parameters.yt_host),
                '--dst-cluster', 'banach',
                '--src-table', str(self.Context.result_path),
                '--dst-table', banach_path,
            ]
            return_code = sp.Popen(command, stdout=pl.stdout, stderr=sp.STDOUT, env=env).wait()
            if return_code:
                raise errors.TaskError('run is failed')
            self.Context.result_path = banach_path

    def _send_to_ferryman(self):
        """Send the result table to Ferryman when `send_ferryman_request` is set.

        Raises:
            errors.TaskError: when the response body is not valid JSON.
        """
        if not self.Parameters.send_ferryman_request:
            logging.info('no ferry man request')
            return

        tables = [
            {
                'Path': self.Context.result_path,
                'Namespace': str(self.Parameters.ferry_namespace),
                'Timestamp': int(time.time() * 10 ** 6),
            }
        ]
        query = {
            'tables': json.dumps(tables, separators=(',', ':')),
        }
        response = requests.get(
            '{}/add-full-tables'.format(self.Parameters.ferry_origin),
            params=query,
        )
        logging.info(response.url)
        logging.info(response.text)
        # NOTE(review): only the parseability of the body is verified, not the
        # HTTP status or the content of the answer -- confirm this is enough.
        try:
            response.json()
        except ValueError:
            # Response.json() signals an undecodable body with a ValueError
            # subclass; a bare except would also trap KeyboardInterrupt and
            # SystemExit.
            raise errors.TaskError('Fail upload to Ferryman')

    def _clean_history(self):
        """Run `yt_cleaner.clean_history_folder` on the export folder.

        The source cluster is cleaned when it is not banach; banach is cleaned
        when no explicit `banach_path` was given (in that case the transferred
        table reuses the source path, see `_transfer_to_banach`).
        """
        import yt.wrapper as yt

        yt_token = self._yt_token()
        if self.Parameters.yt_host != 'banach':
            yt_cleaner.clean_history_folder(
                yt.YtClient(token=yt_token, proxy=self.Parameters.yt_host, config=yt.config.config),
                self.Parameters.export_folder,
            )
        if not self.Parameters.banach_path:
            yt_cleaner.clean_history_folder(
                yt.YtClient(token=yt_token, proxy='banach', config=yt.config.config),
                self.Parameters.export_folder,
            )

    def _upload_metrics(self):
        """Upload row count and uncompressed size of the result table to Solomon.

        Both values are read from banach.
        """
        common_labels = {
            'project': 'market_model_wizard',
            'cluster': 'sandbox_metrics',
            'service': 'yt_table_sizes',
        }

        yt = self._configured_yt('banach')
        result_path = self.Context.result_path

        sensors = [
            {
                'labels': {'sensor': 'offer_table_row_count', },
                'ts': int(time.time()),
                'value': int(yt.row_count(result_path)),
            },
            {
                'labels': {'sensor': 'offer_table_data_size', },
                'ts': int(time.time()),
                'value': int(yt.get_attribute(result_path, 'uncompressed_data_size')),
            },
        ]

        solomon.upload_to_solomon(common_labels, sensors)