# -*- coding: utf-8 -*-
from sandbox import sdk2
import logging
import os
import time
import requests
import json

from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import process
from sandbox.common.errors import TaskFailure
import sandbox.common.types.client as ctc


class AdfoxYt2ChTransfuser(sdk2.Task):
    """Home-grown YT -> ClickHouse data transfuser.

    Copies a YT table into a ClickHouse table block by block: every block of
    ``tr_block_size`` rows is dumped into a temporary file by the ``ytReader``
    binary and then fed to ``clickhouse-client`` as an
    ``INSERT ... FORMAT JSONEachRow``.  Optionally runs a check query
    afterwards and compares its ``count`` field with the number of rows in
    the source table.
    """
    # https://wiki.yandex-team.ru/adfox/develop/projects/adfox-audience/inventoryreport

    class Context(sdk2.Task.Context):
        stages = []
        # Index of the first row of the next block to transfer.
        idxNextRow = None
        # Remaining number of subprocess failures the task still tolerates.
        retriesCount = None

    class Requirements(sdk2.Task.Requirements):
        client_tags = ctc.Tag.GENERIC
        environments = (
            environments.PipEnvironment('yandex-yt', '0.8.17-0'),
        )

    class Parameters(sdk2.Task.Parameters):
        _container = sdk2.parameters.Container(
            "Environment container resource",
            default_value=365438266,  # LXC container with ch-client
            required=True
        )

        kill_timeout = 60000  # seconds, i.e. ~16.7h

        with sdk2.parameters.Group("YT Parameters") as yt_block:
            yl_token_vault_name = sdk2.parameters.String('Vault name to extract YT token', default='ADFOX_ROBOT_YT_TOKEN', required=True)
            yl_token_vault_owner = sdk2.parameters.String('Vault owner to extract YT token', default='ADFOX', required=True)
            yt_database = sdk2.parameters.String('YT database name', default="HAHN", required=True)
            yt_table_name = sdk2.parameters.String('YT table name', required=True)
            yt_starting_row = sdk2.parameters.Integer('Row index to start copying from', default=0, required=True)

        with sdk2.parameters.Group("ClickHouse Parameters") as ch_block:
            ch_host_name = sdk2.parameters.String('host name', default='ch.adfox.ru', required=True)
            ch_user_name = sdk2.parameters.String('user name')
            ch_password_vault_name = sdk2.parameters.String('Vault name to extract ClickHouse password', default='ADFOX_CH_REPORT_PASSWORD', required=True)
            ch_password_vault_owner = sdk2.parameters.String('Vault owner to extract ClickHouse password', default='ADFOX', required=True)
            ch_table_name = sdk2.parameters.String('table', required=True)

        with sdk2.parameters.Group("Transfer Parameters") as transfer_block:
            tr_format = sdk2.parameters.String('Transfer format', required=True)
            tr_check_query = sdk2.parameters.String('Query to check result')
            tr_block_size = sdk2.parameters.Integer('Transfer block size (rows)', default=300000, required=True)
            tr_sleep_to_retry = sdk2.parameters.Integer('Timeout before retries (sec)', default=300, required=True)
            tr_retries_count = sdk2.parameters.Integer('Retries on error', default=50, required=True)

        with sdk2.parameters.Group("Debug option") as dbg_block:
            is_test_launch = sdk2.parameters.Bool("Test launch", default=False)

    # (connect, read) timeouts in seconds for the ClickHouse HTTP check query.
    # The read timeout is deliberately generous: it only exists so that a
    # stalled connection cannot block the task until kill_timeout.
    __chHttpTimeout = (30, 3600)

    # Lazily initialised caches, see __getYtReaderPath() / __GetYtClient().
    __strYtReaderPath = None
    __ytClient = None

    def __GetYtToken(self):
        """Return the YT token stored in the configured vault entry."""
        return sdk2.Vault.data(self.Parameters.yl_token_vault_owner, self.Parameters.yl_token_vault_name)

    def __GetChPassword(self):
        """Return the ClickHouse password stored in the configured vault entry."""
        return sdk2.Vault.data(self.Parameters.ch_password_vault_owner, self.Parameters.ch_password_vault_name)

    def __GetChCredentials(self):
        """Return ``(user, password)`` for ClickHouse, or ``None`` if no user is configured.

        Raises:
            TaskFailure: a user name is configured but the vault returned an
                empty password.
        """
        if not self.Parameters.ch_user_name:
            return None
        pswrd = self.__GetChPassword()
        if not pswrd:
            raise TaskFailure('ClickHouse password is not given. Supply user password please')
        return self.Parameters.ch_user_name, pswrd

    def on_execute(self):
        """Task entry point: run either the smoke test or the real transfer.

        Raises:
            TaskFailure: the source table is missing or empty, the transfer
                ran out of retries, or the check query result does not match.
        """
        logging.info('Running inventory report builder')

        # Exported into the process environment so that child processes
        # started below inherit it.
        os.environ['YT_TOKEN'] = self.__GetYtToken()

        if self.Parameters.is_test_launch:
            logging.info('Initiating test launch')
            self.__LaunchTests()
        else:
            yt = self.__GetYtClient()
            if not yt.exists(self.Parameters.yt_table_name):
                raise TaskFailure('Source table does not exists - {}'.format(self.Parameters.yt_table_name))
            if yt.is_empty(self.Parameters.yt_table_name):
                raise TaskFailure('Source table is empty - {}'.format(self.Parameters.yt_table_name))

            self.__TransfuseData()
            if self.Parameters.tr_check_query:
                self.__CheckData(self.Parameters.tr_check_query)

        logging.info('Exiting task')

    def __getYtReaderPath(self):
        """Return the path to the unpacked ``ytReader`` binary.

        On the first call the binaries archive resource is located, unpacked
        into the task's ``src_bin`` directory and the resulting path is cached
        on the instance; later calls return the cached path.

        Raises:
            TaskFailure: no matching resource could be found.
        """
        if self.__strYtReaderPath:
            return self.__strYtReaderPath

        resource_type = 'YT_2_CH_TRANSFUSER_BINARIES'

        logging.info('Obtaining {} resource'.format(resource_type))
        # NOTE(review): the lookup neither filters by resource state nor sets
        # an explicit order, so which matching resource is picked depends on
        # the SDK defaults - confirm this is intended.
        resource = sdk2.Resource["ARCADIA_BINARY_ARCHIVE"].find(
            attrs=dict(ADFOX_ARCHIVE_TYPE=resource_type)
        ).first()
        if not resource:
            raise TaskFailure('{} resource could not be found'.format(resource_type))
        resource_data = sdk2.ResourceData(resource)

        logging.info('Unpacking archive with source binaries - {}'.format(resource_data.path))
        dst_path = str(self.path("src_bin"))
        if not os.path.exists(dst_path):
            os.makedirs(dst_path)
        process.run_process(
            [
                "tar",
                "xf",
                str(resource_data.path),
            ],
            work_dir=dst_path,
            log_prefix="untar_src_bin"
        )

        self.__strYtReaderPath = os.path.join(dst_path, "ytReader")
        return self.__strYtReaderPath

    def __GetYtClient(self):
        """Return a lazily created ``YtClient`` cached on the instance."""
        # Imported here because the package is provided by the task's
        # PipEnvironment requirement rather than at module import time.
        from yt.wrapper import YtClient
        if self.__ytClient is None:
            self.__ytClient = YtClient(proxy=self.Parameters.yt_database, token=self.__GetYtToken())
        return self.__ytClient

    def __LaunchTests(self):
        """Smoke test: obtain ``ytReader`` and run it with ``--help``."""
        strYtReaderPath = self.__getYtReaderPath()
        logging.info('Got ytReader at [{}]'.format(strYtReaderPath))

        logging.info('Performing test launch')
        process.run_process(
            [
                strYtReaderPath,
                "--help",
            ],
            log_prefix="ytReader_test"
        )

    def __downloadBlock(self, idxStart, idxFinish, strBlockFilePath):
        """Dump source rows ``[idxStart, idxFinish)`` into ``strBlockFilePath``.

        The file is (re)created and receives the stdout of ``ytReader``.

        Returns:
            The exit code of the ``ytReader`` process (0 on success).
        """
        logging.info('Downloading block [{}:{})'.format(idxStart, idxFinish))
        with open(strBlockFilePath, 'w') as fBlock:
            proc_download = process.run_process(
                [
                    self.__getYtReaderPath(),
                    '--table={}'.format(self.Parameters.yt_table_name),
                    '--fields={}'.format(self.Parameters.tr_format),
                    '--row_start={}'.format(idxStart),
                    '--row_finish={}'.format(idxFinish),
                    '--server={}'.format(self.Parameters.yt_database)
                ],
                log_prefix="ytReader",
                stdout=fBlock,
                check=False  # the caller handles failures via the exit code
            )
        return proc_download.returncode

    def __uploadBlock(self, strBlockFilePath):
        """Insert the contents of ``strBlockFilePath`` into the ClickHouse table.

        The file is passed as stdin to ``clickhouse-client``.

        Returns:
            The exit code of the ``clickhouse-client`` process (0 on success).

        Raises:
            TaskFailure: a user name is configured but no password is available.
        """
        logging.info('Uploading block')

        cmd = [
            'clickhouse-client',
            '--host={}'.format(self.Parameters.ch_host_name),
        ]
        # Credentials are appended only when a user is configured, so the
        # client never receives empty positional arguments.
        credentials = self.__GetChCredentials()
        if credentials is not None:
            # NOTE(review): the password is passed on the command line and is
            # therefore visible in the process list; consider a safer channel.
            cmd.append('--user={}'.format(credentials[0]))
            cmd.append('--password={}'.format(credentials[1]))
        cmd.append('--query=INSERT INTO {} FORMAT JSONEachRow'.format(self.Parameters.ch_table_name))
        cmd.append('--max_insert_block_size=4000000000')  # TODO (helium): replace it with [self.Parameters.tr_block_size]

        with open(strBlockFilePath, 'r') as fBlock:
            proc_upload = process.run_process(
                cmd,
                log_prefix="block_upload",
                stdin=fBlock,
                check=False  # the caller handles failures via the exit code
            )
        return proc_upload.returncode

    def __processError(self, returnCode):
        """Account for a failed subprocess and pause before the next attempt.

        Decrements the retry budget kept in the task context and sleeps for
        ``tr_sleep_to_retry`` seconds.

        Raises:
            TaskFailure: the retry budget is already exhausted.
        """
        logging.warning('WARNING! Subprocess finished with error code - {}'.format(returnCode))
        if self.Context.retriesCount <= 0:
            raise TaskFailure('There are no more retries to start process again!')

        logging.info('Sleeping for {} seconds '.format(self.Parameters.tr_sleep_to_retry))
        self.Context.retriesCount -= 1
        time.sleep(self.Parameters.tr_sleep_to_retry)

    def __TransfuseData(self):
        """Copy the source table to ClickHouse block by block.

        Progress (index of the next row) and the remaining retry budget are
        kept in the task context.  The loop stops when ``ytReader`` produces
        an empty file, which is taken as the end of the source table.

        Raises:
            TaskFailure: the block size is not positive or the retry budget
                is exhausted.
        """
        blockSize = self.Parameters.tr_block_size
        # A non-positive block size would read an empty range and end the
        # loop right away without copying anything.
        if blockSize <= 0:
            raise TaskFailure('Transfer block size must be positive - {}'.format(blockSize))

        # The index of the first row of the next block is kept in the context.
        if self.Context.idxNextRow is None:
            self.Context.idxNextRow = self.Parameters.yt_starting_row
        # The number of errors still tolerated during the whole transfer.
        if self.Context.retriesCount is None:
            self.Context.retriesCount = self.Parameters.tr_retries_count

        strBlockFile = str(self.path("yt_table.block"))
        logging.info('Temporary file name = {}'.format(strBlockFile))

        # Main loop - iterate over the blocks of the source table.
        while True:
            curStartRowIndex = self.Context.idxNextRow
            curFinishRowIndex = curStartRowIndex + blockSize

            if os.path.isfile(strBlockFile):
                os.remove(strBlockFile)
            downRetCode = self.__downloadBlock(curStartRowIndex, curFinishRowIndex, strBlockFile)
            # If downloading the block failed, try the same block again.
            if downRetCode:
                self.__processError(downRetCode)
                continue

            downloadedBytes = os.path.getsize(strBlockFile)
            logging.info('FILE_SIZE = {}'.format(downloadedBytes))

            # An empty block file means the source table is exhausted.
            if downloadedBytes <= 0:
                logging.info('No data received. It seems like source table is over')
                break

            upRetCode = self.__uploadBlock(strBlockFile)
            # If uploading the downloaded block failed, retry the whole block.
            if upRetCode:
                self.__processError(upRetCode)
                continue

            self.Context.idxNextRow = curFinishRowIndex

    def __askForCount(self, query):
        """Run ``query`` over the ClickHouse HTTP interface and return its first row.

        ``' FORMAT JSON'`` is appended to the query and the first element of
        the ``data`` list of the parsed response is returned.

        Raises:
            TaskFailure: credentials are incomplete or the response has no data.
            Exception: the response body contains the word ``Exception``; the
                first line of the body is used as the message.
        """
        params = None
        credentials = self.__GetChCredentials()
        if credentials is not None:
            params = {'user': credentials[0], 'password': credentials[1]}

        query = query + ' FORMAT JSON'
        url = 'http://{}:8123'.format(self.Parameters.ch_host_name)

        # Neither `params` nor the final request URL may be logged: both carry
        # the password in clear text.
        logging.info('Query: {} {}'.format(url, query))
        r = requests.post(
            url,
            params=params,
            data=query,
            timeout=self.__chHttpTimeout
        )
        if 'Exception' in r.text:
            raise Exception(r.text.splitlines()[0])
        response = json.loads(r.text)
        if not response.get('data'):
            raise TaskFailure('No data in response: {}'.format(query))

        logging.info('Response: OK - {}'.format(str(response['data'])))
        return response['data'][0]

    def __CheckData(self, strCheckQuery):
        """Compare the source row count with the ``count`` returned by the check query.

        Raises:
            TaskFailure: the check query result has no ``count`` field or the
                two counts differ.
        """
        yt = self.__GetYtClient()
        source_row_count = yt.row_count(self.Parameters.yt_table_name)
        logging.info('Source table contains {} rows'.format(source_row_count))

        secondsToSleep = 5 * 60
        logging.info('Sleeping for {} seconds to give ClickHouse some time'.format(secondsToSleep))
        time.sleep(secondsToSleep)

        reply = self.__askForCount(strCheckQuery)
        strCountFieldName = 'count'
        if strCountFieldName not in reply:
            raise TaskFailure('ClickHouse check query result should contain "count" field')
        dest_rows_count = int(reply[strCountFieldName])
        logging.info('Destination table contains {} rows'.format(dest_rows_count))

        if source_row_count != dest_rows_count:
            raise TaskFailure('Data amount in source table do not match data amount in destination table ({} != {})'.format(source_row_count, dest_rows_count))