# -*- coding: utf-8 -*-


import os
import logging

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter
from sandbox.sandboxsdk.parameters import ResourceSelector
from sandbox.sandboxsdk import environments

import sandbox.projects.logs.common as us_ci


class LogsList(SandboxStringParameter):
    """Comma-separated names of the logs for which the diff must be run."""

    name = 'logs_list'
    description = (
        "Comma-separated logs list for which we should run diff. "
        "It's ignored if meatpath_prefix_resource is specified"
    )


class MapreduceCluster(SandboxStringParameter):
    """Cluster parameter; the task reads it from the context as ``mr_cluster``."""

    # Context key under which the value is stored.
    name = 'mr_cluster'
    # Text describing the parameter.
    description = 'MR cluster where all jobs will work'
    # Value used when the parameter is not filled in.
    default_value = 'cedar00.search.yandex.net'


class DiffToolId(ResourceSelector):
    """Resource selector for the binary that builds the diff."""

    # Context key; on_execute() syncs this resource into ctx['diff_tool_path'].
    name = 'diff_tool_id'
    # Russian text; English: "Id of the resource with the diff-building binary".
    description = 'Идентификатор ресурса бинарника, строящего дифф'


class YtToolId(ResourceSelector):
    """Resource selector for the ``yt`` binary."""

    # Context key; on_execute() reads it to decide whether to sync the yt tool.
    name = 'yt_tool_id'
    # Russian text; English: "Id of the resource with the yt binary".
    description = 'Идентификатор ресурса бинарника yt'


class MrLsId(ResourceSelector):
    """Resource selector for the ``mr_ls`` binary."""

    # Context key; on_execute() syncs this resource into ctx['mr_ls_path'].
    name = 'mr_ls_id'
    # Russian text; English: "Id of the resource with the mr_ls binary".
    description = 'Идентификатор ресурса бинарника mr_ls'


class InputData(SandboxStringParameter):
    """Python-style list of tuples describing the table pairs to compare.

    Tuple layout, as listed in the description and indexed by the task:
    [0] first sessions table, [1] second sessions table, [2] results folder,
    [3] path to the fetched digest, [4] log name, [5] consistency-check table
    of the first run, [6] consistency-check table of the second run.
    """

    name = 'input_data'
    # The pieces below concatenate to the original single-line description.
    description = (
        'Python-style массив кортежей [('
        'путь к первой таблице, '
        'путь ко второй таблице, '
        'папка с результатами, '
        'путь к выжимке, '
        'название лога, '
        'путь к cons. check первой таблицы, '
        'путь к cons. check второй таблицы'
        ')]'
    )


class MeatpathPrefixResource(SandboxStringParameter):
    """Optional resource whose content supplies the folder of the session tables.

    English version of the description: "If set, the session tables listed in
    input_data are taken from the folder whose path is stored in the resource."
    """

    name = 'meatpath_prefix_resource'
    description = (
        'Если задан, то таблицы сессий, перечисленные в input_data, '
        'будут взяты из папки, путь к которой лежит в ресурсе.'
    )


class YTToken(SandboxStringParameter):
    """Name of the vault secret that holds the YT token."""

    # Context key; the task passes the stored value to get_vault_data().
    name = 'yt_token'
    description = 'Name of secret with YT token (from sb-vault)'


class YTTokenOwner(SandboxStringParameter):
    """Owner of the vault secret; the task falls back to its own owner if empty."""

    # Context key; read together with YTToken when the token is fetched.
    name = 'yt_token_owner'
    description = 'Owner of secret with YT token (default: owner of task)'


class YTPool(SandboxStringParameter):
    """YT pool name; on_execute() exports it to child processes as YT_POOL."""

    # Context key under which the pool name is stored.
    name = 'yt_pool'
    description = 'YT pool'


class MaxMutualProcesses(SandboxIntegerParameter):
    """Upper bound on the number of processes launched at the same time.

    English version of the description: "Maximum number of simultaneously
    running session-building processes".
    """

    name = 'max_mutual_processes'
    description = (
        'Максимальное количество одновременно запущенных '
        'процессов построения сессий'
    )
    default_value = 30


class SessionsDiffTask(SandboxTask):
    """Sandbox task that diffs pairs of session tables and publishes a report.

    on_execute() parses the input parameters, runs the diff tool for every
    table pair, collects the sizes of the consistency-check tables and, when
    anything differs, renders HTML pages into the ``diff`` directory and
    registers that directory as a resource.
    """
    # Task type identifier.
    type = "HASHED_SESSIONS_DIFF"
    # Resource requirements of the task; units are not visible here
    # (presumably MiB for RAM and disk -- confirm against the Sandbox SDK).
    cores = 1
    required_ram = 8072
    execution_space = 4096

    # The YT python client is needed by Exists(), which imports yt.wrapper.
    environment = (
        environments.PipEnvironment('yandex-yt', version='0.10.8'),
    )

    # Parameter classes of the task, in the order listed.
    input_parameters = [LogsList, MapreduceCluster, DiffToolId, YtToolId, MrLsId, InputData, MeatpathPrefixResource, YTToken, YTTokenOwner, YTPool, MaxMutualProcesses]

    def __init__(self, *args, **kwargs):
        """Initialise the base task and prepare the child-process environment.

        ``self.env`` is a private copy of the current process environment with
        ``MR_NET_TABLE`` and ``MR_USER`` overridden; the process-launching
        methods of this class hand it to the ``us_ci`` helpers.
        """
        SandboxTask.__init__(self, *args, **kwargs)

        child_env = os.environ.copy()
        child_env.update({'MR_NET_TABLE': 'ipv6', 'MR_USER': 'userstats'})
        self.env = child_env

    def DropDirectoryIfEmpty(self, directory):
        if 'yt_tool_path' in self.ctx:
            dropCmd = [self.ctx['yt_tool_path'], '--proxy', 'hahn', 'remove', directory]

            us_ci.RunProcesses([dropCmd], self.env, silent=True)

    def Exists(self, table_path):
        """Return the result of ``yt.exists(table_path)`` on the configured cluster.

        The global ``yt.wrapper`` configuration (token, proxy url, prefix) is
        rewritten on every call before the check is made.

        :param table_path: path handed to ``yt.exists``; the client prefix is
            set to ``//``.
        """
        import yt.wrapper as yt

        # The secret belongs to the explicitly configured owner, or to the
        # owner of the task when no owner is configured.
        secret_owner = self.ctx.get(YTTokenOwner.name, '') or self.owner
        yt.config["token"] = self.get_vault_data(secret_owner, self.ctx[YTToken.name])

        yt.config["proxy"]["url"] = self.ctx['mr_cluster']
        yt.config["prefix"] = "//"
        return yt.exists(table_path)

    def get_input_tables(self, tablePrefix):
        input_tables = [tablePrefix]
        for suf in ["_errors", "_skips"]:
            if self.Exists(tablePrefix + suf):
                input_tables.append(tablePrefix + suf)
        return input_tables

    def CompareSessions(self, src_prefix):
        """Run the diff tool for every input block and return its output files.

        One command is built per entry of ``self.INPUT_DATA``, except entries
        whose first table path contains both "similar" and "mobile", which are
        skipped. All commands are started through
        ``us_ci.RunSimultaneousProcessesWithFiles`` and afterwards every result
        folder is passed to ``DropDirectoryIfEmpty``.

        :param src_prefix: prefix prepended to both table paths of a block.
        :return: the list returned by the ``us_ci`` helper; it is asserted to
            have one entry per started command.
        """
        diff_commands = []
        result_dirs = []
        for block in self.INPUT_DATA:
            first_table = src_prefix + block[0]
            # TODO get rid of tmp crutch
            if "similar" in first_table and "mobile" in first_table:
                continue
            second_table = src_prefix + block[1]
            support_table = self.MakeSlashedPath(block[2]) + 'support'

            first_inputs = self.get_input_tables(first_table)
            second_inputs = self.get_input_tables(second_table)

            command = [self.ctx['diff_tool_path'], '-s', self.ctx['mr_cluster']]
            command.extend(first_inputs)
            command.extend(second_inputs)
            command.append(support_table)
            diff_commands.append(command)

            result_dirs.append(block[2])

        files = us_ci.RunSimultaneousProcessesWithFiles(
            diff_commands,
            self.env,
            max_processes_count=self.ctx[MaxMutualProcesses.name],
            log_prefix="diff_data"
        )
        logging.info('Data fetched!')
        # Message is Russian for "lengths of commands and files do not match".
        assert len(diff_commands) == len(files), 'Не совпадают длины массивов commands и files'

        for result_dir in result_dirs:
            self.DropDirectoryIfEmpty(result_dir)

        return files

    def GetTableSize(self, table):
        """Run ``mr_ls`` for ``table`` and extract one field of its output.

        :param table: table path; a backslash and an asterisk are appended to
            it before it is passed to the tool.
        :return: the fourth whitespace-separated token of the tool output
            (a string; presumably the size column -- confirm against mr_ls),
            or the integer ``0`` when the output has fewer than four tokens.
        """
        # '\\*' is a literal backslash followed by '*'.
        list_cmd = [self.ctx['mr_ls_path'], '-s', self.ctx['mr_cluster'], '-e', '-l', table + '\\*']
        output, _ = us_ci.RunProcess(list_cmd, self.env)
        logging.info('mr_ls out: ' + output)

        fields = output.split()
        return fields[3] if len(fields) >= 4 else 0

    def CountConsistencyCheckErrors(self, src_prefix):
        result = []
        for inputBlock in self.INPUT_DATA:
            firstTable = src_prefix + inputBlock[5]
            secondTable = src_prefix + inputBlock[6]
            result.append((self.GetTableSize(firstTable), self.GetTableSize(secondTable)))

        return result

    def CalculateDiff(self):
        files = self.CompareSessions(self.ctx['src_prefix'])
        consistencyCheckData = self.CountConsistencyCheckErrors(self.ctx['src_prefix'])

        result = []
        for i in range(len(files)):
            file_content = None
            with open(files[i], 'r') as data_file:
                file_content = data_file.read().strip()
            logData = {}
            logData['Diff'] = file_content
            logData['FetchedDataPath'] = self.ctx['src_prefix'] + self.INPUT_DATA[i][3]
            logData['LogName'] = self.INPUT_DATA[i][4]
            logData['SessionsTable1'] = self.ctx['src_prefix'] + self.INPUT_DATA[i][0]
            logData['SessionsTable2'] = self.ctx['src_prefix'] + self.INPUT_DATA[i][1]
            logData['DiagHashTable'] = self.MakeSlashedPath(self.INPUT_DATA[i][2]) + 'support'
            logData['ErrCount1'] = consistencyCheckData[i][0]
            logData['ErrCount2'] = consistencyCheckData[i][1]

            if file_content != '{}' or logData['ErrCount1'] != logData['ErrCount2']:
                result.append(logData)

        return result

    def MakeDiffFile(self, diffInfos):
        if len(diffInfos) == 0:
            return False

        import make_diff

        jsTemplate = os.path.join(os.path.dirname(__file__), 'us_diff.js')
        htmlTemplate = os.path.join(os.path.dirname(__file__), 'template.html')
        indexHtmlTemplate = os.path.join(os.path.dirname(__file__), 'index_template.html')

        if not os.path.exists('diff'):
            os.makedirs('diff')

        diffUrls = {}
        for info in diffInfos:
            make_diff.render_diff_to_html(info['Diff'], info, os.path.join('diff', info['LogName'] + '_diff.html'), jsTemplate, htmlTemplate)
            diffUrls[info['LogName']] = info['LogName'] + '_diff.html'

        make_diff.render_main_page(diffUrls, indexHtmlTemplate, os.path.join('diff', 'index.html'))

        return True

    def StoreDiffResult(self, diffs):
        self.ctx['out_resource_id'] = None
        hasDiff = self.MakeDiffFile(diffs)
        logging.info('hasDiff: ' + str(hasDiff))
        if hasDiff:
            resource = self.create_resource('diff_list',
                                            'diff',
                                            'SESSIONS_DIFF_TABLES_LIST',
                                            arch=None)

            self.ctx['out_resource_id'] = resource.id
        else:
            self.ctx['out_resource_id'] = None

    def MakeSlashedPath(self, path):
        if path[-1:] == '/':
            return path
        else:
            return path + '/'

    def CorrectionKostyl(self, input_data, idx1, idx2):
        def _get_stripped_and_splitted_by_token(string):
            return string.strip('/').split('/')

        first_input_path_deepest_dirname = _get_stripped_and_splitted_by_token(input_data[idx1])[-2]
        second_input_path_parts = _get_stripped_and_splitted_by_token(input_data[idx2])
        second_input_path_deepest_dirname = second_input_path_parts[-2]
        if first_input_path_deepest_dirname == second_input_path_deepest_dirname:
            input_data[idx2] = '/'.join(second_input_path_parts[:-1]) + "_patched" + "/" + second_input_path_parts[-1]

    def ParseInputData(self):
        self.ctx['src_prefix'] = ''
        logs_list = []
        if 'meatpath_prefix_resource' in self.ctx and len(str(self.ctx['meatpath_prefix_resource'])) > 0:
            meatpath_resource_id = self.ctx['meatpath_prefix_resource']
            meatpath_resource_path = SandboxTask.sync_resource(self, meatpath_resource_id)

            with open(meatpath_resource_path, 'r') as f:
                content = f.read().split('\n')
            self.ctx['src_prefix'] = self.MakeSlashedPath(content[0].strip())
            if len(content) > 1:
                logs_list = content[1].strip().split(',')
        else:
            logs_list = self.ctx['logs_list'].strip().split(',')

        logs_list = list(set(logs_list))
        logging.info('logs_list: ' + str(logs_list))

        self.INPUT_DATA = []
        input_data = eval(self.ctx['input_data'])

        for input_data_item in input_data:
            input_data_item = list(input_data_item)
            self.CorrectionKostyl(input_data_item, 0, 1)
            self.CorrectionKostyl(input_data_item, 5, 6)

            item_str = str(input_data_item)
            if '%log%' in item_str:
                for log in logs_list:
                    log_name = us_ci.GetYTLogName(log)
                    self.INPUT_DATA.append(eval(item_str.replace('%log%', log_name)))
            else:
                self.INPUT_DATA.append(input_data_item)

        logging.info('parsed INPUT_DATA: ' + str(self.INPUT_DATA))

    def on_execute(self):
        """Task entry point: prepare the environment, run the diff, store it.

        Steps: export the YT settings and the token from the vault into the
        child-process environment, parse the input parameters, sync the tool
        resources, then compute the diffs and publish the report.
        """
        # Python 2 only: switch the process-wide default string encoding to
        # UTF-8 (setdefaultencoding is only reachable after reload(sys)).
        import sys
        reload(sys)
        sys.setdefaultencoding('utf-8')

        self.env['MR_RUNTIME'] = 'YT'
        self.env['YT_USE_YAMR_DEFAULTS'] = '1'
        self.env['YT_PREFIX'] = '//'
        self.env['YT_POOL'] = self.ctx[YTPool.name]

        # The secret belongs to the configured owner, or to the task owner
        # when no owner is configured.
        owner = self.ctx.get(YTTokenOwner.name, '') or self.owner
        self.env['YT_TOKEN'] = self.get_vault_data(owner, self.ctx[YTToken.name])

        self.ParseInputData()

        self.ctx['diff_tool_path'] = SandboxTask.sync_resource(self, self.ctx['diff_tool_id'])
        self.ctx['mr_ls_path'] = SandboxTask.sync_resource(self, self.ctx['mr_ls_id'])

        # The yt tool is optional. The previous condition
        # "ctx['yt_tool_id'] is not None > 0" was a chained comparison,
        # i.e. "(id is not None) and (None > 0)", which is always False on
        # Python 2, so the tool was never synced.
        yt_tool_id = self.ctx.get('yt_tool_id')
        if yt_tool_id:
            self.ctx['yt_tool_path'] = SandboxTask.sync_resource(self, yt_tool_id)

        diffs = self.CalculateDiff()
        self.StoreDiffResult(diffs)


# Module-level alias of the task class (presumably the name under which the
# Sandbox loader looks up the task -- confirm against the SDK).
__Task__ = SessionsDiffTask
