# -*- coding: utf-8 -*-

import json
import logging
import os
import sys

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter

import sandbox.projects.logs.common as us_ci
from sandbox.projects.common.arcadia import sdk as arcadia_sdk


class MapreduceCluster(SandboxStringParameter):
    """
        Cluster on which the data is fetched.

        The value is stored in the task context under ``mr_cluster`` and is
        passed to the session binaries as their server argument.
    """
    name = 'mr_cluster'
    description = 'MR cluster with fetched data'
    default_value = 'cedar00:8013'


class Meatpath(SandboxStringParameter):
    """
        Folder with the freshest data extract.

        When left empty, the task tries to read the path from the resource
        whose id is stored in the ``path_resource_id`` context field.
    """
    name = 'meatpath'
    description = 'Папка, с самой свежей выжимкой. Если не задана, то будет сделана попытка получить путь к данным из ресурса, id которого прописан в поле контекста path_resource_id'


class LogsList(SandboxStringParameter):
    """
        Comma-separated names of the logs for which testing should be run.
    """
    name = 'logs_list'
    description = 'Comma-separated logs list for which we should run testing'


class DestinationSuffix(SandboxStringParameter):
    """
        Suffix that is appended to the path with the sessions.
    """
    name = 'dest_suffix'
    description = 'Suffix which will be added to sessions folder name'


class CreateSessions(SandboxStringParameter):
    """
        Identifier of the resource holding the create_sessions binary.
    """
    name = 'create_sessions_id'
    description = 'Идентификатор ресурса бинарника create_sessions'


class UserSessionsErrors(SandboxStringParameter):
    """
        Identifier of the resource holding the user_sessions_errors binary.
    """
    name = 'user_sessions_errors_id'
    description = 'Идентификатор ресурса бинарника user_sessions_errors'


class MrCatId(SandboxStringParameter):
    """
        Identifier of a binary resource; the description names it ``mr_cat_id``
        (presumably the mr_cat binary -- TODO confirm).
    """
    name = 'mr_cat_id'
    description = 'Идентификатор ресурса бинарника mr_cat_id'


class MrLsId(SandboxStringParameter):
    """
        Identifier of a binary resource; the description names it ``mr_ls_id``
        (presumably the mr_ls binary -- TODO confirm).
    """
    name = 'mr_ls_id'
    description = 'Идентификатор ресурса бинарника mr_ls_id'


class Geodata(SandboxStringParameter):
    """
        Identifier of the resource holding the geodata file.
    """
    name = 'geodata_id'
    description = 'Идентификатор ресурса файла geodata'


class Blockstat(SandboxStringParameter):
    """
        Identifier of the resource holding the blockstat.dict file.
    """
    name = 'blockstat_id'
    description = 'Идентифкатор ресурса файла blockstat.dict'


class BetaList(SandboxStringParameter):
    """
        Identifier of the resource holding the beta_list.txt file.
    """
    name = 'beta_list_id'
    description = 'Идентифкатор ресурса файла beta_list.txt'


class DirectPageids(SandboxStringParameter):
    """
        Identifier of the resource holding the direct_pageids file.
    """
    name = 'direct_pageids_id'
    description = 'Идентифкатор ресурса файла direct_pageids'


class DirectResourceNoDict(SandboxStringParameter):
    """
        Identifier of the resource holding the direct_resourceno_dict file.
    """
    name = 'direct_resourceno_dict_id'
    description = 'Идентифкатор ресурса файла direct_resourceno_dict'


class DirectAdsDescriptions(SandboxStringParameter):
    """
        Identifier of the resource holding the direct_ads_descriptions file.
    """
    name = 'direct_ads_descriptions_id'
    description = 'Идентифкатор ресурса файла direct_ads_descriptions'


class YTToken(SandboxStringParameter):
    """
        Name of the vault secret that stores the YT token.
    """
    name = 'yt_token'
    description = 'Name of secret with YT token (from sb-vault)'


class YTTokenOwner(SandboxStringParameter):
    """
        Owner of the vault secret that stores the YT token.

        When empty, the task owner is used instead.
    """
    name = 'yt_token_owner'
    description = 'Owner of secret with YT token (default: owner of task)'


class YTPool(SandboxStringParameter):
    """
        YT pool name; exported to the child processes as ``YT_POOL``.
    """
    name = 'yt_pool'
    description = 'YT pool'


class CreateSessionsMaxMutualProcesses(SandboxIntegerParameter):
    """
        Maximum number of session-building processes running at the same time.
    """
    name = 'max_mutual_processes'
    description = 'Максимальное количество одновременно запущенных процессов построения сессий'
    default_value = 15


class ValidateSessionsMaxMutualProcesses(SandboxIntegerParameter):
    """
        Maximum number of session-validation processes running at the same time.
    """
    name = 'validate_sessions_max_mutual_processes'
    description = 'Максимальное количество одновременно запущенных процессов валидации сессий'
    default_value = 30


def list_bunch(base_list, max_size):
    """
        Yield consecutive slices of ``base_list`` holding at most ``max_size`` items.

        Every slice except possibly the last one has exactly ``max_size`` items;
        an empty ``base_list`` yields nothing.

        :param base_list: sequence supporting ``len()`` and slicing
        :param max_size: maximum number of items per slice, must be positive
        :raises ValueError: if ``max_size`` is not positive (raised on first iteration)
    """
    # A non-positive size would never advance through the sequence,
    # so the generator would yield empty slices forever.
    if max_size <= 0:
        raise ValueError('max_size must be positive, got %r' % (max_size,))

    for start in range(0, len(base_list), max_size):
        yield base_list[start: start + max_size]


class CreateSessionsTask(SandboxTask):
    """
        Sandbox task that builds sessions for a list of logs and validates them.

        Execution order (see ``on_execute``): compute the destination suffix,
        prepare the environment for child processes, resolve the data path and
        the list of logs, sync the required resources, run the create_sessions
        binary for every log and finally run the user_sessions_errors binary
        for every log.
    """
    type = "CREATE_SESSIONS_TESTS"

    input_parameters = [MapreduceCluster, Meatpath, LogsList, DestinationSuffix, CreateSessions, UserSessionsErrors, Geodata,
                        Blockstat, BetaList, DirectPageids, DirectResourceNoDict, DirectAdsDescriptions,
                        MrCatId, MrLsId, YTToken, YTTokenOwner, YTPool, CreateSessionsMaxMutualProcesses, ValidateSessionsMaxMutualProcesses]

    # Resource requirements of the task.
    # NOTE(review): units are presumably megabytes for RAM and disk -- confirm against the Sandbox SDK.
    cores = 1
    required_ram = 8072
    execution_space = 10 * 1024

    def __init__(self, *args, **kwargs):
        SandboxTask.__init__(self, *args, **kwargs)

    def InitEnv(self):
        """
            Build ``self.env``, the environment passed to the child processes.

            Starts from a copy of the current process environment and adds the
            YT settings: runtime, prefix, pool, token (read from the vault) and
            the JSON-encoded operation spec.
        """
        self.env = dict(os.environ)

        self.env['MR_RUNTIME'] = 'YT'
        self.env['YT_PREFIX'] = '//'
        self.env['YT_POOL'] = self.ctx[YTPool.name]

        # The secret owner falls back to the task owner when the parameter is empty.
        owner = self.ctx.get(YTTokenOwner.name, '') or self.owner
        self.env['YT_TOKEN'] = self.get_vault_data(owner, self.ctx[YTToken.name])

        yt_spec = {
            "job_io": {
                "table_writer": {
                    "max_row_weight": 128 * 1024 * 1024,
                },
            },
            "data_size_per_job": 2194967296,
            # 4294967296 == 4 * 1024 ** 3
            "reducer": {"memory_limit": 4294967296},
            "mapper": {"memory_limit": 4294967296},
        }
        self.env['YT_SPEC'] = json.dumps(yt_spec)

    def CreateSessions(self, logs_list):
        """
            Run the create_sessions binary once per log, several processes at a time.

            Requires ``self.env``, ``self.PathsManager`` and the synced resource
            paths in ``self.ctx`` to be prepared (done by ``on_execute``).

            :param logs_list: iterable of log names
        """
        # Importable only after on_execute has appended the mounted arcadia
        # directory to sys.path, hence the function-scope import.
        from us_processes import nirvana_graphs

        commands = []
        for log in logs_list:
            log = log.strip()

            cmd = [self.ctx['create_sessions'],
                   '-server', self.ctx['mr_cluster'],
                   '-create', nirvana_graphs._KostylTransformLogOptionAlias(log),
                   '-strict_source_path', self.PathsManager.LogTablePath(log),
                   '-strict_dst_dir_path', self.PathsManager.SessionDirPathWithoutSessionsSuffix(log),
                   '-work_directory', '"' + self.PathsManager.CreateSessionsWorkdir() + '"',
                   '-date', '""',
                   '-suf', '"' + self.PathsManager.CreateSessionsSuffix() + '"',
                   '-skips',
                   '-tech',
                   '-yandex_staff',
                   '-create_yandex_servers_sessions',
                   '-geodata6', self.ctx['geodata'],
                   '-bsdict', self.ctx['blockstat'],
                   '-beta_list', self.ctx['beta_list'],
                   '-direct_pageids', self.ctx['direct_pageids'],
                   '-direct_resourceno_dict', self.ctx['direct_resourceno_dict'],
                   '-direct_ads_descriptions', self.ctx['direct_ads_descriptions'],
                   '-hide-revision',
                   '-hashed']

            commands.append(cmd)

        us_ci.RunSimultaneousProcesses(commands, self.env, max_processes_count=self.ctx[CreateSessionsMaxMutualProcesses.name], backoff_time=3)

    def ValidateSessions(self, logs_list):
        """
            Run the user_sessions_errors binary once per log, several processes at a time.

            Requires ``self.env``, ``self.PathsManager`` and the synced resource
            paths in ``self.ctx`` to be prepared (done by ``on_execute``).

            :param logs_list: iterable of log names
        """
        commands = []
        for log in logs_list:
            log = log.strip()

            cmd = [self.ctx['user_sessions_errors'],
                   '-s', self.ctx['mr_cluster'],
                   '-i', '"' + self.PathsManager.SessionPath(log) + '"',
                   '-o', '"' + self.PathsManager.ValidationResultPath(log) + '"',
                   '-geodata', self.ctx['geodata'],
                   '-b', self.ctx['blockstat'],
                   '-bl', self.ctx['beta_list'],
                   '-hashed']

            commands.append(cmd)

        us_ci.RunSimultaneousProcesses(commands, self.env, max_processes_count=self.ctx[ValidateSessionsMaxMutualProcesses.name], backoff_time=3)

    def ReadLogsListFile(self):
        """
            Resolve the data path and the list of logs to process.

            Both values come from the task context (``meatpath``, ``logs_list``).
            A value that is empty there is taken from the resource referenced
            by ``path_resource_id``: its first line is the path, its second
            line is the comma-separated list of logs.

            The returned path has one trailing ``/`` and a leading ``//``
            removed. The returned log names are stripped, de-duplicated in
            first-seen order, and free of empty names and of names containing
            "similar" or "mobile".

            :return: tuple ``(meatpath, logs_list)``
            :raises Exception: if neither source provides a path or a list of logs
        """
        content = []
        if 'path_resource_id' in self.ctx:
            meatpath_resource_path = SandboxTask.sync_resource(self, self.ctx['path_resource_id'])

            with open(meatpath_resource_path, 'r') as f:
                content = f.read().split('\n')

        # A missing or None context value is treated like an empty one, and the
        # resource is consulted only when it was actually read. Previously an
        # empty meatpath without a resource failed with a bare IndexError.
        meatpath = (self.ctx.get('meatpath') or '').strip()
        if not meatpath and content:
            meatpath = content[0].strip()

        logs_list = self.ctx.get('logs_list') or ''
        if not logs_list:
            if len(content) > 1:
                logs_list = content[1]
            else:
                raise Exception('No logs to build sessions from.')

        if not meatpath:
            raise Exception('Не задан путь к свежему мясу')

        if meatpath[-1:] == '/':
            meatpath = meatpath[:-1]

        if meatpath.startswith('//'):
            meatpath = meatpath[2:]

        # De-duplicate while keeping the first-seen order so that runs are
        # reproducible, and drop empty names produced by stray commas.
        seen = set()
        unique_logs = []
        for name in logs_list.split(','):
            name = name.strip()
            if not name or name in seen:
                continue
            seen.add(name)
            if "similar" in name or "mobile" in name:  # TODO get rid of tmp crutch
                continue
            unique_logs.append(name)

        return meatpath, unique_logs

    def CorrectionKostylForPrecommitCheck(self):
        """
            Compute ``self.dest_suffix`` from the ``dest_suffix`` context value.

            "_patched" is appended when the context has a non-empty
            ``arcadia_patch`` value.
        """
        self.dest_suffix = self.ctx['dest_suffix']
        # Truthiness check also covers an explicit None value, on which len() would fail.
        if self.ctx.get('arcadia_patch'):
            self.dest_suffix = self.dest_suffix + "_patched"

    def on_execute(self):
        """
            Task entry point: prepare everything, then build and validate sessions.

            On success the processed log names are stored in the context under
            ``processed_logs_list``.

            :raises Exception: if the resolved list of logs is empty
        """
        self.CorrectionKostylForPrecommitCheck()
        self.InitEnv()

        with arcadia_sdk.mount_arc_path("arcadia:/arc/trunk/arcadia/quality/user_sessions/reactor/", use_arc_instead_of_aapi=True) as arc_dir:
            # Makes the modules from the mounted directory importable (see CreateSessions).
            sys.path.append(arc_dir)

            # Never write the YT token to the task log in clear text.
            loggable_env = dict(self.env)
            if 'YT_TOKEN' in loggable_env:
                loggable_env['YT_TOKEN'] = '<hidden>'
            logging.info(str(loggable_env))

            self.Meatpath, logs_list = self.ReadLogsListFile()

            logging.info('Meatpath: ' + self.Meatpath)

            logging.info('logs_list: ' + str(logs_list))
            if not logs_list:
                raise Exception('empty logs list')

            # Download the data files and binaries; the local paths are kept in
            # the context under the parameter name without the "_id" suffix.
            self.ctx['geodata'] = SandboxTask.sync_resource(self, self.ctx['geodata_id'])
            self.ctx['blockstat'] = SandboxTask.sync_resource(self, self.ctx['blockstat_id'])
            self.ctx['beta_list'] = SandboxTask.sync_resource(self, self.ctx['beta_list_id'])
            self.ctx['direct_pageids'] = SandboxTask.sync_resource(self, self.ctx['direct_pageids_id'])
            self.ctx['direct_resourceno_dict'] = SandboxTask.sync_resource(self, self.ctx['direct_resourceno_dict_id'])
            self.ctx['direct_ads_descriptions'] = SandboxTask.sync_resource(self, self.ctx['direct_ads_descriptions_id'])

            self.ctx['create_sessions'] = SandboxTask.sync_resource(self, self.ctx['create_sessions_id'])
            self.ctx['user_sessions_errors'] = SandboxTask.sync_resource(self, self.ctx['user_sessions_errors_id'])

            self.PathsManager = us_ci.YtPathsManager(self.Meatpath, self.dest_suffix, need_checkout=True)
            self.CreateSessions(logs_list)
            self.ValidateSessions(logs_list)
            self.ctx['processed_logs_list'] = logs_list


# Module-level alias of the task class.
# NOTE(review): presumably the Sandbox task loader looks the class up by this name -- confirm.
__Task__ = CreateSessionsTask
