# -*- coding: utf-8 -*-

import os
import tarfile
from datetime import datetime

import sandbox.common.types.client as ctc
from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.paths import make_folder, copy_path
from sandbox.sandboxsdk.parameters import LastReleasedResource
from sandbox.sandboxsdk.parameters import ResourceSelector
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.sandboxsdk.parameters import SandboxBoolParameter
from sandbox.sandboxsdk.parameters import SandboxIntegerParameter
from sandbox.sandboxsdk.parameters import SandboxArcadiaUrlParameter


_QUERYREC_FILES = ('queryrec.dict', 'queryrec.weights')
_BLOCKSTAT_FILE = 'blockstat.dict'


class IndexCollectorExecutableParameter(LastReleasedResource):
    """Task parameter selecting the mrcollector binary resource."""

    resource_type = "MRCOLLECTOR_EXECUTABLE"
    description = "Mrcollector binary"
    name = "mrcollector_resource_id"


class BlockstatDictParameter(ResourceSelector):
    """Optional task parameter selecting the blockstat.dict resource."""

    required = False
    resource_type = "YA_BLOCKSTAT_DICT"
    description = "File blockstat.dict, use latest if not specified"
    name = "blockstat_dict_resource_id"


class QueryRecLocationParameter(SandboxArcadiaUrlParameter):
    """Required task parameter: SVN folder holding the queryrec files."""

    required = True
    default_value = "arcadia:/arc/branches/junk/snow/resinfod/arcadia_tests_data/wizard/language"
    description = "SVN url of folder with queryrec.dict and queryrec.weights"
    name = "queryrec_url"


class UILanguageParameter(SandboxStringParameter):
    """Required task parameter: user interface language(s) to collect for."""

    required = True
    # (label, value) pairs; 'ALL' is the default value.
    choices = [
        ("All", "ALL"),
        ("ru", "ru"),
        ("tr", "tr"),
    ]
    default_value = "ALL"
    description = "User interface language(s)"
    name = "ui_language"


class PeriodParameter(SandboxIntegerParameter):
    """Required task parameter: number of days to collect data for."""

    required = True
    default_value = 30
    description = "Number of days to collect data for"
    name = "collect_period"


class UseCommonTmp(SandboxBoolParameter):
    """Optional task flag: keep temp tables in the common //tmp storage."""

    default_value = False
    required = False
    description = "Use common //tmp storage for temp tables"
    name = "use_common_tmp"


class TopQueriesParameter(SandboxIntegerParameter):
    """Required task parameter: how many of the most frequent queries to collect SERPs for."""

    required = True
    # limited by pumpkin servers' hdds
    default_value = 300000
    description = "Number of the most frequent queries to collect SERPs for"
    name = "top_queries"


class MrUseYtRuntime(SandboxBoolParameter):
    """Task flag: use YT."""

    required = True
    default_value = False
    description = 'Use YT'
    name = 'mr_use_yt_runtime'


class MrYtOAuthTokenOwner(SandboxStringParameter):
    """Required task parameter: owner of the vault record with the YT OAuth token."""

    required = True
    default_value = 'YALITE'
    description = 'Vault record owner for YT OAuth token'
    name = 'mr_yt_oauth_token_owner'


class MrYtOAuthTokenName(SandboxStringParameter):
    """Required task parameter: name of the vault record with the YT OAuth token."""

    required = True
    default_value = 'robot-sepe-pumpkin-yt-token'
    description = 'Vault record name for YT OAuth token'
    name = 'mr_yt_oauth_token_name'


class MrServerParameter(SandboxStringParameter):
    """Required task parameter: Mapreduce server (no default value is declared here)."""

    required = True
    description = 'Mapreduce server'
    name = 'mr_server'


class MrUserParameter(SandboxStringParameter):
    """Required task parameter: Mapreduce user."""

    required = True
    default_value = 'tmp'
    description = 'Mapreduce user'
    name = 'mr_user'


class MrTableParameter(SandboxStringParameter):
    """Optional task parameter: Mapreduce table prefix."""

    required = False
    description = 'Mapreduce table prefix'
    name = 'mr_table_prefix'


class BaseBuildPumpkinIndexTask(SandboxTask):
    """
    Base class for tasks that build an index for Pumpkin.

    Provides three building blocks:

    * ``_make_resources`` registers the index and archive resources;
    * ``_make_collector_cmd`` assembles the mrcollector command line;
    * ``_build_index`` prepares the index folder, runs the collector and
      packs the folder into the archive resource.
    """

    input_parameters = (
        QueryRecLocationParameter,
        IndexCollectorExecutableParameter,
        BlockstatDictParameter,
        PeriodParameter,
        TopQueriesParameter,
        UILanguageParameter,
        UseCommonTmp,
        MrUseYtRuntime,
        MrYtOAuthTokenOwner,
        MrYtOAuthTokenName,
        MrUserParameter,
        MrServerParameter,
        MrTableParameter
    )

    # Resource type of the index folder and the ctx key that stores its id.
    INDEX_RESOURCE_TYPE = resource_types.PUMPKIN_INDEX
    INDEX_RESOURCE_KEY = "index_resource_id"

    # Resource type of the packed index and the ctx key that stores its id.
    ARCHIVE_RESOURCE_TYPE = resource_types.PUMPKIN_INDEX_ARCHIVE
    ARCHIVE_RESOURCE_KEY = "archive_resource_id"

    # Leading part of the archive file name.
    ARCHIVE_PREFIX = "generalization-index"
    # Becomes part of the archive "series" together with the UI language.
    INDEX_VERSION = 1

    client_tags = ctc.Tag.Group.LINUX
    cores = 1
    # NOTE(review): units of the two values below are not visible in this file
    # (presumably megabytes) -- confirm against the SandboxTask documentation.
    required_ram = 8072
    execution_space = 6 * 1024

    def _make_resources(self, attributes=None):
        """
        Register the index and archive resources and remember their ids in ctx.

        :param attributes: optional dict of attributes passed to both resources.
            For the archive it is merged over the generated
            ``series_and_volume`` attribute, so caller values take precedence.
        """
        # A fresh dict per call: a shared ``{}`` default would be handed to
        # create_resource and could leak state between calls if mutated there.
        if attributes is None:
            attributes = {}

        # index
        index_resource = self.create_resource(
            self.descr, "pumpkin_index", self.INDEX_RESOURCE_TYPE, attributes=attributes)
        self.ctx[self.INDEX_RESOURCE_KEY] = index_resource.id

        # archive
        current_time = datetime.today().strftime("%Y.%m.%d_%H-%M")
        series = "{lang}-{ver}".format(
            lang=self.ctx[UILanguageParameter.name], ver=self.INDEX_VERSION)
        archive_name = "{0}_{1}_{2}.tgz".format(self.ARCHIVE_PREFIX, current_time, series)
        archive_attributes = {
            "series_and_volume": "{series} {volume}".format(series=series, volume="-1")
        }
        archive_attributes.update(attributes)
        archive_resource = self.create_resource(
            self.descr, archive_name, self.ARCHIVE_RESOURCE_TYPE, attributes=archive_attributes)
        self.ctx[self.ARCHIVE_RESOURCE_KEY] = archive_resource.id

    def _make_collector_cmd(self, additional_options):
        """
        Assemble the mrcollector command line from the task parameters.

        :param additional_options: list of extra command-line arguments appended
            after the options generated here.
        :return: list of command-line arguments, starting with the path of the
            synced mrcollector binary.
        """
        mrcollector_path = self.sync_resource(self.ctx[IndexCollectorExecutableParameter.name])

        cmd = [mrcollector_path]
        cmd += ['-p', str(self.ctx[PeriodParameter.name])]
        cmd += ['-n', str(self.ctx[TopQueriesParameter.name])]
        cmd += ['-l', self.ctx[UILanguageParameter.name]]

        if self.ctx.get(UseCommonTmp.name):
            cmd += ['-T']

        cmd += ['-S', self.ctx[MrServerParameter.name]]
        cmd += ['-U', self.ctx[MrUserParameter.name]]

        # The table prefix is optional, so tolerate its absence from ctx the
        # same way the UseCommonTmp flag is handled above.
        mr_table_prefix = self.ctx.get(MrTableParameter.name)
        if mr_table_prefix:
            cmd += ['-w', mr_table_prefix]

        cmd += additional_options

        return cmd

    def _build_index(self, collector_cmd, config_params=None):
        """
        Prepare the index folder, run the collector and pack the result.

        The index folder path is appended to the command as its last argument.
        Expects the index and archive resources to be registered already
        (their ids are read from ctx).

        :param collector_cmd: sequence of command-line arguments, e.g. the
            result of ``_make_collector_cmd``; it is not modified.
        :param config_params: optional dict; when non-empty, each item is
            written as a "key value" line to ``pumpkin.cfg`` in the index folder.
        :raises AssertionError: if no blockstat.dict resource can be located.
        """
        index_path = channel.sandbox.get_resource(self.ctx[self.INDEX_RESOURCE_KEY]).path
        # list() builds a new command and also accepts a tuple.
        collector_cmd = list(collector_cmd) + [index_path]

        make_folder(index_path)
        self._export_queryrec_files(index_path)
        self._copy_blockstat_dict(index_path)
        self._write_index_metadata(index_path, collector_cmd, config_params)

        run_process(
            collector_cmd,
            log_prefix='collect_index',
            environment=self._collector_environment())

        archive_path = channel.sandbox.get_resource(self.ctx[self.ARCHIVE_RESOURCE_KEY]).path
        with tarfile.open(archive_path, "w|gz") as tar:
            # Store the folder contents under "." rather than under its full path.
            tar.add(index_path, ".")

    def _export_queryrec_files(self, index_path):
        """Export every queryrec file from the configured SVN folder into index_path."""
        queryrec_url = self.ctx[QueryRecLocationParameter.name]
        for queryrec_file in _QUERYREC_FILES:
            Arcadia.export(
                "%s/%s" % (queryrec_url, queryrec_file),
                os.path.join(index_path, queryrec_file))

    def _copy_blockstat_dict(self, index_path):
        """Sync the selected (or the latest) blockstat.dict and copy it into index_path."""
        blockstat_resource = self.ctx.get(BlockstatDictParameter.name)

        if not blockstat_resource:  # use latest
            # NOTE(review): here the value is whatever get_last_resource returns,
            # while the parameter supplies the ctx value (presumably an id) --
            # confirm that sync_resource accepts both.
            blockstat_resource = apihelpers.get_last_resource(resource_types.YA_BLOCKSTAT_DICT)

        # Raised explicitly (instead of an ``assert`` statement) so that the check
        # survives ``python -O``; the exception type is kept for existing callers.
        if not blockstat_resource:
            raise AssertionError("failed to locate blockstat.dict")

        blockstat_path = self.sync_resource(blockstat_resource)
        copy_path(blockstat_path, os.path.join(index_path, _BLOCKSTAT_FILE), symlinks=True)

    def _write_index_metadata(self, index_path, collector_cmd, config_params):
        """Write the ``metadata`` file and, for non-empty config_params, ``pumpkin.cfg``."""
        with open(os.path.join(index_path, "metadata"), "w") as f:
            f.write("Index Build Command: %s\n" % " ".join(collector_cmd))
            f.write("Sandbox Task id: %s\n" % self.id)
            f.write("Time: %s\n" % datetime.now())

        if config_params:
            with open(os.path.join(index_path, "pumpkin.cfg"), "w") as f:
                # items() works on both Python 2 and 3, unlike iteritems().
                for k, v in config_params.items():
                    f.write("%s %s\n" % (k, v))

    def _collector_environment(self):
        """Return the dict of environment variables passed to the collector process."""
        environment = {}
        if self.ctx[MrServerParameter.name] == "cedar00.search.yandex.net:8013":
            environment["MR_NET_TABLE"] = "ipv6"
        if self.ctx[MrUseYtRuntime.name]:
            environment["MR_RUNTIME"] = "YT"
            # The token is read from the vault record named by the task parameters.
            environment["YT_TOKEN"] = self.get_vault_data(
                self.ctx[MrYtOAuthTokenOwner.name], self.ctx[MrYtOAuthTokenName.name])
        return environment
