# -*- coding: utf-8 -*-

import os
import os.path
import sys
import time
import glob
import itertools
import logging

import sandbox.common.types.client as ctc

from sandbox.projects import resource_types
from sandbox.projects.common import apihelpers

from sandbox.sandboxsdk.parameters import SandboxStringParameter, LastReleasedResource, SandboxIntegerParameter
from sandbox.sandboxsdk.paths import copy_path, make_folder
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.channel import channel

from sandbox.projects.common.wizard.providers import EntitySearchProvider


class YaneParseYanswerLog(SandboxTask):
    """
        Parse Yanswer answers from reqans log.
    """

    type = 'YANE_PARSE_YANSWER_LOG'

    environment = (environments.PipEnvironment('requests'),)

    # execution_space = 20 * 1024
    required_ram = 22000
    client_tags = ctc.Tag.LINUX_PRECISE

    # Each parameter below is stored in self.ctx under its ``name`` and is
    # forwarded to parse_reqans_log.py by on_execute() (see the flag noted
    # next to every class).

    # Passed to the parser script as "-s".
    class MRServer(SandboxStringParameter):
        name = 'mr_server'
        description = 'Server'
        default_value = 'sakura00.search.yandex.net:8013'
        required = True

    # Passed to the parser script as "-u".
    class User(SandboxStringParameter):
        name = 'mr_user'
        description = 'User'
        default_value = 'onto'
        required = True

    # Resource holding the mapreduce package; when the context value is empty,
    # on_execute() falls back to the last MAPREDUCE_DEV_PACKAGE resource.
    class MapReduce(LastReleasedResource):
        name = 'mapreduce_resource_id'
        description = 'Mapreduce package'
        resource_type = resource_types.MAPREDUCE_DEV_PACKAGE

    # Passed to the parser script as "-f".
    class SourceFolder(SandboxStringParameter):
        name = 'mr_source_folder'
        description = 'MR source folder'
        default_value = 'reqans_log'
        required = True

    # Passed to the parser script as "-t".
    class TargetFolder(SandboxStringParameter):
        name = 'mr_target_folder'
        description = 'MR target folder'
        default_value = 'yanswer/reqans_yans'
        required = True

    # Passed to the parser script as "-d" (converted to a string).
    class Days(SandboxIntegerParameter):
        name = 'days_back'
        description = 'How many days back to process'
        default_value = 1
        required = True

    input_parameters = [MRServer, User, MapReduce, SourceFolder, TargetFolder, Days]

    def __init__(self, task_id=0):
        SandboxTask.__init__(self, task_id)
        # NOTE(review): presumably seconds, i.e. a 12 hour limit -- confirm against the SDK.
        self.ctx['kill_timeout'] = 12 * 60 * 60

    def _stage_scripts(self):
        """Copy mapreducelib.py (from Arcadia) and parse_reqans_log.py (next to this module) into the task directory."""
        task_dir = self.abs_path()

        common_scripts_dir = Arcadia.get_arcadia_src_dir("arcadia:/arc/trunk/arcadia/yweb/scripts/datascripts/common")
        copy_path(
            os.path.join(common_scripts_dir, "mapreducelib.py"),
            os.path.join(task_dir, "mapreducelib.py")
        )

        # The parser script lives in the same directory as this task module.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        copy_path(
            os.path.join(module_dir, "parse_reqans_log.py"),
            os.path.join(task_dir, "parse_reqans_log.py")
        )

    def _unpack_mapreduce_binary(self):
        """Sync the mapreduce package, unpack the .deb files it contains and return the path to mapreduce-dev."""
        if self.ctx['mapreduce_resource_id']:
            mapreduce_package_path = self.sync_resource(self.ctx['mapreduce_resource_id'])
        else:
            # No explicit resource in the context: take the last one of the expected type.
            mapreduce_resource = apihelpers.get_last_resource(resource_types.MAPREDUCE_DEV_PACKAGE)
            mapreduce_package_path = self.sync_resource(mapreduce_resource)

        target = make_folder(os.path.join(self.abs_path(), 'mr'))
        # Only the .deb members of the archive are extracted.
        run_process(['tar', '-C', target, '-zxf', mapreduce_package_path, '--wildcards', '*.deb'], log_prefix='tar')
        deb_dir = os.path.join(target, 'deb')

        # Every extracted package is unpacked into the same "deb" directory.
        for deb_path in glob.glob(os.path.join(target, '*.deb')):
            run_process(['dpkg', '-x', deb_path, deb_dir], log_prefix='dpkg')
        return os.path.join(deb_dir, 'Berkanavt/mapreduce/bin/mapreduce-dev')

    def _find_entitysearch_build_task(self):
        """
            Return the id of the task behind the first released ENTITY_SEARCH_EXECUTABLE
            resource whose task type is BUILD_ENTITYSEARCH.

            Raises SandboxTaskFailureError when no such task is found.
        """
        stable_releases = channel.sandbox.list_releases(
            resource_type=resource_types.ENTITY_SEARCH_EXECUTABLE, arch='linux', limit=100
        )
        released_resources = itertools.chain.from_iterable(release.resources for release in stable_releases)
        # Only the first match is needed, so stop querying tasks as soon as it is found.
        for resource in released_resources:
            if channel.sandbox.get_task(resource.task_id).type == 'BUILD_ENTITYSEARCH':
                return resource.task_id
        raise SandboxTaskFailureError("couldn't find task 'BUILD_ENTITYSEARCH' with resource 'ENTITY_SEARCH_EXECUTABLE'")

    def on_execute(self):
        """
            Stage the helper scripts, unpack the mapreduce binary, start an Entity Search
            provider and run parse_reqans_log.py against it.

            Raises SandboxTaskFailureError when no suitable Entity Search build task is
            found or when the provider is not alive after the warm-up pause.
        """
        self._stage_scripts()
        mapreduce_binary_path = self._unpack_mapreduce_binary()
        provider_task_id = self._find_entitysearch_build_task()

        # The provider is constructed from the id of the build task, not from a resource.
        with EntitySearchProvider(provider_task_id) as provider:
            logging.debug('Using %s as entitysearch provider', provider_task_id)
            time.sleep(20)  # allowing Entity Search to fail
            if not provider.alive():
                raise SandboxTaskFailureError('Entity Search is dead')
            run_process(
                [
                    sys.executable,
                    "parse_reqans_log.py",
                    "-s", self.ctx["mr_server"],
                    "-m", mapreduce_binary_path,
                    "-u", self.ctx["mr_user"],
                    "-d", str(self.ctx["days_back"]),
                    "-f", self.ctx["mr_source_folder"],
                    "-t", self.ctx["mr_target_folder"],
                    "-p", str(provider.port)
                ],
                log_prefix="parse_reqans_log",
                environment={"MR_NET_TABLE": "ipv6"})


# Module-level alias for the task class defined above.
# NOTE(review): presumably the name the Sandbox task loader looks up -- confirm before renaming.
__Task__ = YaneParseYanswerLog
