# -*- coding: utf-8 -*-

import subprocess

import sandbox.common.types.client as ctc

from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk import parameters

from sandbox.projects import resource_types


class MapreduceExe(parameters.ResourceSelector):
    """Task parameter: resource selector for the mapreduce executable."""

    # Only resources of this type can be selected.
    resource_type = resource_types.MAPREDUCE_EXECUTABLE
    required = True

    # Key under which the chosen resource is stored in the task context.
    name = "mapreduce_exe"
    description = "Mapreduce executable"


class MapreduceServer(parameters.SandboxStringParameter):
    """Task parameter: mapreduce server passed to the client via ``-server``."""

    required = True
    # Value used when the caller does not override the server.
    default_value = "sakura00.search.yandex.net:8013"

    # Key under which the value is stored in the task context.
    name = "mapreduce_server"
    description = "Mapreduce server"


class MapreduceUser(parameters.SandboxStringParameter):
    """Task parameter: mapreduce user passed to the client as ``-opt user=<value>``."""

    # Key under which the value is stored in the task context.
    name = "mapreduce_user"
    description = "Mapreduce user"
    # Was misspelled as ``requried``, so the required flag was never actually set.
    required = True
    default_value = "imgdev"


class MapreduceTable(parameters.SandboxStringParameter):
    """Task parameter: mapreduce table the generated contexts are written to."""

    # No default is declared here; the parameter is marked as required.
    required = True

    # Key under which the value is stored in the task context.
    name = "mapreduce_table"
    description = "Destination table with contexts"


class HamsterwheelExe(parameters.ResourceSelector):
    """Task parameter: resource selector for the hamsterwheel executable."""

    # Only resources of this type can be selected.
    resource_type = resource_types.HAMSTERWHEEL_EXECUTABLE
    required = True

    # Key under which the chosen resource is stored in the task context.
    name = "hamsterwheel_exe"
    description = "hamsterwheel executable"


class HamsterwheelQueries(parameters.ResourceSelector):
    """Task parameter: resource selector for the queries file fed to hamsterwheel."""

    # Only resources of this type can be selected.
    resource_type = resource_types.HAMSTERWHEEL_QUERIES
    required = True

    # NOTE: the context key is spelled "masterwheel_queries" (not "hamsterwheel_...").
    # It is the key read from the task context, so it must stay exactly as it is.
    name = "masterwheel_queries"
    description = "Queries in masterwheel format"


class ImagesDownloadSnippetContexts(SandboxTask):
    """
        Task that builds snippet contexts with the hamsterwheel utility and uploads them to mapreduce.

        The work is a three-stage pipeline connected by OS pipes:
        hamsterwheel | awk (prepend a running record number) | mapreduce -write <table>
    """

    type = 'IMAGES_DOWNLOAD_SNIPPET_CONTEXTS'
    client_tags = ctc.Tag.Group.LINUX
    input_parameters = (MapreduceExe, MapreduceServer, MapreduceUser, MapreduceTable, HamsterwheelExe, HamsterwheelQueries)

    def initCtx(self):
        # timeout - 3 days
        self.ctx['kill_timeout'] = 3 * 24 * 60 * 60

    def on_execute(self):
        """
            Sync the input resources, run the pipeline and wait for all of its stages.

            Raises subprocess.CalledProcessError if hamsterwheel or awk exits with a
            non-zero status; a failure of the mapreduce stage is left to run_process.
        """
        hamsterwheel_exe = self.sync_resource(self.ctx[HamsterwheelExe.name])
        hamsterwheel_queries = self.sync_resource(self.ctx[HamsterwheelQueries.name])

        mapreduce_exe = self.sync_resource(self.ctx[MapreduceExe.name])

        # Stage 1: hamsterwheel writes its results to a pipe.
        hamster_proc = run_process(
            [
                hamsterwheel_exe, "-i", hamsterwheel_queries, "-j", "5", "images",
                "-e", "&nocache=da&no-tests=da&relev=disable_query_url_ban&rearr=ImgQueryUrlBan_off&rwr=-ImgPatch&rearr=ShortBreak_off"
            ], wait=False, stdout=subprocess.PIPE, log_prefix="hamsterwheel", outputs_to_one_file=False
        )
        # Stage 2: awk emits "<running counter>\t<first tab-separated field>" for every input line.
        awk_proc = run_process(
            [
                "awk", "-F", "\t", "BEGIN{OFS=FS;i=0}{i=i+1; print i,$1;}"
            ], wait=False, stdin=hamster_proc.stdout, stdout=subprocess.PIPE, log_prefix="awk", outputs_to_one_file=False
        )
        # awk now owns its own copy of the read end. Drop ours so that hamsterwheel
        # receives SIGPIPE (instead of blocking forever) if awk exits early.
        hamster_proc.stdout.close()

        try:
            # Stage 3: upload to the destination table. The command is an argument list,
            # so it is executed directly: going through a shell would expose the
            # user-supplied server/user/table values to shell interpretation.
            run_process(
                [
                    mapreduce_exe, "-server", self.ctx[MapreduceServer.name], "-opt", "user=" + self.ctx[MapreduceUser.name],
                    "-write", self.ctx[MapreduceTable.name]
                ], stdin=awk_proc.stdout, log_prefix="mapreduce"
            )
        finally:
            # Same reasoning as above: let awk see SIGPIPE if the consumer is gone.
            awk_proc.stdout.close()

        # The upstream stages were started with wait=False, so nothing has checked their
        # exit status yet. A failed producer means the uploaded table may be truncated.
        for stage_name, stage_proc in (("hamsterwheel", hamster_proc), ("awk", awk_proc)):
            return_code = stage_proc.wait()
            if return_code != 0:
                raise subprocess.CalledProcessError(return_code, stage_name)


# Module-level alias for the task class defined above
# (presumably the name the Sandbox task loader looks up - TODO confirm).
__Task__ = ImagesDownloadSnippetContexts
