# library for python MR streaming scripts
# https://wiki.yandex-team.ru/JandeksPoisk/KachestvoPoiska/MapReduce/mapreducelibpy


from __future__ import with_statement
import sys
import itertools
import struct
import zipfile
import types
import array
import os
import cPickle
import imp
import subprocess
import socket
import time
import random
import shutil
import tempfile
import threading
import httplib
import urllib
import urllib2
import urlparse
import json
import hashlib
import logging
import inspect
import getpass
import calendar
import functools

# Size of each chunk that DelimRecordTokenizer requests from its input stream.
BUFFER_SIZE = 4096
# Names exported by a star-import of this module.
__all__ = ["MapReduce", "MapReduceClient", "MRSession", "Record", "RecordsStream", "RecordTokenizer",
           "TemporaryTable", "CheckedRecord", "DummyRecord", "SharedTransaction", "IMRFunction"]
# PYPY_ENABLED is True when the __pypy__ module can be imported, False
# otherwise; getLoaderShell uses it to choose the interpreter binary name.
try:
    import __pypy__
    PYPY_ENABLED = True
except ImportError:
    PYPY_ENABLED = False


@functools.total_ordering
class SimpleRecord(object):
    """Key/value record tagged with a table index and a record index.

    Equality compares key, value, tableIndex and recordIndex. Ordering
    (``<`` and the operators ``functools.total_ordering`` derives from it)
    compares the ``(key, value)`` pair only.
    """
    __slots__ = ["key", "value", "tableIndex", "recordIndex"]

    def __init__(self, key, value, tableIndex=0, recordIndex=0):
        self.key = key
        self.value = value
        self.tableIndex = tableIndex
        self.recordIndex = recordIndex

    def __eq__(self, o):
        """True when ``o`` is a SimpleRecord with the same fields and indexes."""
        return isinstance(o, SimpleRecord) and self.items() == o.items() \
            and self.tableIndex == o.tableIndex and self.recordIndex == o.recordIndex

    def __ne__(self, o):
        """Negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__`` and
        ``functools.total_ordering`` does not supply it either, so without
        this method two equal records would also compare as ``!=``.
        """
        return not self.__eq__(o)

    def __lt__(self, o):
        """Compare ``(key, value)`` pairs.

        Raises:
            AttributeError: if ``o`` is not an instance of this record's type.
        """
        if not isinstance(o, type(self)):
            raise AttributeError()
        return self.items() < o.items()

    def __repr__(self):
        return "SimpleRecord(%r, %r)" % (self.key, self.value)

    def items(self):
        """Return the record fields as a ``(key, value)`` tuple."""
        return self.key, self.value

    def getTableIndex(self):
        """Return the table index stored on the record."""
        return self.tableIndex

    def getRecordIndex(self):
        """Return the record index stored on the record."""
        return self.recordIndex


@functools.total_ordering
class SubkeyedRecord(object):
    """Key/subkey/value record tagged with a table index and a record index.

    Equality compares key, subkey, value, tableIndex and recordIndex.
    Ordering (``<`` and the operators ``functools.total_ordering`` derives
    from it) compares the ``(key, subkey, value)`` triple only.
    """
    __slots__ = ["key", "subkey", "value", "tableIndex", "recordIndex"]

    def __init__(self, key, subkey, value, tableIndex=0, recordIndex=0):
        self.key = key
        self.subkey = subkey
        self.value = value
        self.tableIndex = tableIndex
        self.recordIndex = recordIndex

    def __eq__(self, o):
        """True when ``o`` is a SubkeyedRecord with the same fields and indexes."""
        return isinstance(o, SubkeyedRecord) and self.items() == o.items() \
            and self.tableIndex == o.tableIndex and self.recordIndex == o.recordIndex

    def __ne__(self, o):
        """Negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__`` and
        ``functools.total_ordering`` does not supply it either, so without
        this method two equal records would also compare as ``!=``.
        """
        return not self.__eq__(o)

    def __lt__(self, o):
        """Compare ``(key, subkey, value)`` triples.

        Raises:
            AttributeError: if ``o`` is not an instance of this record's type.
        """
        if not isinstance(o, type(self)):
            raise AttributeError()
        return self.items() < o.items()

    def __repr__(self):
        return "SubkeyedRecord(%r, %r, %r)" % (self.key, self.subkey, self.value)

    def items(self):
        """Return the record fields as a ``(key, subkey, value)`` tuple."""
        return self.key, self.subkey, self.value

    def getTableIndex(self):
        """Return the table index stored on the record."""
        return self.tableIndex

    def getRecordIndex(self):
        """Return the record index stored on the record."""
        return self.recordIndex


def Record(*args, **kws):
    """Build a record from positional fields.

    Two positional arguments produce a SimpleRecord(key, value). Three or
    more produce a SubkeyedRecord from the first three; anything after the
    third is dropped. Keyword arguments are forwarded to the constructor.
    """
    field_count = len(args)
    assert field_count >= 2, "incorrect arguments count [ARGS: %s]" % repr(args)
    if field_count >= 3:
        return SubkeyedRecord(*args[:3], **kws)
    return SimpleRecord(*args, **kws)

# Length limits checked by CheckedRecord: keys and subkeys against
# MAX_KEY_SIZE, values against MAX_VAL_SIZE.
MAX_KEY_SIZE = 4 * 1024
MAX_VAL_SIZE = 64 * 1024 * 1024
MAX_FILE_SIZE = 1024 ** 3
# Record factory that checks, at creation time, that key, subkey and value
# are strings and do not exceed the size limits above.


def CheckedRecord(*args, **kws):
    """Create a record after validating its positional fields.

    Accepts ``(key, value)`` or ``(key, subkey, value)``. Every field must be
    a ``str``; key and subkey may be at most MAX_KEY_SIZE long and the value
    at most MAX_VAL_SIZE long. Keyword arguments are forwarded to the record
    constructor.

    Returns:
        A SimpleRecord for two fields, a SubkeyedRecord for three.

    Raises:
        AssertionError: if the number of positional arguments is not 2 or 3.
        ValueError: if a field is not a ``str`` or exceeds its size limit.
    """
    assert 2 <= len(args) <= 3, \
        "incorrect argument count [ARGS: %r]" % (args, )
    if not all(isinstance(arg, str) for arg in args):
        raise ValueError(
            "non-string arguments found in CheckedRecord(...) with ARGS: %r" % (args,))
    if len(args[0]) > MAX_KEY_SIZE:
        raise ValueError("wrong key size: %r" % args[0])
    if len(args) == 2:
        if len(args[1]) > MAX_VAL_SIZE:
            raise ValueError("wrong value size: %r" % args[1])
        return SimpleRecord(*args, **kws)
    else:
        if len(args[1]) > MAX_KEY_SIZE:
            raise ValueError("wrong subkey size: %r" % args[1])
        if len(args[2]) > MAX_VAL_SIZE:
            # Report the offending value (args[2]), not the subkey.
            raise ValueError("wrong value size: %r" % args[2])
        return SubkeyedRecord(*args, **kws)

# dummy record that automatically converts its arguments to strings


def DummyRecord(*args, **kws):
    """Pass every positional field through ``str()`` and hand the result to CheckedRecord."""
    stringified = [str(field) for field in args]
    return CheckedRecord(*stringified, **kws)

# WriteInt gives a 10%-20% speedup compared to using struct


def WriteInt(stream, value, arr=array.array('i', [0, ])):
    """Write ``value`` to ``stream`` as a single array element of typecode 'i'.

    ``arr`` is a one-element scratch array. The default instance is created
    once, when the function is defined, and is reused by every call that
    does not supply its own array.

    Returns whatever ``arr.tofile(stream)`` returns.
    """
    arr[0] = value
    return arr.tofile(stream)


class RecordsStream:
    """Record writer for a stream, in delimited or length-prefixed form.

    ``self.output`` is bound in the constructor to ``loutput`` (length-value
    mode) or ``doutput`` (delimited mode) according to ``ctx.LenvalMode``.
    """

    def __init__(self, stream, ctx):
        self.stream = stream
        self.currentTableIndex = 0
        self.lenvalMode = ctx.LenvalMode
        if self.lenvalMode:
            self.output = self.loutput
        else:
            self.delimiter = ctx.FieldSeparator
            self.terminator = ctx.RecordSeparator
            self.output = self.doutput

        if not hasattr(self.stream, "fileno"):
            # No fileno attribute: pack integers with struct and use write().
            self.autoflush = False
            packer = struct.Struct('i')

            def write_packed(_stream, _value):
                return _stream.write(packer.pack(_value))

            self.output_int = write_packed
        else:
            # Stream exposes fileno: go through WriteInt with a private
            # scratch array owned by this instance.
            self.autoflush = ctx.AutoFlush
            scratch = array.array('i', [0, ])

            def write_array(_stream, _value):
                return WriteInt(_stream, _value, arr=scratch)

            self.output_int = write_array

    def outlenval(self, value):
        """Write the length of ``value`` as an int, then ``value`` itself."""
        target = self.stream
        self.output_int(target, len(value))
        target.write(value)

    def lchangetable(self, index):
        """Length-value mode: emit -1 and the new table index when the table changes."""
        if self.currentTableIndex == index:
            return
        self.currentTableIndex = index
        self.output_int(self.stream, -1)
        self.output_int(self.stream, self.currentTableIndex)

    def loutput(self, rec):
        """Write ``rec`` in length-value form, switching tables first if needed."""
        self.lchangetable(rec.getTableIndex())
        for field in rec.items():
            self.outlenval(field)
        if self.autoflush:
            self.stream.flush()

    def dchangetable(self, index):
        """Delimited mode: emit the new table index as its own line when the table changes."""
        if self.currentTableIndex == index:
            return
        self.currentTableIndex = index
        self.stream.write(str(self.currentTableIndex))
        self.stream.write(self.terminator)

    def doutput(self, rec):
        """Write ``rec`` as delimiter-joined fields followed by the terminator."""
        self.dchangetable(rec.getTableIndex())
        line = self.delimiter.join(rec.items())
        self.stream.write(line)
        self.stream.write(self.terminator)
        if self.autoflush:
            self.stream.flush()


class HttpChunkedStream:
    """Write-only stream that POSTs its data to a URL with chunked transfer encoding.

    Data passed to write() is buffered and sent in chunks of ``bufsize``
    characters; flush() sends whatever is buffered as a smaller chunk.
    close() finishes the request, abort() drops the connection without
    finishing it.
    """
    bufsize = 65536

    def __init__(self, url):
        """Open the connection and send the request line and headers.

        The port is taken from the URL when present; otherwise 80 is used,
        whatever the URL scheme is.
        """
        self.buffer = ""
        proto, hostname, path, query, fragment = urlparse.urlsplit(url)
        server, port = hostname.split(
            ":") if ":" in hostname else (hostname, 80)
        location = path + "?" + query
        self.conn = httplib.HTTPConnection(server, int(port))
        self.conn.putrequest("POST", location)
        self.conn.putheader("Transfer-Encoding", "chunked")
        self.conn.endheaders()

    def write(self, msg):
        """Buffer ``msg``, sending a full chunk each time ``bufsize`` is reached."""
        offset = 0
        while len(self.buffer) + len(msg) - offset >= self.bufsize:
            send_length = self.bufsize - len(self.buffer)
            self._send(self.buffer + msg[offset: offset + send_length])
            offset += send_length
            self.buffer = ""
        self.buffer += msg[offset:]

    def flush(self):
        """Send the buffered data, if any, as one chunk."""
        # An empty buffer must not be sent: a zero-length chunk ends the body.
        if self.buffer:
            self._send(self.buffer)
            self.buffer = ""

    def _send(self, text):
        """Send ``text`` as one chunk: hex length, CRLF, payload, CRLF."""
        self.conn.send("%x\r\n" % len(text))
        self.conn.send(text)
        self.conn.send("\r\n")

    def close(self):
        """Flush, terminate the chunked body, check the response and close the connection.

        Raises:
            RuntimeError: if the response status is not 200.
        """
        try:
            self.flush()
            # The zero-length chunk marks the end of the chunked body.
            self._send("")
            response = self.conn.getresponse()
            if response.status != 200:
                raise RuntimeError("bad response code: %s, \"%s\"\n\n%s" % (
                    response.status, response.reason, response.read()))
        finally:
            # Release the socket on success and on failure, so that the
            # connection does not leak and closed() reports True afterwards.
            self.conn.close()

    def abort(self):
        """Close the socket without finalizing chunked stream"""

        self.conn.close()

    def closed(self):
        """Return True when the underlying connection has no open socket."""
        return self.conn.sock is None

#    """We don't want to autoclose stream after exceptions, so use close() for manual stream closing after writing operations"""
#    def __del__(self):
#        if not self.closed():
#            self.close()


def RecordTokenizer(stream, ctx):
    """Return the tokenizer for ``stream`` selected by ``ctx.LenvalMode``."""
    if ctx.LenvalMode:
        return LenRecordTokenizer(stream, ctx)
    return DelimRecordTokenizer(stream, ctx)


def DelimRecordTokenizer(stream, ctx):
    """Generator that parses delimiter-separated text from ``stream`` into records.

    The stream is read in BUFFER_SIZE chunks and cut at ``ctx.RecordSeparator``.
    Each line is split at ``ctx.FieldSeparator`` into two fields, or three
    when ``ctx.SubkeyMode`` is set, and passed to Record().

    When ``ctx.EnableTableSwitching`` or ``ctx.EnumerateRecords`` is set, a
    line containing no field separator is parsed as an integer table index.
    With ``ctx.EnumerateRecords`` the line after a table index is parsed as
    an integer record index, and the record index is incremented after each
    yielded record.

    Raises:
        ValueError: if a record line splits into fewer than two fields.
    """
    delimiter = ctx.FieldSeparator
    terminator = ctx.RecordSeparator
    usingSubkey = ctx.SubkeyMode
    expect_table_switcher = ctx.EnableTableSwitching
    expect_record_indexes = ctx.EnumerateRecords
    table_index = 0
    record_index = 0

    def createRecord(line):
        # Reads table_index / record_index from the enclosing scope at call
        # time, so it always sees their current values.
        args = line.split(delimiter, 2 if usingSubkey else 1)
        if len(args) < 2:
            raise ValueError("Incorrect line value: %s [splitted to: %s; delimiter: %s; terminator: %s]" % (
                repr(line), repr(args), ord(delimiter), ord(terminator)))
        return Record(*args, tableIndex=table_index, recordIndex=record_index)

    buffer = stream.read(BUFFER_SIZE)
    # True when the next line must be read as a record index.
    want_record_index = False
    # Position in buffer from which the next terminator search starts.
    search_ptr = 0
    while buffer != "":
        # Start of the first line in buffer that has not been consumed yet.
        rec_ptr = 0
        while True:
            _brk = buffer.find(terminator, search_ptr)
            if _brk == -1:
                # No complete line left in buffer; more data is needed.
                break
            if want_record_index:
                record_index = int(buffer[rec_ptr: _brk])
                want_record_index = False
            elif buffer.find(delimiter, rec_ptr, _brk) == -1 and (expect_table_switcher or expect_record_indexes):
                # A line without a field separator is a table index.
                table_index = int(buffer[rec_ptr: _brk])
                want_record_index = expect_record_indexes
            else:
                yield createRecord(buffer[rec_ptr: _brk])
                if expect_record_indexes:
                    record_index += 1
            rec_ptr = search_ptr = _brk + len(terminator)
        # Keep the unconsumed tail and append the next chunk. The tail was
        # already searched, so the next search starts just past it.
        search_ptr = len(buffer) - rec_ptr
        buffer = buffer[rec_ptr:] + stream.read(BUFFER_SIZE)
        # Buffer length equal to the tail length means read() returned
        # nothing; a non-empty tail is then a final line with no terminator.
        if len(buffer) == search_ptr and len(buffer) > 0:
            assert not (buffer.find(delimiter) == -
                        1 and (expect_table_switcher or expect_record_indexes))
            yield createRecord(buffer)
            buffer = ""


class LenReader(object):
    """Incremental reader of length-prefixed values from a stream.

    Each value is a struct 'i' integer size followed by that many bytes of
    payload. A size of -1 is a table switch marker. It is followed by a
    4-byte table index, or by 12 bytes (struct '=iQ': table index and record
    index) when record indexes are expected.

    Data is fetched through ``stream.read`` in chunks of at least
    ``buf_size``. ``buffer`` / ``offset`` hold the current chunk and the
    read position inside it between calls.
    """
    __slots__ = ["buffer", "offset", "stream_read",
                 "table_index", "record_index"]
    # The STATE_* constants are not referenced anywhere in this class.
    STATE_READ_SIZE = 0
    STATE_READ_VALUE = 1
    STATE_READ_TABLE_INDEX = 2
    buf_size = 2**18
    int_struct_unpack = struct.Struct('i').unpack
    recindex_struct_unpack = struct.Struct('=iQ').unpack

    def __init__(self, stream):
        self.stream_read = stream.read
        self.buffer = ''
        self.offset = 0
        self.table_index = 0
        self.record_index = 0

    def readValue(self, expect_table_switcher=False, expect_record_indexes=False):
        """Read the next value and return its payload.

        A table switch marker updates ``table_index`` (and ``record_index``
        when ``expect_record_indexes`` is set). The value after the marker is
        then read and returned. For a value read without a preceding marker,
        ``record_index`` is incremented when ``expect_record_indexes`` is set.

        Raises:
            StopIteration: the stream ended exactly on a value boundary.
            Exception: the stream ended inside a size or a payload
                ("incorrect record format"), or a marker was found while
                neither flag is set ("unexpected table switching key").
        """
        o = self.offset
        b = self.buffer

        has_table_switcher = False
        # Take as many of the 4 size bytes as the current chunk still holds.
        size = b[o:o + 4]
        o += 4
        to_read = 4 - len(size)
        while to_read:
            b = self.stream_read(LenReader.buf_size)
            if b == '':
                if size:
                    raise Exception("incorrect record format")
                else:
                    raise StopIteration
            # Offset just past the size bytes taken from the fresh chunk.
            o = to_read
            t = b[:to_read]
            size += t
            to_read -= len(t)
        size, = LenReader.int_struct_unpack(size)

        if size == -1:
            if not (expect_table_switcher or expect_record_indexes):
                raise Exception("unexpected table switching key")
            has_table_switcher = True
            # Marker payload: table index alone, or table index + record index.
            size = 12 if expect_record_indexes else 4
        result = b[o:o + size]
        size -= len(result)
        o += len(result)
        while size:
            b = self.stream_read(max(LenReader.buf_size, size))
            if b == '':
                raise Exception("incorrect record format")
            o = size
            t = b[:size]
            result += t
            size -= len(t)
        self.buffer = b
        self.offset = o

        if has_table_switcher:
            if expect_record_indexes:
                self.table_index, self.record_index = LenReader.recindex_struct_unpack(
                    result)
            else:
                self.table_index, = LenReader.int_struct_unpack(result)
            # Read the value after the marker. The call uses the default
            # flags, so a second marker right here raises, and record_index
            # is not incremented for this value.
            return self.readValue()
        else:
            if expect_record_indexes:
                self.record_index += 1
            return result


def LenRecordTokenizer(stream, ctx):
    """Generator yielding records read from a length-prefixed ``stream``.

    Yields SubkeyedRecord objects when ``ctx.SubkeyMode`` is set, otherwise
    SimpleRecord objects. Only the first field of a record is read with the
    table-switching / enumeration flags. The table and record indexes are
    taken from the reader after all fields of the record have been read.
    The loops have no exit of their own; iteration ends when the reader
    raises.
    """
    with_subkey = ctx.SubkeyMode
    reader = LenReader(stream)
    next_value = reader.readValue

    if with_subkey:
        while True:
            key = next_value(ctx.EnableTableSwitching, ctx.EnumerateRecords)
            subkey = next_value()
            value = next_value()
            yield SubkeyedRecord(key, subkey, value, reader.table_index, reader.record_index)
    else:
        while True:
            key = next_value(ctx.EnableTableSwitching, ctx.EnumerateRecords)
            value = next_value()
            yield SimpleRecord(key, value, reader.table_index, reader.record_index)


# Application identifiers used to select per-application option names.
APP_EXEC = "app_exec"
APP_PROXY = "app_proxy"
APP_FIND = "app_find"
APP_LS = "app_ls"

# Operation categories; each is a one-element set so they can be combined
# with "|" and tested with "&".
OP_EXEC = set(["op_exec"])
OP_IO = set(["op_io"])
OP_MODIF = set(["op_modify"])
OP_RDCFG = set(["op_cfg_read"])
OP_INTERNAL = set(["op_internal"])
OPs_ALL = OP_EXEC | OP_IO | OP_MODIF | OP_RDCFG | OP_INTERNAL


class MetaMROperator(type):
    """Metaclass that records every class declaring ``op_name`` in its own body.

    The registry maps the ``op_name`` value to the class and is shared by
    all classes created through this metaclass.
    """
    registered_ops = {}

    def __init__(cls, name, bases, dct):
        super(MetaMROperator, cls).__init__(name, bases, dct)
        try:
            registered_name = dct["op_name"]
        except KeyError:
            # The class body does not define op_name: nothing to register.
            return
        cls.registered_ops[registered_name] = cls

    @classmethod
    def get(cls, name):
        """Return the class registered under ``name``.

        Raises:
            AttributeError: if no class is registered under that name.
        """
        if name not in cls.registered_ops:
            raise AttributeError
        return cls.registered_ops[name]


class IMapReduceOperator(object):
    """Base class of the operators; subclasses are registered through MetaMROperator.

    An instance wraps the user operation ``op``, a record tokenizer over
    ``sys.stdin_orig`` and a RecordsStream over ``sys.stdout_orig``.
    """
    __metaclass__ = MetaMROperator

    def __init__(self, op, ctx):
        self.op = op
        self.records = RecordTokenizer(sys.stdin_orig, ctx)
        self.outfile = RecordsStream(sys.stdout_orig, ctx)

    def run(self):
        """Execute the operation; subclasses that can be executed override this.

        Raises:
            NotImplementedError: always, in this base implementation. It is
                a subclass of Exception, so handlers written for the plain
                Exception raised previously still catch it.
        """
        raise NotImplementedError("Not implemented")

    @classmethod
    def get_shell_params(cls, ctx):
        """Return the list of shell parameters for this operator (``[cls.shell_p]``)."""
        return [cls.shell_p]

    @classmethod
    def get_op_param(cls, ctx, name):
        """Return the attribute ``name`` of ``ctx``."""
        return getattr(ctx, name)


class MapOperator(IMapReduceOperator):
    """Operator registered as "map": applies ``op`` to each input record."""
    op_name = "map"
    op_type = OP_EXEC
    shell_p = "-map"

    @classmethod
    def get_op_param(cls, ctx, name):
        """Return ``ctx.Map<name>`` when it is set and truthy, else ``ctx.<name>``."""
        return getattr(ctx, "Map" + name, None) or getattr(ctx, name)

    def run(self):
        """Write every record produced by ``op(record)`` for each input record.

        Any exception is logged together with the key of the record being
        processed, and the process exits with status 1.
        """
        # Bound before the loop so that the handler below works even when
        # the failure happens before the first record has been read.
        record = None
        try:
            for record in self.records:
                for outrec in self.op(record):
                    self.outfile.output(outrec)
        except:
            logging.exception("'map' execution error, key: %s" %
                              getattr(record, "key", None))
            sys.exit(1)


class CombinedMapOperator(IMapReduceOperator):
    """Operator registered as "combined_map": ``op`` receives the whole record iterator."""
    op_name = "combined_map"
    op_type = OP_EXEC
    shell_p = "-map"

    @classmethod
    def get_op_param(cls, ctx, name):
        """Return ``ctx.Map<name>`` when it is set and truthy, else ``ctx.<name>``."""
        map_specific = getattr(ctx, "Map" + name, None)
        if map_specific:
            return map_specific
        return getattr(ctx, name)

    def run(self):
        """Write every record produced by ``op(records)``; log and exit(1) on any error."""
        emit = self.outfile.output
        try:
            for produced in self.op(self.records):
                emit(produced)
        except:
            logging.exception("'combined_map' execution error")
            sys.exit(1)


class ReduceOperator(IMapReduceOperator):
    """Operator registered as "reduce": ``op`` is called once per run of equal keys."""
    op_name = "reduce"
    op_type = OP_EXEC

    @classmethod
    def get_shell_params(cls, ctx):
        """Pick the reduce flag: the first set option among ForceReduceWS,
        WithoutSort and ForceHashReduce wins; plain "-reduce" otherwise."""
        if ctx.ForceReduceWS:
            return ["-reducewsonly"]
        if ctx.WithoutSort:
            return ["-reducews"]
        if ctx.ForceHashReduce:
            return ["-hash-reduce"]
        return ["-reduce"]

    @classmethod
    def get_op_param(cls, ctx, name):
        """Return ``ctx.Reduce<name>`` when it is set and truthy, else ``ctx.<name>``."""
        reduce_specific = getattr(ctx, "Reduce" + name, None)
        if reduce_specific:
            return reduce_specific
        return getattr(ctx, name)

    def run(self):
        """Group consecutive records by key and write what ``op(key, records)`` yields.

        Any exception is logged with the current key and the process exits
        with status 1.
        """
        def key_of(rec):
            return rec.key

        key = None
        try:
            for key, records in itertools.groupby(self.records, key=key_of):
                for produced in self.op(key, records):
                    self.outfile.output(produced)
        except:
            logging.exception("'reduce' execution error, key: %s" % key)
            sys.exit(1)


class CombinedReduceOperator(IMapReduceOperator):
    """Operator registered as "combined_reduce": ``op`` receives the grouped iterator."""
    op_name = "combined_reduce"
    op_type = OP_EXEC

    @classmethod
    def get_shell_params(cls, ctx):
        """Pick the reduce flag: WithoutSort first, then ForceHashReduce,
        plain "-reduce" otherwise."""
        if ctx.WithoutSort:
            return ["-reducews"]
        if ctx.ForceHashReduce:
            return ["-hash-reduce"]
        return ["-reduce"]

    @classmethod
    def get_op_param(cls, ctx, name):
        """Return ``ctx.Reduce<name>`` when it is set and truthy, else ``ctx.<name>``."""
        reduce_specific = getattr(ctx, "Reduce" + name, None)
        if reduce_specific:
            return reduce_specific
        return getattr(ctx, name)

    def run(self):
        """Pass the (key, records) groups to ``op`` and write what it yields.

        On any exception the error is logged, followed by the key of the
        next group (or a note that the input is exhausted), and the process
        exits with status 1.
        """
        groups = None
        try:
            groups = itertools.groupby(self.records, key=lambda rec: rec.key)
            for produced in self.op(groups):
                self.outfile.output(produced)
        except:
            logging.exception("'combined_reduce' execution error")
            try:
                if groups:
                    upcoming_key = groups.next()[0]
                else:
                    upcoming_key = None
                logging.warning("next key: '%s'", upcoming_key)
            except StopIteration:
                logging.warning("at end of input")
            sys.exit(1)


class InternalSortOperator(IMapReduceOperator):
    """Operator descriptor registered as "internal-sort".

    Type OP_INTERNAL, shell parameter "-sort". run() is not overridden.
    """
    op_name = "internal-sort"
    op_type = OP_INTERNAL
    shell_p = "-sort"


class SortOperator(IMapReduceOperator):
    """Operator descriptor registered as "sort".

    Type OP_MODIF, shell parameter "-sort". run() is not overridden.
    """
    op_name = "sort"
    op_type = OP_MODIF
    shell_p = "-sort"


class CopyOperator(IMapReduceOperator):
    """Operator descriptor registered as "copy".

    Type OP_MODIF, shell parameter "-copy". run() is not overridden.
    """
    op_name = "copy"
    op_type = OP_MODIF
    shell_p = "-copy"


class MergeOperator(IMapReduceOperator):
    """Operator descriptor registered as "merge".

    Type OP_MODIF, shell parameter "-merge". run() is not overridden.
    """
    op_name = "merge"
    op_type = OP_MODIF
    shell_p = "-merge"


class MoveOperator(IMapReduceOperator):
    """Operator descriptor registered as "move".

    Type OP_MODIF, shell parameter "-move". run() is not overridden.
    """
    op_name = "move"
    op_type = OP_MODIF
    shell_p = "-move"


class DefragOperator(IMapReduceOperator):
    """Operator descriptor registered as "defrag" (type OP_MODIF)."""
    op_name = "defrag"
    op_type = OP_MODIF

    @classmethod
    def get_shell_params(cls, ctx):
        """Return ["-defrag"], followed by "full" when ``ctx.FullDefrag`` is set."""
        params = ["-defrag"]
        if ctx.FullDefrag:
            params = params + ["full"]
        return params


class EggWriter(zipfile.ZipFile):
    """ZIP archive writer for Python library files and packages
    (custom clone of PyZipFile).

    Extra keyword arguments accepted by the constructor:
      save_source      -- prefer source files over compiled ones (default False)
      include_modules  -- names of modules to store even when they live in one
                          of the default import directories
      export_modules   -- dict updated with {path: file name} for every
                          top-level ".so" file written
    """
    # Directories under sys.prefix whose modules are treated as built-in
    # unless they are listed in include_modules.
    def_imp_pathes = [
        os.path.join(sys.prefix, "lib/python%s.%s" % sys.version_info[:2]),
        os.path.join(sys.prefix, "lib-python"),
        os.path.join(sys.prefix, "lib_pypy"),
    ]
    # File suffix -> module type constant, built from imp.get_suffixes().
    suffix_type = dict((i[0], i[2]) for i in imp.get_suffixes())

    def __init__(self, *args, **kws):
        self.save_source = kws.pop("save_source", False)
        self.include_modules = kws.pop("include_modules", set())
        self.exported_modules = kws.pop("export_modules", dict())

        zipfile.ZipFile.__init__(self, *args, **kws)
        #self.debug = True
        # Archive names already written; write() uses it to skip duplicates.
        self.__store = set()
        # Set by write() when a ".so" is stored under a directory in the archive.
        self.has_unexported_modules = False

    def is_builtin_module(self, module):
        """True for modules that must not be archived: interpreter built-ins,
        modules without __file__, and modules located under def_imp_pathes
        that are not named in include_modules."""
        return imp.is_builtin(module.__name__) or not hasattr(module, "__file__") \
            or (any(module.__file__.startswith(sys_path) for sys_path in self.def_imp_pathes) and module.__name__ not in self.include_modules)

    @classmethod
    def split_module_path(cls, path):
        """Return (path without suffix, suffix) for the first known module
        suffix that ``path`` ends with; return None when none matches."""
        for suffix in cls.suffix_type:
            if path.endswith(suffix):
                return path[:-len(suffix)], suffix

    @classmethod
    def is_submodule(cls, module):
        """True when the module name contains a dot."""
        return module.__name__.find(".") >= 0

    def get_module_code(self, basepath):
        """Yield the files to store for the module at ``basepath`` (no suffix).

        Yields the C extension file if one exists, then at most one of the
        source / compiled files: source first when save_source is set,
        compiled first otherwise.
        """
        has_types = {}
        for suffix in self.suffix_type:
            if os.path.isfile(basepath + suffix):
                has_types[self.suffix_type[suffix]] = basepath + suffix
        path = has_types.get(imp.C_EXTENSION)
        if path:
            yield path
        order = [imp.PY_SOURCE, imp.PY_COMPILED] if self.save_source else [
            imp.PY_COMPILED, imp.PY_SOURCE]
        for type in order:
            path = has_types.get(type)
            if path:
                yield path
                return

    def write_module(self, module):
        """Store ``module`` in the archive.

        Built-in modules, submodules (dotted names) and modules whose file
        does not exist are skipped. For a package (``__init__`` file) the
        whole package directory is walked. A single-file module is stored
        under its bare file name.
        """
        try:
            if not module or self.is_builtin_module(module) or self.is_submodule(module):
                return
        except AttributeError:
            # The pytest framework overrides getattr to raise an exception if the checked attribute starts with '_'.
            # Catch this exception here to be able to use mapreducelib through
            # pytest.
            return
        pathname = module.__file__
        dir, filename = os.path.split(pathname)
        # Number of leading characters stripped from a path to get its archive name.
        cutlen = (len(dir) + 1) if dir else 0
        if not os.path.isfile(pathname):
            return
        if os.path.splitext(filename)[0] == "__init__":
            basedir, modname = os.path.split(dir)
            assert modname == module.__name__
            # Archive names keep the package directory itself.
            cutlen = len(basedir) + 1
            for dirname, dirs, files in os.walk(dir):
                for file in files:
                    mod_info = self.split_module_path(
                        os.path.join(dirname, file))
                    if mod_info:
                        # Note: this inner loop rebinds ``file``.
                        for file in self.get_module_code(mod_info[0]):
                            self.write(file, file[cutlen:])
                    elif file.find(".so") > 0:
                        # Stored under the bare file name, i.e. at the
                        # archive root, wherever it sits in the package.
                        self.write(os.path.join(dirname, file), file)
        else:
            mod_info = self.split_module_path(pathname)
            if mod_info:
                for file in self.get_module_code(mod_info[0]):
                    self.write(file, file[cutlen:])

    def write(self, path, arcname):
        """Add ``path`` to the archive as ``arcname`` unless that name was already written.

        A ".so" whose archive name has no directory part is also recorded in
        exported_modules. A ".so" inside a directory sets
        has_unexported_modules instead.
        """
        if arcname not in self.__store:
            self.__store.add(arcname)
            if os.path.splitext(arcname)[-1] == ".so":
                if os.path.sep not in arcname:  # independent so
                    self.exported_modules[path] = os.path.basename(arcname)
                    # self.debug is not assigned in this class; presumably the
                    # debug level attribute of zipfile.ZipFile -- TODO confirm.
                    if self.debug:
                        print "module \"%s\" exported as \"%s\"" % (path, arcname)
                else:  # so inside package, cannot export
                    self.has_unexported_modules = True
                    if self.debug:
                        print "cannot export module \"%s\" as \"%s\"" % (path, arcname)

            zipfile.ZipFile.write(self, path, arcname)
            if self.debug:
                print "module \"%s\" writed as \"%s\"" % (path, arcname)


class MapReduceDistributive:
    """Context manager that assembles the files shipped with one operation.

    On entry a temporary directory is created under ``ctx.WorkDir``. For a
    non-shell operation it receives an egg with the loaded modules, a copy
    of the ``__main__`` source file, the pickled operation and a generated
    loader script. Files listed in ``ctx.Files`` are copied in both cases.
    On exit the directory is removed.
    """
    LOADER_FILE = "__%s_loader__.py"
    SOURCE_FILE = "__%s_source__.py"
    STORAGE_FILE = "__%s_dump.pickle"

    EggWriterClass = EggWriter

    @classmethod
    def isSubmodule(cls, name):
        """True when ``name`` contains a dot at a position other than the first."""
        return name.find(".") > 0

    @classmethod
    def getLoaderFile(cls, directory, kind="map_reduce"):
        """Return the path of the loader script for ``kind`` inside ``directory``."""
        return os.path.join(directory, cls.LOADER_FILE % kind)

    @classmethod
    def getStorageFile(cls, directory, kind="map_reduce"):
        """Return the path of the pickle file for ``kind`` inside ``directory``."""
        return os.path.join(directory, cls.STORAGE_FILE % kind)

    @classmethod
    def getSourceFile(cls, directory, kind="map_reduce"):
        """Return the path of the copied source file for ``kind`` inside ``directory``."""
        return os.path.join(directory, cls.SOURCE_FILE % kind)

    @classmethod
    def setEggWriterClassInstance(cls, eggWriterClassInstance):
        """Replace the class used to build the modules egg."""
        cls.EggWriterClass = eggWriterClassInstance

    def getLoaderShell(self):
        """Return the shell command that runs the operation.

        A shell operation (a string) is returned unchanged. Otherwise the
        command starts the loader script with the storage file as argument.
        The interpreter name carries the major.minor version unless source
        save mode is on.
        """
        if self.isShellOperation:
            return self.operation
        opModule = sys.modules.get(getattr(self.operation, "__module__", None))
        opFile = opModule.__file__ if opModule else ""
        shellPrefix = "#%s:%r#\n LD_LIBRARY_PATH=. PYTHON_EGG_CACHE=." % (
            opFile, self.operation)
        pythonBin = ("pypy%s.%s" % sys.pypy_version_info[:2] if not self.sourceSaveMode else "pypy") if PYPY_ENABLED \
            else ("python%s.%s" % sys.version_info[:2] if not self.sourceSaveMode else "python")
        return " ".join([shellPrefix, pythonBin, self.getLoaderFile(".", self.type), self.getStorageFile(".", self.type)])

    def __init__(self, type, operation, ctx):
        """Remember the operation and the context settings needed to package it.

        Raises:
            AttributeError: if no operator is registered under ``type``.
        """
        op_cls = MetaMROperator.get(type)

        self.type = type
        self.isShellOperation = isinstance(operation, str)
        self.operation = operation
        self.sourceSaveMode = ctx.SaveSource
        self.terminator = op_cls.get_op_param(ctx, "RecordSeparator")
        self.delimiter = op_cls.get_op_param(ctx, "FieldSeparator")
        self.usingSubkey = op_cls.get_op_param(ctx, "SubkeyMode")
        self.lenvalMode = op_cls.get_op_param(ctx, "LenvalMode")
        self.exportFiles = ctx.Files
        self.includeModules = ctx.IncludeModules
        self.workingDir = ctx.WorkDir
        self.expectTableSwitcher = ctx.EnableTableSwitching
        self.expectRecordsIndexes = ctx.EnumerateRecords

    def __enter__(self):
        """Create and populate the temporary directory; return ``self``.

        Raises:
            RuntimeError: if an exported file would overwrite a file that is
                already in the directory.
        """
        self.directory = os.path.abspath(tempfile.mkdtemp(dir=self.workingDir))
        try:
            self._fillDirectory()
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the directory
            # has to be removed here or it would be left behind.
            shutil.rmtree(self.directory, ignore_errors=True)
            self.directory = None
            raise
        return self

    def _fillDirectory(self):
        """Write the egg, source copy, storage, loader and exported files into self.directory."""
        if not self.isShellOperation:
            arcWriter = self.EggWriterClass(os.path.join(self.directory, "%s_modules.egg" % self.type), "w",
                                            save_source=self.sourceSaveMode, include_modules=self.includeModules,
                                            export_modules=self.exportFiles)
            try:
                for module in sys.modules.values():
                    arcWriter.write_module(module)
            finally:
                # Close the archive even when a module could not be written.
                arcWriter.close()
            self.hasUnexportedModules = arcWriter.has_unexported_modules

            sourceFile = sys.modules['__main__'].__file__
            if sourceFile:
                shutil.copy(sourceFile, self.getSourceFile(
                    self.directory, self.type))
            with open(self.getStorageFile(self.directory, self.type), "w") as storWriter:
                self.createStorage(self.operation, storWriter)
            with open(self.getLoaderFile(self.directory, self.type), "w") as loaderWriter:
                self.createLoader(self.type, loaderWriter)
        for fpath, fname in self.exportFiles.iteritems():
            toPath = os.path.join(self.directory, fname)
            if os.path.isfile(toPath):
                raise RuntimeError("can't override file '%s'" % toPath)
            shutil.copy(fpath, toPath)

    def __exit__(self, type, value, tb):
        """Remove the temporary directory, if there is one."""
        if getattr(self, "directory", None):
            shutil.rmtree(self.directory)
            self.directory = None

    def getFiles(self):
        """Yield the full path of every entry in the temporary directory."""
        if getattr(self, "directory", None):
            for file in os.listdir(self.directory):
                yield os.path.join(self.directory, file)

    def createStorage(self, operation, stream):
        """Pickle ``(instance, method_name)`` for ``operation`` into ``stream``.

        For a bound method the instance is its ``__self__`` and the name is
        the method name. Any other callable is stored as itself with the
        name '__call__'.
        """
        if inspect.ismethod(operation):
            method_name = operation.__name__
            instance = operation.__self__
        else:
            method_name = '__call__'
            instance = operation

        cPickle.dump((instance, method_name), stream, protocol=2)

    def createLoader(self, type, stream):
        """Write the loader script for operator ``type`` to ``stream``.

        The generated script sets up sys.path for the modules egg, loads the
        copied source file as ``__main__``, unpickles the operation, rebuilds
        the context and runs the operator registered under ``type``.
        """
        print >>stream, """\
#!/usr/bin/env python
import sys, imp, zipfile, os
modulesEgg = './%(runtype)s_modules.egg'
if %(hasUnexportedModules)r and os.path.isfile(modulesEgg) and zipfile.is_zipfile(modulesEgg):
    modulesDir = os.path.join(os.getenv('PYTHON_EGG_CACHE'), 'modules')
    zipfile.ZipFile(modulesEgg).extractall(path=modulesDir)
    sys.path[0:0] = '.', modulesDir
else:
    sys.path[0:0] = '.', modulesEgg
sourceFile = %(sourceFile)r
sourceModule = imp.load_module('__%(runtype)s_source__', open(sourceFile, 'U'), sourceFile, ('.py', 'U', 1))
sys.modules['__main__'] = sourceModule
import mapreducelib, cPickle, sys
storFile = sys.argv[1]
instance, method_name = cPickle.load(open(storFile))
op = getattr(instance, method_name)
ctx = mapreducelib.MapReduceContext()
ctx.update(
    delimiter=%(delimiter)r, terminator=%(terminator)r, usingSubkey=%(usingSubkey)r,
    lenvalMode=%(lenvalMode)r, enableTableSwitching=%(expectTableSwitcher)r, enumerateRecords=%(expectRecordsIndexes)r
)
sys.stdout_orig = sys.stdout
sys.stdout = sys.stderr
sys.stdin_orig = sys.stdin
sys.stdin = None
mapreducelib.MetaMROperator.get("%(runtype)s")(op, ctx).run()""" % dict(
            hasUnexportedModules=self.hasUnexportedModules,
            sourceFile=self.SOURCE_FILE % type,
            delimiter=self.delimiter,
            terminator=self.terminator,
            usingSubkey=self.usingSubkey,
            lenvalMode=self.lenvalMode,
            expectTableSwitcher=self.expectTableSwitcher,
            expectRecordsIndexes=self.expectRecordsIndexes,
            runtype=type
        )


def DefaultModifier(oldvalue, newvalue):
    """Return ``newvalue``; ``oldvalue`` is ignored."""
    return newvalue


class SchedKeyModifier:
    """Modifier that stores a converted value under a fixed key of a dictionary."""

    def __init__(self, key, type_func=str):
        self._key = key
        self._typeFn = type_func

    def __call__(self, oldvalue, newvalue):
        """Return a copy of ``oldvalue`` with ``type_func(newvalue)`` stored under the key.

        ``oldvalue`` is None or a dictionary and is not modified. A falsy
        ``oldvalue`` is treated as an empty dictionary.
        """
        if oldvalue:
            updated = oldvalue.copy()
        else:
            updated = {}
        updated[self._key] = self._typeFn(newvalue)
        return updated


def SchedOptionsModifier(oldvalue, newvalue):
    """Fill ``newvalue`` with the entries of ``oldvalue`` whose keys it lacks.

    ``newvalue`` is updated in place and returned. Keys already present in
    it keep their values.
    """
    if not oldvalue:
        return newvalue
    for name, inherited in oldvalue.iteritems():
        newvalue.setdefault(name, inherited)
    return newvalue


# oldvalue: always None, newvalue: string
def SingleTableModifier(oldvalue, newvalue):
    """Wrap ``newvalue`` in a one-element list and delegate to ListTableModifier."""
    return ListTableModifier(oldvalue, [newvalue])


def ListTableModifier(oldvalue, newvalue):
    """Return the table list ``newvalue`` with TemporaryTable items replaced by their names.

    ``oldvalue`` is expected to be None always; ``newvalue`` is a list.

    Raises:
        RuntimeError: if ``oldvalue`` is truthy (tables were already
            defined) or ``newvalue`` is a string instead of a list.
    """
    if oldvalue:
        raise RuntimeError("multiple tables input/output definition")
    if isinstance(newvalue, types.StringTypes):
        raise RuntimeError(
            "incorrect tables input/output type: list exepected")
    names = []
    for table in newvalue:
        if isinstance(table, TemporaryTable):
            names.append(table.name)
        else:
            names.append(table)
    return names


def FileListModifier(oldvalue, newvalue):
    """Merge ``newvalue`` into a copy of ``oldvalue`` and return the result.

    ``oldvalue`` is None or a {path: name} dictionary and is not modified.
    ``newvalue`` is either such a dictionary or a list of paths; in a list,
    each path is stored under its last path component.
    """
    if oldvalue:
        files = oldvalue.copy()
    else:
        files = {}
    if isinstance(newvalue, list):
        newvalue = dict((path, os.path.basename(path)) for path in newvalue)
    for path, name in newvalue.iteritems():
        files[path] = name
    return files


def BoolOptionApplier(options, key, value, *args):
    """Apply a boolean option.

    For a list, ``key`` is appended only when ``value`` is truthy. For a
    dict, ``key`` is set to "true" or "false". Any other ``options`` type is
    left untouched.
    """
    if isinstance(options, list):
        if not value:
            return
        options.append(key)
    elif isinstance(options, dict):
        if value:
            options[key] = "true"
        else:
            options[key] = "false"


def StringOptionApplier(options, key, value, *args):
    """Apply an option whose value is rendered with ``str()``.

    For a list, ``key`` and the string are appended. For a dict, the string
    is stored under ``key``. Any other ``options`` type is left untouched.
    """
    if isinstance(options, list):
        options.extend([key, str(value)])
    elif isinstance(options, dict):
        options[key] = str(value)


class TypedRangeOptionApplier:
    """Option applier that validates a value's type and optional inclusive range."""

    def __init__(self, type, begin=None, end=None):
        self._type = type
        self._begin = begin
        self._end = end

    def __call__(self, options, key, value, *args):
        """Validate ``value`` and store its string form in ``options``.

        The string form of ``value`` is converted with the configured type
        and checked against the bounds that are not None. For a list, ``key``
        and the string are appended. For a dict, the string is stored under
        ``key``.

        Raises:
            AttributeError: if the conversion raises ValueError or the
                converted value is out of range.
        """
        strValue = str(value)

        try:
            converted = self._type(strValue)
        except ValueError:
            raise AttributeError(
                "incorrect value string: \"%s\", key: \"%s\"" % (strValue, key))

        below_range = self._begin is not None and not converted >= self._begin
        if below_range or (self._end is not None and not converted <= self._end):
            raise AttributeError(
                "value out of range: \"%s\", key: \"%s\"" % (strValue, key))

        if isinstance(options, list):
            options.extend([key, strValue])
        elif isinstance(options, dict):
            options[key] = strValue


def ListOptionApplier(options, key, values, *args):
    """Append ``key`` followed by each item of ``values`` to a list of options.

    Raises:
        TypeError: if ``options`` is a dict (not supported).
    """
    if isinstance(options, list):
        for item in values:
            options.extend([key, item])
    elif isinstance(options, dict):
        raise TypeError("not yet implemented")


def DestTablesOptionApplier(options, key, values, app_name, op_type, context, *args):
    """Apply destination tables as a list option.

    "sorted" is appended to ``key`` when the context option "SortedOutput"
    is truthy. The rest is delegated to ListOptionApplier.
    """
    sorted_output = context.options.get("SortedOutput")
    effective_key = key + "sorted" if sorted_output else key
    return ListOptionApplier(options, effective_key, values, app_name, op_type, context, *args)


def SchedAttributesApplier(options, key, dct, app_name, op_type, *args):
    """Apply scheduler attributes.

    For APP_EXEC, ``key`` and a comma-separated "k=v" string built from
    ``dct`` are appended to the options list. For APP_PROXY, only the "user"
    entry is stored, as "username" in the options dict, when it is present.
    """
    if app_name == APP_EXEC:
        assert isinstance(options, list)
        pairs = ["%s=%s" % item for item in dct.iteritems()]
        options += [key, ",".join(pairs)]
    if app_name == APP_PROXY and "user" in dct:
        assert isinstance(options, dict)
        options["username"] = dct["user"]


def AuxArgumentsApplier(options, key, values, *args):
    """Append every item of ``values`` to a list of options; ``key`` is not used.

    Raises:
        TypeError: if ``options`` is a dict (not supported).
    """
    if isinstance(options, dict):
        raise TypeError("not yet implemented")
    if isinstance(options, list):
        options.extend(values)


def TransactionApplier(options, key, value, *args):
    """Add the shared transaction options to a list of options.

    Does nothing unless ``value`` is a SharedTransaction and ``options`` is
    a list. The finish timeout is added only when it is not None.
    """
    if not isinstance(value, SharedTransaction) or not isinstance(options, list):
        return
    StringOptionApplier(options, "-sharedtransactionid", value.ident, *args)
    if value.timeout is None:
        return
    StringOptionApplier(options, "-finishtimeout", value.timeout, *args)


class Option(object):
    """Description of one named option and of how it is passed to a tool.

    An option has a name, a conversion function for its values, an
    optional default, a mapping from application name to that
    application's parameter key, and the operation types it applies to.
    """

    # Applier used when none is given explicitly, keyed by type name.
    ApplierByType = {
        "str": StringOptionApplier,
        "int": StringOptionApplier,
        "list": ListOptionApplier,
        "bool": BoolOptionApplier,
    }

    def __init__(self, opname, type="str", default_value=None, app_to_prms=None, appliable_types=None, applier=None):
        self.name = opname
        # `type` is the *name* of a type; eval() resolves it in this scope.
        # Only trusted, hard-coded names must ever be passed here.
        self._typeFn = eval(type)
        if default_value is None:
            self._defaultValue = None
        else:
            self._defaultValue = self._typeFn(default_value)
        self._applyFn = applier or self.ApplierByType.get(type)
        self._app_to_prms = app_to_prms or {}
        self._appliable_optypes = appliable_types or set()

    def toProperType(self, value):
        """Convert `value` with this option's type function."""
        convert = self._typeFn
        return convert(value)

    def getDefaultValue(self):
        """Return the (already converted) default value, or None."""
        return self._defaultValue

    def apply(self, app_name, op_type, options, value, context):
        """Write `value` into `options` if the option is relevant.

        Nothing happens when `op_type` has no overlap with the operation
        types of this option, or when no parameter key is registered for
        `app_name`.
        """
        if not (op_type & self._appliable_optypes):
            return
        key = self._app_to_prms.get(app_name)
        if key is None:
            return
        self._applyFn(options, key, value, app_name, op_type, context)


class TableInfo(object):
    """Table info, as provided by 'mr_ls -l'"""
    __slots__ = ["is_sorted", "owner", "chunks", "records",
                 "size", "disk_size", "compressed_by", "mtime", "name"]

    def __init__(self, obj, fmt="ls"):
        """Build the info from `obj`, interpreted according to `fmt`.

        fmt == "ls":   `obj` is a str that splits into exactly 9 tokens.
        fmt == "json": `obj` is a dict (see fillFromJsonDict for keys).
        Any mismatch or other `fmt` raises ValueError.
        """
        if fmt == "ls":
            if not isinstance(obj, str):
                raise ValueError("expected string description for TableInfo")
            tokens = obj.split()
            if len(tokens) != 9:
                raise ValueError("could not understand mr_ls output")
            self.fillFromLsTokens(tokens)
            return
        if fmt == "json":
            if not isinstance(obj, dict):
                raise ValueError(
                    "expected in-dict encapsulated description for TableInfo")
            self.fillFromJsonDict(obj)
            return
        raise ValueError("unknown TableInfo format: " + fmt)

    def fillFromLsTokens(self, tok):
        """Fill every field from the positional tokens of one listing line."""
        self.is_sorted = (tok[0] == "s")
        self.owner = tok[1]
        # tokens 2..5 are integer counters, assigned in order
        for position, field in ((2, "chunks"), (3, "records"),
                                (4, "size"), (5, "disk_size")):
            setattr(self, field, int(tok[position]))
        self.compressed_by = tok[6]
        self.mtime = time.gmtime(int(tok[7]))
        self.name = tok[8]

    def fillFromJsonDict(self, dct):
        """Fill every field from a dict; only "sorted" is optional."""
        self.is_sorted = bool(dct.get("sorted", False))
        # (attribute name, dict key) pairs that are copied verbatim
        for field, json_key in (("owner", "user"),
                                ("chunks", "chunks"),
                                ("records", "records"),
                                ("size", "size"),
                                ("disk_size", "disk_size"),
                                ("compressed_by", "compression_algo")):
            setattr(self, field, dct[json_key])
        self.mtime = time.gmtime(dct["mod_time"])
        self.name = dct["name"]

    def toJson(self, **json_options):
        """Serialize to a JSON string using the fillFromJsonDict key names.

        `json_options` are forwarded to json.dumps.
        """
        payload = dict(
            sorted=self.is_sorted,
            user=self.owner,
            chunks=self.chunks,
            records=self.records,
            size=self.size,
            disk_size=self.disk_size,
            compression_algo=self.compressed_by,
            mod_time=self.timestamp,
            name=self.name
        )
        return json.dumps(payload, **json_options)

    @property
    def timestamp(self):
        """Modification time as seconds since the epoch (from `mtime`)."""
        return calendar.timegm(self.mtime)

    def __str__(self):
        parts = []
        for slot in self.__slots__:
            parts.append(slot + ": " + str(object.__getattribute__(self, slot)))
        return ", ".join(parts)

    def __getstate__(self):
        # there is no instance __dict__ (slots), so state is built by hand
        state = {}
        for slot in self.__slots__:
            state[slot] = getattr(self, slot)
        return state

    def __setstate__(self, sdict):
        for slot, stored in sdict.iteritems():
            setattr(self, slot, stored)


class DefaultTableInfo(TableInfo):
    """TableInfo placeholder: empty/zero fields and only a real `name`.

    `__nonzero__` returns False, so under Python 2 an instance is falsy
    in a boolean context.
    """

    def __init__(self, name):
        blank = dict(
            user="",
            chunks=0,
            records=0,
            size=0,
            disk_size=0,
            compression_algo="",
            mod_time=0,
            name=name,
        )
        super(DefaultTableInfo, self).__init__(blank, fmt="json")

    def __nonzero__(self):
        return False


class TemporaryObject(object):
    """Helpers shared by objects that need generated, user-tagged names."""

    # alphabet used by random_string
    ABC = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM0123456789"

    @classmethod
    def get_login(cls):
        """Return the current login name.

        A non-empty LOGNAME wins; otherwise USER is returned whenever it
        is set at all; otherwise getpass.getuser() is consulted.
        """
        logname = os.environ.get('LOGNAME')
        if logname:
            return logname
        user = os.environ.get('USER')
        if user is not None:
            return user
        return getpass.getuser()

    @classmethod
    def random_string(cls, length=8):
        """Return `length` characters drawn at random from `ABC`."""
        alphabet = cls.ABC
        picked = [random.choice(alphabet) for _ in xrange(length)]
        return "".join(picked)


class SharedTransaction(TemporaryObject):
    """Identifier (and optional timeout) of a transaction shared by calls."""

    @classmethod
    def __generate_ident(cls):
        # "<login>:<random string>-<unix time in seconds>"
        login = cls.get_login()
        token = cls.random_string()
        stamp = int(time.time())
        return "{0}:{1}-{2}".format(login, token, stamp)

    def __init__(self, ident=None, timeout=None):
        """Create a transaction.

        `ident` may be None (a fresh ident is generated), an explicit
        ident, or another SharedTransaction to copy; in the copy case
        `timeout` must be left as None.
        """
        if isinstance(ident, SharedTransaction):  # copy constructor
            assert timeout is None
            source = ident
            ident, timeout = source.ident, source.timeout
        if ident is None:
            ident = self.__generate_ident()
        self.ident = ident
        self.timeout = timeout


def _genAvailableOptions():
    """Build the tables of known options and of keyword modifiers.

    Returns a pair ``(AvailableOptions, AvailableModifiers)``:

    * AvailableOptions maps an option name to its Option description.
    * AvailableModifiers maps a keyword name to an
      ``(option name, modifier)`` pair.  Every option gets an entry under
      its own name with the first letter lower-cased, using
      DefaultModifier; the explicit entries added afterwards provide
      aliases and special-purpose modifiers.

    Some defaults are read from the environment when this function runs:
    DEF_MR_EXEC, MR_MAPREDUCELIB_TMPDIR, DEF_MR_SERVER, DEF_MR_SAVE_SOURCE.
    """
    # An Option without `app_to_prms` is never written to a command line
    # or query string by Option.apply; it is only readable from a context.
    # The "__FAKE__" key is a placeholder: Option.apply only requires a
    # non-None key, and the custom appliers used with it ignore the key.
    AvailableOptions = dict((op.name, op) for op in [
        Option("MrExec", default_value=os.environ.get(
            "DEF_MR_EXEC", "mapreduce")),
        Option("MrFindExec", default_value="mr_find"),
        Option("MrLsExec", default_value="mr_ls"),
        Option("WorkDir", default_value=os.environ.get(
            "MR_MAPREDUCELIB_TMPDIR", ".")),
        Option("ProxyServer"),
        Option("Server", default_value=os.environ.get("DEF_MR_SERVER", "sdf200:8013"), app_to_prms={
               APP_EXEC: "-server", APP_FIND: "-s", APP_LS: "-s"}, appliable_types=OPs_ALL),
        Option("Verbose", type="bool", default_value=False),
        Option("LoggerName"),
        Option("TestMode", type="bool", default_value=False),
        Option("RecordsCountLimit", type="int"),
        Option("IncludeModules", type="set", default_value=set()),
        Option("SaveSource", type="bool", default_value=(
            "DEF_MR_SAVE_SOURCE" in os.environ or PYPY_ENABLED)),
        Option("ScheduleAttributes", type="dict", app_to_prms={
               APP_EXEC: "-opt", APP_PROXY: "__FAKE__"}, appliable_types=OPs_ALL, applier=SchedAttributesApplier),
        Option("SubkeyMode", type="bool", default_value=True, app_to_prms={
               APP_EXEC: "-subkey", APP_FIND: "-S", APP_PROXY: "subkey"}, appliable_types=OP_EXEC | OP_IO),
        Option("MapSubkeyMode", type="bool", app_to_prms={
               APP_EXEC: "-subkey-map"}, appliable_types=OP_EXEC),
        Option("ReduceSubkeyMode", type="bool", app_to_prms={
               APP_EXEC: "-subkey-reduce"}, appliable_types=OP_EXEC),
        Option("LenvalMode", type="bool", default_value=True, app_to_prms={
               APP_EXEC: "-lenval", APP_FIND: "-lenval", APP_PROXY: "lenval"}, appliable_types=OP_EXEC | OP_IO),
        Option("MapLenvalMode", type="bool", app_to_prms={
               APP_EXEC: "-lenval-map"}, appliable_types=OP_EXEC),
        Option("ReduceLenvalMode", type="bool", app_to_prms={
               APP_EXEC: "-lenval-reduce"}, appliable_types=OP_EXEC),
        Option("EnableTableSwitching", type="bool", default_value=False, app_to_prms={
               APP_EXEC: "-tableindex"}, appliable_types=OP_EXEC | OP_IO),
        Option("EnumerateRecords", type="bool", default_value=False, app_to_prms={
               APP_EXEC: "-tablerecordindex"}, appliable_types=OP_EXEC | OP_IO),
        Option("FieldSeparator", default_value="\t", app_to_prms={
               APP_EXEC: "-fs", APP_PROXY: "fs"}, appliable_types=OP_EXEC | OP_IO),
        Option("MapFieldSeparator", app_to_prms={
               APP_EXEC: "-fs-map"}, appliable_types=OP_EXEC),
        Option("ReduceFieldSeparator", app_to_prms={
               APP_EXEC: "-fs-reduce"}, appliable_types=OP_EXEC),
        Option("RecordSeparator", default_value="\n", app_to_prms={
               APP_EXEC: "-rs", APP_PROXY: "rs"}, appliable_types=OP_EXEC | OP_IO),
        Option("MapRecordSeparator", app_to_prms={
               APP_EXEC: "-rs-map"}, appliable_types=OP_EXEC),
        Option("ReduceRecordSeparator", app_to_prms={
               APP_EXEC: "-rs-reduce"}, appliable_types=OP_EXEC),
        Option("AppendMode", type="bool", app_to_prms={
               APP_EXEC: "-append"}, appliable_types=OP_EXEC | OP_IO | OP_MODIF),
        Option("MemoryLimit", type="int", app_to_prms={
               APP_EXEC: "-memlimit"}, appliable_types=OP_EXEC),
        Option("TimeLimit", type="int", app_to_prms={
               APP_EXEC: "-timelimit"}, appliable_types=OP_EXEC),
        Option("AverageTimeLimit", type="str", app_to_prms={
               APP_EXEC: "-averagetimelimit"}, appliable_types=OP_EXEC),
        Option("SpawnBackupJobs", type="int", app_to_prms={
               APP_EXEC: "-spawnbackupjobs"}, appliable_types=OP_EXEC, applier=TypedRangeOptionApplier(int, 0, 16)),
        Option("PartialCommit", type="int", app_to_prms={
               APP_EXEC: "-partialcommit"}, appliable_types=OP_EXEC, applier=TypedRangeOptionApplier(int, 0, 100)),
        Option("MaxJobFails", type="int", app_to_prms={
               APP_EXEC: "-maxjobfails"}, appliable_types=OP_EXEC, applier=TypedRangeOptionApplier(int, 0)),
        Option("DataGrow", type="float", app_to_prms={
               APP_EXEC: "-datagrow"}, appliable_types=OP_EXEC, applier=TypedRangeOptionApplier(float, 0)),
        Option("ChunksPerHighLevel", type="int", app_to_prms={
               APP_EXEC: "-chunksperhighlevel"}, appliable_types=OP_EXEC | OP_IO | OP_MODIF, applier=TypedRangeOptionApplier(int, 0, 31)),
        Option("Files", type="dict", default_value={}),
        Option("ExportedFiles", type="list", app_to_prms={
               APP_EXEC: "-file"}, appliable_types=OP_EXEC),
        Option("ExportedMapFiles", type="list", app_to_prms={
               APP_EXEC: "-file-map"}, appliable_types=OP_EXEC),
        Option("ExportedReduceFiles", type="list", app_to_prms={
               APP_EXEC: "-file-reduce"}, appliable_types=OP_EXEC),
        Option("StartIndex", type="int", app_to_prms={
               APP_PROXY: "startindex"}, appliable_types=OP_IO),
        Option("EndIndex", type="int", app_to_prms={
               APP_PROXY: "endindex"}, appliable_types=OP_IO),
        Option("LowerKey", app_to_prms={
               APP_EXEC: "-lowerkey", APP_PROXY: "lowerkey"}, appliable_types=OP_IO),
        Option("UpperKey", app_to_prms={
               APP_EXEC: "-upperkey", APP_PROXY: "upperkey"}, appliable_types=OP_IO),
        Option("SourceTables", type="list", app_to_prms={
               APP_EXEC: "-src"}, appliable_types=OP_EXEC | OP_MODIF | OP_INTERNAL),
        Option("DestTables", type="list", app_to_prms={
               APP_EXEC: "-dst"}, applier=DestTablesOptionApplier, appliable_types=OP_EXEC | OP_MODIF | OP_INTERNAL),
        Option("FailOnEmptySrc", type="bool", app_to_prms={
               APP_EXEC: "-failonemptysrctable"}, appliable_types=OP_IO | OP_EXEC | OP_MODIF),
        Option("DynamicAllocMode", type="bool", app_to_prms={
               APP_EXEC: "-dynallocmode"}, appliable_types=OP_IO | OP_EXEC | OP_MODIF),
        Option("IoGroup", type="SharedTransaction", app_to_prms={
               APP_EXEC: "__FAKE__"}, appliable_types=OP_IO, applier=TransactionApplier),
        Option("FixedKey"),
        Option("SortMode", type="bool"),
        Option("WithoutSort", type="bool"),
        Option("ForceReduceWS", type="bool"),
        Option("ForceHashReduce", type="bool"),
        Option("SortedOutput", type="bool"),
        Option("AutoFlush", type="bool", default_value=True),
        Option("SourceProxy", type="list", app_to_prms={
               APP_EXEC: "-srcproxy", }, appliable_types=OP_MODIF),
        Option("SourceServer", type="str", app_to_prms={
               APP_EXEC: "-srcserver", }, appliable_types=OP_MODIF),
        Option("ErrorLevel", type="int", app_to_prms={
               APP_EXEC: "-stderrlevel"}, appliable_types=OPs_ALL),
        Option("SilentLocal", type="bool", app_to_prms={
               APP_EXEC: "-silentlocal"}, default_value=False, appliable_types=OPs_ALL),
        Option("CompressMode", app_to_prms={
               APP_EXEC: "-compress"}, appliable_types=OPs_ALL),
        Option("LocalCwd"),
        Option("YtFiles", type="list", app_to_prms={
               APP_EXEC: "-ytfile"}, appliable_types=OP_EXEC),
        Option("FullDefrag", type="bool"),
        Option("AuxExecArguments", type="list", app_to_prms={
               APP_EXEC: "__FAKE__"}, appliable_types=OP_EXEC, applier=AuxArgumentsApplier),
        # use last option 'AuxExecArguments' only for debug purposes, please
    ])
    # Default modifiers: "OptionName" is reachable as keyword "optionName".
    AvailableModifiers = dict((op.name[0].lower(
    ) + op.name[1:], (op.name, DefaultModifier)) for op in AvailableOptions.itervalues())
    # Aliases and specialised modifiers; update() lets them override a
    # default entry that happens to have the same keyword name.
    AvailableModifiers.update(dict(
        srcTable=("SourceTables", SingleTableModifier),
        srcTables=("SourceTables", ListTableModifier),
        dstTable=("DestTables", SingleTableModifier),
        dstTables=("DestTables", ListTableModifier),
        scheduleAttrs=("ScheduleAttributes", SchedOptionsModifier),
        optAttrs=("ScheduleAttributes", SchedOptionsModifier),
        username=("ScheduleAttributes", SchedKeyModifier("user")),
        chunkSize=("ScheduleAttributes", SchedKeyModifier("chunksize")),
        jobCount=("ScheduleAttributes", SchedKeyModifier("jobcount")),
        threadCount=("ScheduleAttributes", SchedKeyModifier("threadcount")),
        cpuIntensive=("ScheduleAttributes", SchedKeyModifier(
            "cpu.intensive.mode", type_func=int)),
        jobCountMultiplier=("ScheduleAttributes", SchedKeyModifier(
            "jobcount.multiplier", type_func=float)),
        files=("Files", FileListModifier),
        delimiter=("FieldSeparator", DefaultModifier),
        terminator=("RecordSeparator", DefaultModifier),
        usingSubkey=("SubkeyMode", DefaultModifier),
        count=("RecordsCountLimit", DefaultModifier),
        key=("FixedKey", DefaultModifier),
        mrbinary=("MrExec",  DefaultModifier),
    ))
    return AvailableOptions, AvailableModifiers


class MapReduceContext(object):
    """Set of option values used by a client or by a single operation.

    Values live in the `options` dict, keyed by option name, and can be
    read as attributes (``ctx.Server``).  The class also offers helpers
    to start child processes and to log, configured by those options.
    """
    AvailableOptions, AvailableModifiers = _genAvailableOptions()

    @classmethod
    def getDefaultOptions(cls):
        """Return {option name: default} for options with a non-None default."""
        return dict((opname, op.getDefaultValue()) for opname, op in cls.AvailableOptions.iteritems() if op.getDefaultValue() is not None)

    def __init__(self, options=None):
        """Keep a shallow copy of the `options` dict (or start empty)."""
        self.options = options.copy() if options else {}

    def __getattr__(self, attr):
        """Read an option value as an attribute.

        Returns the stored value; for a known option without a stored
        non-None value the option default is stored (via setdefault) and
        returned.  Unknown plain names yield None.

        Raises AttributeError for `options` itself and for dunder names.
        This method only runs when normal lookup has failed, so reaching
        it for `options` means the instance is not initialised yet
        (answering would recurse forever), and answering None for
        protocol hooks such as ``__getstate__`` or ``__deepcopy__`` makes
        the copy/pickle machinery try to call None.
        """
        if attr == "options" or (attr.startswith("__") and attr.endswith("__")):
            raise AttributeError(attr)
        retval = self.options.get(attr, None)
        if retval is None and attr in self.AvailableOptions:
            retval = self.options.setdefault(
                attr, self.AvailableOptions[attr].getDefaultValue())
        return retval

    def update(self, **kws):
        """Apply keyword modifiers to the stored options in place.

        Keywords whose value is None are skipped.  Each remaining keyword
        is looked up in AvailableModifiers; its modifier is called with
        the current option value and the new value, and the result is
        converted with the option's type function before being stored.

        Raises AttributeError for an unknown keyword and RuntimeError
        when a modifier refers to an option that does not exist.
        """
        for k, v in kws.iteritems():
            if v is not None:
                mod_descr = self.AvailableModifiers.get(k)
                if mod_descr is None:
                    raise AttributeError("unknown option %s" % k)
                opname, modifier = mod_descr
                if opname not in self.AvailableOptions:
                    raise RuntimeError(
                        "bad option name modifier %s(\"%s\")" % (k, opname))
                opvalue = self.options.get(opname, None)
                self.options[opname] = self.AvailableOptions[
                    opname].toProperType(modifier(opvalue, v))

    def copy(self):
        """Return a new context of the same class with copied options."""
        cls = type(self)
        return cls(self.options)

    def updated_copy(self, **kws):
        """Return a copy with `kws` applied; this context is not changed."""
        cp_object = self.copy()
        cp_object.update(**kws)
        return cp_object

    def getOptions(self, app_name, op_type, init_value):
        """Let every stored non-None option write itself into `init_value`.

        `init_value` is the container handed to the appliers (callers in
        this file pass a list or a dict); the same object is returned.
        """
        retval = init_value
        for opname, value in self.options.iteritems():
            if value is not None:
                self.AvailableOptions[opname].apply(
                    app_name, op_type, retval, value, self)
        return retval

    def popen(self, args, **kws):
        """Start a child process and return the Popen object.

        `shell` defaults to False, `bufsize` is forced to 65536,
        `close_fds` is enabled on posix, and a truthy LocalCwd option
        overrides any `cwd` given by the caller.  stderr is always piped
        and consumed by a background thread running logStream.
        """
        kws.setdefault('shell', False)
        self.logMessage("Running %s", args)
        kws['bufsize'] = 65536
        if os.name == 'posix':
            kws['close_fds'] = True
        if self.LocalCwd:
            kws['cwd'] = self.LocalCwd
        process = subprocess.Popen(args,
                                   stderr=subprocess.PIPE,
                                   **kws)
        threading.Thread(target=self.logStream,
                         args=(process.stderr, )).start()
        return process

    def runOS(self, args, **kws):
        """Run a command to completion.

        Raises subprocess.CalledProcessError (carrying ``args[0]`` as the
        command) when the exit code is non-zero.
        """
        process = self.popen(args, **kws)
        retcode = process.wait()
        if retcode:
            raise subprocess.CalledProcessError(retcode, args[0])

    def printRecords(self, records, stream, close_stream=True):
        """Write `records` to `stream` through a RecordsStream.

        The stream is closed afterwards, even on error, unless
        `close_stream` is false.
        """
        try:
            outfile = RecordsStream(stream, self)
            for r in records:
                outfile.output(r)
        finally:
            if close_stream:
                stream.close()

    def getLogger(self):
        """Return the logger named by LoggerName, or the library default."""
        return logging.getLogger(self.LoggerName or "mapreducelib.default")

    def logMessage(self, fmt, *args):
        """Log at INFO level when Verbose or LoggerName is set."""
        if self.Verbose or self.LoggerName:
            self.getLogger().info(fmt, *args)

    def logStream(self, stream):
        """Consume `stream` line by line until it is exhausted.

        Lines go to the logger at DEBUG level when Verbose or LoggerName
        is set, and are copied to sys.stderr otherwise.
        """
        if self.Verbose or self.LoggerName:
            logger = self.getLogger()
            for line in stream:
                logger.debug(line.rstrip("\n"))
        else:
            for line in stream:
                sys.stderr.write(line)


class MapReduceClient(object):

    MapReduceDistributiveClass = MapReduceDistributive

    def __init__(self, **kws):
        """Start from the library default options, then apply `kws` on top."""
        defaults = MapReduceContext.getDefaultOptions()
        self.default_options = MapReduceContext(defaults)
        if kws:
            self.useDefaults(**kws)

    @classmethod
    def setMapReduceDistributiveClass(cls, mapReduceDistributiveClassInstance):
        """Replace the class-level `MapReduceDistributiveClass` attribute."""
        setattr(cls, "MapReduceDistributiveClass",
                mapReduceDistributiveClassInstance)

    @classmethod
    def getShellBase(cls, ctx, op_type, action_params=None):
        """Return the command line: executable, action params, then options.

        The options are those `ctx` produces for APP_EXEC and `op_type`.
        """
        command = [ctx.MrExec] + (action_params or [])
        option_args = ctx.getOptions(APP_EXEC, op_type, init_value=[])
        return command + option_args

    @classmethod
    def getActionArgs(cls, type, ctx, loader_cmd=None):
        """Return the shell parameters of operator `type`.

        A truthy `loader_cmd` is appended as the last element.
        """
        action = MetaMROperator.get(type).get_shell_params(ctx)
        if not loader_cmd:
            return action
        action.append(loader_cmd)
        return action

    @classmethod
    def getShellCommand(cls, type, ctx, loader_cmd=None):
        """Return the full command line for operator `type`."""
        operator_cls = MetaMROperator.get(type)
        action_args = cls.getActionArgs(type, ctx, loader_cmd)
        return cls.getShellBase(ctx, operator_cls.op_type, action_args)

    @classmethod
    def getQueryUrl(cls, tables, ctx):
        """Return the proxy URL addressing `tables`.

        Table names are URL-quoted and joined with ";".  Each name gets
        ",append" and/or ",sorted" appended when AppendMode /
        SortedOutput are set.  ProxyServer must be "host:port".
        """
        assert ctx.ProxyServer is not None
        host, port = ctx.ProxyServer.split(":", 1)
        suffix = ""
        if ctx.AppendMode:
            suffix += ",append"
        if ctx.SortedOutput:
            suffix += ",sorted"
        quoted = [urllib.quote_plus(name) + suffix for name in tables]
        return "http://%s:%s%s" % (host, port, "/table/" + ";".join(quoted))

    @classmethod
    def getQueryString(cls, ctx):
        """Return the URL-encoded proxy parameters produced by `ctx`."""
        proxy_params = ctx.getOptions(APP_PROXY, OP_IO, init_value={})
        query = urllib.urlencode(proxy_params)
        return query

    @classmethod
    def __run_operation(cls, type, operation, ctx):
        """Package `operation`, run it, then sort outputs if SortMode is set.

        The files reported by the distributive are registered in `ctx`
        as exported files before the command line is built.
        """
        with cls.MapReduceDistributiveClass(type, operation, ctx) as dist:
            exported = list(dist.getFiles())
            ctx.update(exportedFiles=exported)
            loader = dist.getLoaderShell()
            ctx.runOS(cls.getShellCommand(type, ctx, loader_cmd=loader))
        if not ctx.SortMode:
            return
        for table in ctx.DestTables:
            sort_ctx = ctx.updated_copy(sourceTables=[table], destTables=[table])
            cls.__internal_sort_table(sort_ctx)

    @classmethod
    def __run_joint_operation(cls, mapper, reducer, ctx):
        """Run a map stage and a reduce stage as a single command.

        `mapper` and `reducer` are ``(type, operation)`` pairs.  The map
        side is packaged with table switching disabled.  Outputs are
        sorted afterwards if SortMode is set.
        """
        map_type, map_operation = mapper
        reduce_type, reduce_operation = reducer
        map_ctx = ctx.updated_copy(enableTableSwitching=False)
        with cls.MapReduceDistributiveClass(map_type, map_operation, map_ctx) as map_dist:
            with cls.MapReduceDistributiveClass(reduce_type, reduce_operation, ctx) as reduce_dist:
                ctx.update(exportedMapFiles=list(map_dist.getFiles()))
                ctx.update(exportedReduceFiles=list(reduce_dist.getFiles()))
                map_action = cls.getActionArgs(
                    map_type, ctx, loader_cmd=map_dist.getLoaderShell())
                reduce_action = cls.getActionArgs(
                    reduce_type, ctx, loader_cmd=reduce_dist.getLoaderShell())
                map_op_type = MetaMROperator.get(map_type).op_type
                ctx.runOS(cls.getShellBase(
                    ctx, map_op_type, map_action + reduce_action))
        if not ctx.SortMode:
            return
        for table in ctx.DestTables:
            sort_ctx = ctx.updated_copy(sourceTables=[table], destTables=[table])
            cls.__internal_sort_table(sort_ctx)

    @classmethod
    def __test_operation(cls, type, operation, sample, ctx):
        """Run `operation` locally on `sample` and yield its output records.

        The loader is started through the shell in the distributive's
        directory.  A background thread writes the sample to the child's
        stdin (closing it afterwards) while this generator reads stdout.
        """
        with cls.MapReduceDistributiveClass(type, operation, ctx) as dist:
            loader = dist.getLoaderShell()
            process = ctx.popen(loader, shell=True, cwd=dist.directory,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            feeder = threading.Thread(target=ctx.printRecords,
                                      args=(sample, process.stdin, True))
            feeder.start()
            for rec in RecordTokenizer(process.stdout, ctx):
                yield rec
            # NOTE: the exit status of the child is not checked here.

    @classmethod
    def __test_joint_operation(cls, mapper, reducer, sample, ctx):
        """Run map then reduce locally; return the reduce output generator.

        The map output is collected completely and sorted by
        ``rec.items()`` before it is fed to the reduce stage.
        """
        map_type, map_operation = mapper
        reduce_type, reduce_operation = reducer
        map_ctx = ctx.updated_copy(enableTableSwitching=False)
        mapped = list(cls.__test_operation(
            map_type, map_operation, sample, map_ctx))
        mapped.sort(key=lambda rec: rec.items())
        return cls.__test_operation(reduce_type, reduce_operation, mapped, ctx)

    @classmethod
    def __test_mr_operation(cls, type, operation, ctx):
        """Test-mode run: process a sample locally and store the result.

        The sample is limited to 100 records unless a limit is already
        set, is sorted by ``rec.items()`` for "reduce", and is written to
        the destination tables after processing.
        """
        if not ctx.RecordsCountLimit:
            ctx.update(count=100)
        sample = cls.__get_sample(ctx)
        if type == "reduce":
            sample = sorted(sample, key=lambda rec: rec.items())
        if ctx.Server == "local":
            # avoids "Cannot lock current folder" error
            sample = list(sample)
        processed = cls.__test_operation(type, operation, sample, ctx)
        cls.__update_table(processed, ctx)

    @classmethod
    def __test_joint_mr_operation(cls, mapper, reducer, ctx):
        """Test-mode joint run: map+reduce a sample locally, store the result.

        The sample is limited to 100 records unless a limit is already set.
        """
        if not ctx.RecordsCountLimit:
            ctx.update(count=100)
        sample = cls.__get_sample(ctx)
        if ctx.Server == "local":
            # avoids "Cannot lock current folder" error
            sample = list(sample)
        processed = cls.__test_joint_operation(mapper, reducer, sample, ctx)
        cls.__update_table(processed, ctx)

    @classmethod
    def __get_sample(cls, ctx):
        """Yield sample records from every source table in turn.

        Without a record limit each table is read as is.  With a limit,
        each table is asked for the remaining budget divided by the
        number of tables still to be read, and the budget shrinks by one
        per yielded record.
        """
        tables = ctx.SourceTables
        ntables = len(tables)
        remaining = ctx.RecordsCountLimit or None
        for idx, table in enumerate(tables):
            if remaining is None:
                table_ctx = ctx.updated_copy(sourceTables=[table])
                for rec in cls.__get_sample_from_table(table_ctx):
                    yield rec
                continue
            quota = remaining / (ntables - idx)
            table_ctx = ctx.updated_copy(sourceTables=[table], count=quota)
            for rec in cls.__get_sample_from_table(table_ctx):
                yield rec
                remaining -= 1

    @classmethod
    def __get_sample_from_table(cls, ctx):
        """Yield up to ``ctx.RecordsCountLimit`` records of one source table.

        The source is chosen in this order: the find tool when FixedKey
        is set, an HTTP read through ProxyServer when that is set,
        otherwise the main executable with "-read".

        When the generator finishes or is closed, the stream is always
        closed.  OSError is raised if the child had already exited with a
        non-zero code; a child that is still running is killed.
        """
        assert len(ctx.SourceTables) == 1
        stream, child = None, None
        if ctx.FixedKey:
            shellCommand = [ctx.MrFindExec] + ctx.getOptions(APP_FIND, OP_IO, init_value=[]) + [
                ctx.SourceTables[0], ctx.FixedKey]
            child = ctx.popen(shellCommand, stdout=subprocess.PIPE)
            stream = child.stdout
        elif ctx.ProxyServer:
            readUrl = cls.getQueryUrl(
                ctx.SourceTables, ctx) + "/?" + cls.getQueryString(ctx)
            ctx.logMessage("HTTP get %s", readUrl)
            stream = urllib2.urlopen(readUrl)
        else:
            shellCommand = cls.getShellBase(
                ctx, OP_IO, ["-read",  ctx.SourceTables[0]])
            child = ctx.popen(shellCommand, stdout=subprocess.PIPE)
            stream = child.stdout

        try:
            for rec in itertools.islice(RecordTokenizer(stream, ctx), ctx.RecordsCountLimit):
                yield rec
        finally:
            # Sample the exit status first (as before), but close the
            # stream unconditionally so it is not leaked when the status
            # check raises.
            exit_code = child.poll() if child is not None else None
            stream.close()
            if exit_code:
                raise OSError("non-zero exit code: %d" % exit_code)
            if child is not None and exit_code is None:
                # give the child a moment to notice the closed pipe
                time.sleep(0.001)
                if child.poll() is None:
                    child.kill()
                    child.wait()

    @classmethod
    def __get_range(cls, ctx):
        """Return one lazy iterator chaining the records of all source tables."""
        per_table = (
            cls.__get_range_from_table(ctx.updated_copy(sourceTables=[name]))
            for name in ctx.SourceTables
        )
        return itertools.chain.from_iterable(per_table)

    @classmethod
    def __get_range_from_table(cls, ctx):
        """Return a RecordTokenizer over the single source table.

        Reads over HTTP when ProxyServer is set, otherwise from the
        stdout of the main executable started with "-read".
        """
        assert len(ctx.SourceTables) == 1
        if not ctx.ProxyServer:
            command = cls.getShellBase(
                ctx, OP_IO, ["-read", ctx.SourceTables[0]])
            reader = ctx.popen(command, stdout=subprocess.PIPE)
            return RecordTokenizer(reader.stdout, ctx)
        url = cls.getQueryUrl(ctx.SourceTables, ctx)
        url = url + "/?" + cls.getQueryString(ctx)
        ctx.logMessage("HTTP get %s", url)
        return RecordTokenizer(urllib2.urlopen(url), ctx)

    @classmethod
    def __update_table(cls, records, ctx):
        """Write `records` into the destination tables.

        Uses an HTTP chunked upload when ProxyServer is set, otherwise
        pipes the records into the main executable.  If writing fails,
        the child is killed (and reaped) or the HTTP stream is aborted,
        and the original exception is re-raised.  After a successful
        write, tables are sorted when SortMode is set and SortedOutput
        is not.

        Raises RuntimeError when there are no destination tables, when
        SortedOutput is combined with AppendMode, or when the child exits
        with a non-zero code.
        """
        if not ctx.DestTables:
            raise RuntimeError("empty updating tables list")
        if ctx.SortedOutput and ctx.AppendMode:
            raise RuntimeError("can't use sortedOutput with append mode")
        stream, child = None, None
        if ctx.ProxyServer:
            location = cls.getQueryUrl(
                ctx.DestTables, ctx) + "/?" + cls.getQueryString(ctx)
            ctx.logMessage("HTTP post %s", location)
            stream = HttpChunkedStream(location)
        else:
            if len(ctx.DestTables) == 1:
                writeOptions = [
                    "-writesorted" if ctx.SortedOutput else "-write", ctx.DestTables[0]]
            else:
                writeOptions = ["-write"]
                dstAttr = "-dstsorted" if ctx.SortedOutput \
                    else "-dstappend" if ctx.AppendMode \
                    else "-dst"
                for t in ctx.DestTables:
                    writeOptions += [dstAttr, t]
            shellCommand = cls.getShellBase(ctx, OP_IO, writeOptions)
            child = ctx.popen(shellCommand, stdin=subprocess.PIPE)
            stream = child.stdin
        try:
            ctx.printRecords(records, stream, close_stream=False)
        except:
            # atomic update: do cleanup on error

            # we are explicitly killing mapreduce process
            if child is not None:
                child.kill()
                # reap the killed child so that it does not stay a zombie
                child.wait()

            # we are not closing HttpChunkedStream in proxy mode in case
            # of error thus preventing the data from being commited
            if ctx.ProxyServer:
                stream.abort()

            raise
        else:
            stream.close()

        if child is not None and 0 != child.wait():
            raise RuntimeError("nonzero exit code")
        if ctx.SortMode and not ctx.SortedOutput:
            for table in ctx.DestTables:
                cls.__internal_sort_table(ctx.updated_copy(
                    sourceTables=[table], destTables=[table]))

    @classmethod
    def __drop_table(cls, ctx):
        """Run "-drop" on the single destination table."""
        assert len(ctx.DestTables) == 1
        action = ["-drop", ctx.DestTables[0]]
        ctx.runOS(cls.getShellBase(ctx, OP_MODIF, action))

    @classmethod
    def __lock_table(cls, ctx):
        """Run "-lock" on the single destination table."""
        assert len(ctx.DestTables) == 1
        action = ["-lock", ctx.DestTables[0]]
        ctx.runOS(cls.getShellBase(ctx, OP_MODIF, action))

    @classmethod
    def __unlock_table(cls, ctx):
        """Run "-unlock" on the single destination table."""
        assert len(ctx.DestTables) == 1
        action = ["-unlock", ctx.DestTables[0]]
        ctx.runOS(cls.getShellBase(ctx, OP_MODIF, action))

    @classmethod
    def __internal_sort_table(cls, ctx):
        """Run the "internal-sort" operator; one source and one dest table."""
        assert len(ctx.DestTables) == 1 and len(ctx.SourceTables) == 1
        ctx.runOS(cls.getShellCommand("internal-sort", ctx))

    @classmethod
    def __sort_table(cls, ctx):
        """Run the "sort" operator; one source and one dest table."""
        assert len(ctx.DestTables) == 1 and len(ctx.SourceTables) == 1
        ctx.runOS(cls.getShellCommand("sort", ctx))

    @classmethod
    def __copy_tables(cls, ctx):
        """Copy into the single destination table ("sort" if SortMode is set)."""
        assert len(ctx.DestTables) == 1
        if ctx.SortMode:
            operator_name = "sort"
        else:
            operator_name = "copy"
        ctx.runOS(cls.getShellCommand(operator_name, ctx))

    @classmethod
    def __move_table(cls, ctx):
        """Run the "move" operator; one source and one dest table."""
        assert len(ctx.SourceTables) == 1 and len(ctx.DestTables) == 1
        ctx.runOS(cls.getShellCommand("move", ctx))

    @classmethod
    def __merge_tables(cls, ctx):
        """Run the "merge" operator into the single destination table."""
        assert len(ctx.DestTables) == 1
        ctx.runOS(cls.getShellCommand("merge", ctx))

    @classmethod
    def __defrag_table(cls, ctx):
        """Run the "defrag" operator, then an internal sort if SortMode is set."""
        assert len(ctx.SourceTables) == 1 and len(ctx.DestTables) == 1
        ctx.runOS(cls.getShellCommand("defrag", ctx))
        if not ctx.SortMode:
            return
        cls.__internal_sort_table(ctx)

    @classmethod
    def __get_tables(cls, cmd, ctx):
        """Return the stripped output lines of a "-list" call.

        `cmd` is a one-item dict; its key and value are passed as the two
        arguments following "-list".  Raises CalledProcessError on a
        non-zero exit code.
        """
        assert 1 == len(cmd.keys())
        action = ["-list"]
        for pair in cmd.items():
            action.extend(pair)
        command = cls.getShellBase(ctx, OP_RDCFG, action)
        lister = ctx.popen(command, stdout=subprocess.PIPE)
        names = [line.strip() for line in lister.stdout]
        if lister.wait():
            raise subprocess.CalledProcessError(lister.returncode, command)
        return names

    def execute(self, run_mode, proc_mode, *args, **kws):
        """Test or run a single-stage operation.

        run_mode "test": signature ``(operation, sample, **options)``;
        returns a generator of output records.
        run_mode "run":  signature ``(operation, **options)``; runs the
        operation (locally on a sample when TestMode is set) and returns
        None.
        Any other `run_mode` only validates the options and returns None.

        `operation` and `sample` may be given positionally or by keyword.
        They are taken out of `kws` *before* the remaining keywords are
        applied as options; otherwise the keyword form is rejected as an
        unknown option.
        """
        operation = sample = None
        if run_mode == "test":
            operation = args[0] if len(args) > 0 else kws.pop("operation")
            sample = args[1] if len(args) > 1 else kws.pop("sample")
        elif run_mode == "run":
            operation = args[0] if len(args) > 0 else kws.pop("operation")

        ctx = self.default_options.updated_copy(**kws)
        if run_mode == "test":
            return self.__test_operation(proc_mode, operation, sample, ctx)
        elif run_mode == "run":
            if ctx.TestMode:
                self.__test_mr_operation(proc_mode, operation, ctx)
            else:
                self.__run_operation(proc_mode, operation, ctx)

    def execute_joint(self, run_mode, map_proc_mode, reduce_proc_mode, *args, **kws):
        """Test or run a joint map + reduce operation.

        run_mode "test": signature
        ``(map_operation, reduce_operation, sample, **options)``; returns
        a generator of output records.
        run_mode "run": signature
        ``(map_operation, reduce_operation, **options)``; runs the
        operation (locally on a sample when TestMode is set) and returns
        None.
        Any other `run_mode` only validates the options and returns None.

        The operations and the sample may be given positionally or by
        keyword.  They are taken out of `kws` *before* the remaining
        keywords are applied as options; otherwise the keyword form is
        rejected as an unknown option.
        """
        map_operation = reduce_operation = sample = None
        if run_mode in ("test", "run"):
            map_operation = args[0] if len(
                args) > 0 else kws.pop("map_operation")
            reduce_operation = args[1] if len(
                args) > 1 else kws.pop("reduce_operation")
            if run_mode == "test":
                sample = args[2] if len(args) > 2 else kws.pop("sample")

        ctx = self.default_options.updated_copy(**kws)
        mapper = (map_proc_mode, map_operation)
        reducer = (reduce_proc_mode, reduce_operation)
        if run_mode == "test":
            return self.__test_joint_operation(mapper, reducer, sample, ctx)
        elif run_mode == "run":
            if ctx.TestMode:
                self.__test_joint_mr_operation(mapper, reducer, ctx)
            else:
                self.__run_joint_operation(mapper, reducer, ctx)

    def testMap(self, *args, **kws):
        """Shortcut for ``execute("test", "map", ...)``."""
        return self.execute("test", "map", *args, **kws)

    def runMap(self, *args, **kws):
        """Call ``execute`` in "run" run mode with the "map" processing mode.

        Positional and keyword arguments are forwarded unchanged.
        """
        return self.execute("run", "map", *args, **kws)

    def testCombine(self, *args, **kws): return self.execute("test",
                                                             "combined_map", *args, **kws)

    def runCombine(self, *args, **kws):
        """Call ``execute`` in "run" run mode with the "combined_map" mode.

        Positional and keyword arguments are forwarded unchanged.
        """
        return self.execute("run", "combined_map", *args, **kws)

    def testReduce(self, *args, **kws): return self.execute("test",
                                                            "reduce", *args, **kws)

    def runReduce(self, *args, **kws):
        """Call ``execute`` in "run" run mode with the "reduce" mode.

        Positional and keyword arguments are forwarded unchanged.
        """
        return self.execute("run", "reduce", *args, **kws)

    def testCombinedReduce(
        self, *args, **kws): return self.execute("test", "combined_reduce", *args, **kws)

    def runCombinedReduce(self, *args, **kws):
        """Call ``execute`` in "run" run mode with the "combined_reduce" mode.

        Positional and keyword arguments are forwarded unchanged.
        """
        return self.execute("run", "combined_reduce", *args, **kws)

    def testMapReduce(
        self, *args, **kws): return self.execute_joint("test", "map", "reduce", *args, **kws)

    def runMapReduce(self, *args, **kws):
        """Call ``execute_joint`` in "run" mode with "map" and "reduce" stages.

        Positional and keyword arguments are forwarded unchanged.
        """
        return self.execute_joint("run", "map", "reduce", *args, **kws)

    def testCombinedMapReduce(self, *args, **kws): return self.execute_joint(
        "test", "combined_map", "combined_reduce", *args, **kws)

    def runCombinedMapReduce(self, *args, **kws):
        """Call ``execute_joint`` in "run" mode with the combined stages.

        The stages are "combined_map" and "combined_reduce"; positional and
        keyword arguments are forwarded unchanged.
        """
        return self.execute_joint(
            "run", "combined_map", "combined_reduce", *args, **kws)

    def useServer(self, servername):
        """Store ``servername`` as the ``server`` entry of the default options."""
        defaults = self.default_options
        defaults.update(server=servername)

    def useDefaults(self, **kws):
        """Update the default options in place with the given keywords."""
        defaults = self.default_options
        defaults.update(**kws)

    def getSample(self, srcTable=None, **kws):
        """Fetch a sample for ``srcTable``.

        The request context is a copy of the default options updated with
        ``srcTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(srcTable=srcTable, **kws)
        return self.__get_sample(ctx)

    def getSampleFromTable(self, srcTable=None, **kws):
        """Fetch a sample via the ``__get_sample_from_table`` helper.

        The request context is a copy of the default options updated with
        ``srcTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(srcTable=srcTable, **kws)
        return self.__get_sample_from_table(ctx)

    def getRange(self, srcTable=None, **kws):
        """Fetch a range for ``srcTable`` via the ``__get_range`` helper.

        The request context is a copy of the default options updated with
        ``srcTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(srcTable=srcTable, **kws)
        return self.__get_range(ctx)

    def updateTable(self, records, dstTable=None, **kws):
        """Pass ``records`` to the ``__update_table`` helper for ``dstTable``.

        The request context is a copy of the default options updated with
        ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(dstTable=dstTable, **kws)
        return self.__update_table(records, ctx)

    def dropTable(self, dstTable, **kws):
        """Invoke the ``__drop_table`` helper for ``dstTable``.

        The request context is a copy of the default options updated with
        ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(dstTable=dstTable, **kws)
        return self.__drop_table(ctx)

    def lockTable(self, dstTable, **kws):
        """Invoke the ``__lock_table`` helper for ``dstTable``.

        The request context is a copy of the default options updated with
        ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(dstTable=dstTable, **kws)
        return self.__lock_table(ctx)

    def unlockTable(self, dstTable, **kws):
        """Invoke the ``__unlock_table`` helper for ``dstTable``.

        The request context is a copy of the default options updated with
        ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(dstTable=dstTable, **kws)
        return self.__unlock_table(ctx)

    def dropTables(self, dstTables, **kws):
        """Call ``dropTable`` for every table name in ``dstTables``.

        ``dstTables`` must be an iterable of names; a single string is
        rejected with TypeError so it is not iterated character by
        character.  Keyword overrides are passed to each ``dropTable`` call.
        """
        given_single_string = isinstance(dstTables, types.StringTypes)
        if given_single_string:
            raise TypeError("dstTables argument must has list type")
        for table_name in dstTables:
            self.dropTable(table_name, **kws)

    def sortTable(self, srcTable, **kws):
        """Invoke the ``__sort_table`` helper for ``srcTable``.

        The destination is the ``dstTable`` keyword when given, otherwise
        ``srcTable`` itself.  Remaining keywords are option overrides.
        """
        # Take dstTable out of kws so it is not passed twice below.
        destination = kws.pop('dstTable', srcTable)
        ctx = self.default_options.updated_copy(
            srcTable=srcTable, dstTable=destination, **kws)
        return self.__sort_table(ctx)

    def sortTables(self, dstTables, **kws):
        """Call ``sortTable`` for every table name in ``dstTables``.

        ``dstTables`` must be an iterable of names; a single string is
        rejected with TypeError so it is not iterated character by
        character.  Keyword overrides are passed to each ``sortTable`` call.
        """
        given_single_string = isinstance(dstTables, types.StringTypes)
        if given_single_string:
            raise TypeError("dstTables argument must has list type")
        for table_name in dstTables:
            self.sortTable(table_name, **kws)

    def copyTable(self, srcTable, dstTable, **kws):
        """Invoke the ``__copy_tables`` helper for one source table.

        The request context is a copy of the default options updated with
        ``srcTable``, ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(
            srcTable=srcTable, dstTable=dstTable, **kws)
        return self.__copy_tables(ctx)

    def copyTables(self, srcTables, dstTable, **kws):
        """Invoke the ``__copy_tables`` helper for several source tables.

        The request context is a copy of the default options updated with
        ``srcTables``, ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(
            srcTables=srcTables, dstTable=dstTable, **kws)
        return self.__copy_tables(ctx)

    def mergeTables(self, srcTables, dstTable, **kws):
        """Invoke the ``__merge_tables`` helper for several source tables.

        The request context is a copy of the default options updated with
        ``srcTables``, ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(
            srcTables=srcTables, dstTable=dstTable, **kws)
        return self.__merge_tables(ctx)

    def moveTable(self, srcTable, dstTable, **kws):
        """Invoke the ``__move_table`` helper for ``srcTable`` -> ``dstTable``.

        The request context is a copy of the default options updated with
        ``srcTable``, ``dstTable`` and any extra keyword overrides.
        """
        ctx = self.default_options.updated_copy(
            srcTable=srcTable, dstTable=dstTable, **kws)
        return self.__move_table(ctx)

    def renameTable(self, srcTable, dstTable, **kws):
        """Like ``moveTable``, but always sets ``failOnEmptySrc=True``.

        The request context is a copy of the default options updated with
        ``srcTable``, ``dstTable``, ``failOnEmptySrc=True`` and any extra
        keyword overrides; it is handed to the ``__move_table`` helper.
        """
        ctx = self.default_options.updated_copy(
            srcTable=srcTable, dstTable=dstTable, failOnEmptySrc=True, **kws)
        return self.__move_table(ctx)

    def defragTable(self, srcTable, **kws):
        """Invoke the ``__defrag_table`` helper for ``srcTable``.

        The destination is the ``dstTable`` keyword when given, otherwise
        ``srcTable`` itself.  Remaining keywords are option overrides.
        """
        # Take dstTable out of kws so it is not passed twice below.
        destination = kws.pop('dstTable', srcTable)
        ctx = self.default_options.updated_copy(
            srcTable=srcTable, dstTable=destination, **kws)
        return self.__defrag_table(ctx)

    def getTables(self, prefix=""):
        """Query tables through ``__get_tables`` with a ``-prefix`` selector."""
        selector = {"-prefix": prefix}
        ctx = self.default_options.updated_copy()
        return self.__get_tables(selector, ctx)

    def getTablesByOwner(self, owner):
        """Query tables through ``__get_tables`` with a ``-user`` selector."""
        selector = {"-user": owner}
        ctx = self.default_options.updated_copy()
        return self.__get_tables(selector, ctx)

    def getExactTable(self, exact):
        """Query tables through ``__get_tables`` with an ``-exact`` selector."""
        selector = {"-exact": exact}
        ctx = self.default_options.updated_copy()
        return self.__get_tables(selector, ctx)

    def getTablesBySuffix(self, suffix):
        """Query tables through ``__get_tables`` with a ``-suffix`` selector."""
        selector = {"-suffix": suffix}
        ctx = self.default_options.updated_copy()
        return self.__get_tables(selector, ctx)

    def getTablesInfo(self, mask=None, prefix=None):
        """Return a list of ``TableInfo`` objects describing tables.

            mask - regex for table name (used in mr_ls); only consulted
                   when prefix is None
            prefix - prefix for table name (used in mapreduce -list); when
                     given, the listing is parsed as JSON, otherwise it is
                     read line by line in the "ls" format

        Raises subprocess.CalledProcessError when the listing command exits
        with a non-zero status.
        """
        ctx = self.default_options.copy()
        if prefix is not None:
            action = ["-list", "-prefix", prefix, "-jsonoutput"]
            shellCommand = self.getShellBase(ctx, OP_RDCFG, action)
            fmt = "json"
        else:
            action = ["-l", "-ts", "-comp", "-e"]
            if mask:
                action += [mask]
            shellCommand = [ctx.MrLsExec] + action + \
                ctx.getOptions(APP_LS, OP_IO, init_value=[])
            fmt = "ls"
        child = ctx.popen(shellCommand, stdout=subprocess.PIPE)
        try:
            if fmt == "ls":
                lines = [s.strip() for s in child.stdout]
                tables = [TableInfo(l, fmt) for l in lines]
            else:
                tables = [TableInfo(dct, fmt)
                          for dct in json.load(child.stdout)]
        finally:
            # Always release the pipe and reap the child, even when parsing
            # the listing raises; otherwise the process is left un-waited
            # and its stdout descriptor stays open.
            child.stdout.close()
            exitStatus = child.wait()
        if exitStatus:
            raise subprocess.CalledProcessError(child.returncode, shellCommand)
        return tables

    def getTableInfo(self, table, **kws):
        """Return the info object whose ``name`` equals ``table`` exactly.

        Candidates come from ``getTablesInfo(prefix=table)``.  When none
        matches, the result is None if the ``FailOnEmptySrc`` option is set
        and ``DefaultTableInfo(table)`` otherwise.

        NOTE: ``kws`` only affects the ``FailOnEmptySrc`` lookup; it is not
        passed to ``getTablesInfo``.
        """
        ctx = self.default_options.updated_copy(**kws)
        exact_matches = (
            info for info in self.getTablesInfo(prefix=table)
            if info.name == table
        )
        found = next(exact_matches, None)
        if found is not None:
            return found
        return None if ctx.FailOnEmptySrc else DefaultTableInfo(table)


# Module-level default client instance (exported via __all__); used as the
# fallback client by TemporaryTable when no explicit client is passed.
MapReduce = MapReduceClient()


class TemporaryTable(TemporaryObject):
    """Context manager that owns a uniquely named temporary table.

    The table name is ``<project>/<prefix>.<unique><suffix>``.

    * ``debug`` falsy or exactly ``True``: the unique part comes from
      ``self.random_string()``; candidates are retried until
      ``client.getSample(name, count=1)`` yields no record.
    * any other truthy ``debug``: the unique part is reproducible, built
      from an md5 of the host name, the script path and ``hash(debug)``
      plus ``self.get_login()``; no emptiness check is performed.

    On context exit the table is dropped only when ``debug`` is falsy.
    """

    def __init__(self, prefix="", suffix="", project="tmp", debug=False, client=None):
        self.name = None
        self.client = client or MapReduce
        self.__dropOnClose = not debug

        def full_name(unique):
            return os.path.join(project, "%s.%s%s" % (prefix, unique, suffix))

        if debug and debug is not True:
            # Reproducible name for a given host, script and debug value.
            digest = hashlib.md5(socket.getfqdn() + os.path.abspath(
                sys.argv[0]) + str(hash(debug))).hexdigest()
            self.name = full_name("%s-%s" % (digest, self.get_login()))
            return

        # Random name: keep drawing until the candidate table has no records.
        while True:
            candidate = full_name(self.random_string())
            if not list(self.client.getSample(candidate, count=1)):
                break
        self.name = candidate

    def __enter__(self):
        return self

    def __exit__(self, evalue, etype, eframe):
        # The arguments are unused; the exception (if any) is not suppressed.
        if not (self.name and self.__dropOnClose):
            return
        self.client.dropTable(dstTable=self.name)


class Dispatchable(object):
    """Callable object whose call is routed to a method chosen by name.

    The ``dispatch`` keyword of the constructor names the method invoked by
    ``obj(...)``; it defaults to ``DEFAULT_METHOD``.  Subclasses customise
    behaviour through the class-level names below and by overriding the
    ``init`` / ``restore`` / ``call`` callbacks.
    """
    # Name of the method run at the end of __init__ with all ctor arguments.
    INIT_METHOD = "init"
    # Name of the method run by __setstate__ after the state is restored.
    RESTORE_METHOD = "restore"
    # Name of the method used when no ``dispatch`` keyword is given.
    DEFAULT_METHOD = "call"
    # Name of the attribute holding the keyword names copied onto the instance.
    INIT_ARGS_PRM = "init_arguments"
    init_arguments = frozenset()

    def __init__(self, *args, **kws):
        """Validate the dispatch target, copy permitted keywords, run init.

        Raises RuntimeError when ``dispatch`` does not name a callable
        attribute of the instance.
        """
        dispatchString = kws.get("dispatch", self.DEFAULT_METHOD)
        if not callable(getattr(self, dispatchString, None)):
            raise RuntimeError(
                "incorrect dispatch argument: %s" % dispatchString)
        self.dispatchString = dispatchString
        # The ``init_args_prm`` keyword may name a different whitelist attribute.
        self.__set_attributes(getattr(self, kws.get(
            "init_args_prm", self.INIT_ARGS_PRM), set()), **kws)
        getattr(self, self.INIT_METHOD)(*args, **kws)

    def __call__(self, *args, **kws):
        """Forward the call to the method named by ``dispatchString``."""
        dispatchString = getattr(self, "dispatchString", self.DEFAULT_METHOD)
        # Implicit calls ``obj(...)`` look ``__call__`` up on the type, so the
        # instance attribute stored here only short-cuts explicit
        # ``obj.__call__(...)`` lookups.
        self.__call__ = dsp_method = getattr(self, dispatchString)
        return dsp_method(*args, **kws)

    def __repr__(self):
        return "<'{0}' instance>".format(type(self).__name__)

    def __setstate__(self, sdict):
        """
        See https://docs.python.org/2/library/pickle.html#object.__setstate__ for details.
        """
        self.__dict__.update(sdict)
        getattr(self, self.RESTORE_METHOD)()

    def __set_attributes(self, permit_attributes, **kws):
        """Set as attributes those keywords whose name is in ``permit_attributes``."""
        # items() instead of the Python 2-only iteritems(): same result.
        for argname, argval in kws.items():
            if argname in permit_attributes:
                setattr(self, argname, argval)

    def init(self, *args, **kws):
        """Callback on object initialization"""
        pass

    def restore(self):
        """Callback on object restoring/deserialization"""
        pass

    def call(self, *args, **kws):
        """Callback on object call

        Accepts and ignores any arguments, because ``__call__`` forwards
        whatever the caller passed.
        """
        pass


class IMRFunction(Dispatchable):
    """Dispatchable whose default call target is ``do``.

    ``start`` is the restore callback (run from ``__setstate__``) and
    ``do`` is the method an instance call is routed to by default.  Both
    are no-ops here and are meant to be overridden.
    """
    RESTORE_METHOD = "start"
    DEFAULT_METHOD = "do"

    def start(self):
        """Callback on job start/initialization"""

    def do(self, *args, **kws):
        """Callback on record(s) processing"""


class MRSession(Dispatchable):
    """Dispatchable session whose default call target is ``default``.

    When constructed without an explicit ``dispatch`` (or with it equal to
    ``DEFAULT_METHOD``) the instance uses ``reset`` / ``reset_arguments``
    instead of ``init`` / ``init_arguments`` and is called once right after
    construction.  ``initJob`` is the restore callback.
    """
    RESTORE_METHOD = "initJob"
    DEFAULT_METHOD = "default"
    reset_arguments = frozenset()

    def __init__(self, *args, **kws):
        requested = kws.get("dispatch", self.DEFAULT_METHOD)
        runs_default = requested == self.DEFAULT_METHOD
        if runs_default:
            # Instance-level overrides picked up by Dispatchable.__init__.
            self.INIT_ARGS_PRM = "reset_arguments"
            self.INIT_METHOD = "reset"
        super(MRSession, self).__init__(*args, **kws)
        if runs_default:
            self()

    def __repr__(self):
        return "<'{0}' instance (dispatch='{1}')>".format(
            type(self).__name__, self.dispatchString)

    def initJob(self):
        """Callback on job initialization"""

    def reset(self, *args, **kws):
        """Callback on session object initialization"""

    def default(self):
        """Standard actions list"""

    # Useful helper methods

    @staticmethod
    def skipRecords(recs):
        """Exhaust ``recs``, discarding every item."""
        for _ in recs:
            pass

    @staticmethod
    def countRecords(recs):
        """Exhaust ``recs`` and return the number of items it yielded."""
        total = 0
        for _ in recs:
            total += 1
        return total


def _InitializeDefaultLogger():
    """Configure the "mapreducelib.default" logger.

    Sets the logger level to DEBUG and attaches one StreamHandler that
    formats records as "<asctime> - <message>".  The function is
    idempotent: the handler it installs is tagged, and a repeated call
    (e.g. after a module reload) does not attach a second one, which would
    otherwise duplicate every message.
    """
    logger = logging.getLogger("mapreducelib.default")
    logger.setLevel(logging.DEBUG)
    for installed in logger.handlers:
        if getattr(installed, "_mapreducelib_default", False):
            return
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    # Tag our own handler so handlers added by user code are not mistaken
    # for it on a later call.
    handler._mapreducelib_default = True
    logger.addHandler(handler)


# Configure the "mapreducelib.default" logger as a side effect of importing
# this module.
_InitializeDefaultLogger()
