#define _XOPEN_SOURCE 500

#include <alloca.h>
#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <jni.h>

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <contrib/libs/zstd/include/zstd.h>

#include <unistd.h>

#include <openssl/evp.h>

#include <syscall.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "compress.h"
#include "main.h"

/*
 * Presumably the maximum encoded length of one variable-length int;
 * readBlockSizes() sizes its header buffer as twice this value.
 */
#define MAX_VINT_LENGTH 5
/* Mask selecting the low 32 bits of a wider integer. */
#define INT_MASK 0xFFFFFFFFL
/* Bit offset of the upper 32-bit half inside a 64-bit value. */
#define INT_SHIFT 32

/*
 * Packs two 32-bit quantities into one 64-bit value: packedSize in the low
 * half, unpackedSize in the high half.
 *
 * Arguments and the whole expansion are parenthesized so the macro is safe
 * inside larger expressions, and the shift is done on an unsigned type so a
 * value with bit 31 set is not shifted into the sign bit of a signed type.
 */
#define BLOCKINFO(packedSize, unpackedSize) \
    ((long long)(((unsigned long long)(packedSize) & INT_MASK) \
        | (((unsigned long long)(unpackedSize) & INT_MASK) << INT_SHIFT)))

/*
 * NOTE(review): the expansion ends with ';', so this macro can only be used
 * as the last token of a statement. Left unchanged because its uses are not
 * visible from here.
 */
#define AES_BLOCK_SIZE EVP_CIPHER_block_size(EVP_aes_128_ecb());

/*
 * I/O scheduling classes, numbered 0..3 in declaration order. The names match
 * the IOPRIO_CLASS_* constants used with Linux ioprio_set(2)/ioprio_get(2);
 * presumably they are redeclared here because libc does not export them —
 * TODO confirm.
 */
enum {
    IOPRIO_CLASS_NONE,
    IOPRIO_CLASS_RT,
    IOPRIO_CLASS_BE,
    IOPRIO_CLASS_IDLE,
};

/*
 * Selector for what an I/O priority applies to (process, process group or
 * user); numbering starts at 1.
 */
enum {
    IOPRIO_WHO_PROCESS = 1,
    IOPRIO_WHO_PGRP,
    IOPRIO_WHO_USER,
};

/*
 * An I/O priority value keeps the class in the bits at and above
 * IOPRIO_CLASS_SHIFT and the class-specific data in the bits below it.
 */
#define IOPRIO_CLASS_SHIFT (13)
/* Mask covering the data bits below the class field. */
#define IOPRIO_PRIO_MASK    ((1UL << IOPRIO_CLASS_SHIFT) - 1)
/* Extracts the class from a combined priority value. */
#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
/* Extracts the data bits from a combined priority value. */
#define IOPRIO_PRIO_DATA(mask)  ((mask) & IOPRIO_PRIO_MASK)
/*
 * Combines a class and its data into one priority value. Both arguments are
 * parenthesized so that expressions with low-precedence operators (for
 * example a conditional) are combined as a unit.
 */
#define IOPRIO_PRIO_VALUE(class, data)  (((class) << IOPRIO_CLASS_SHIFT) | (data))

/*
 * Fixed key and IV handed to the EVP_aes_128_cbc() cipher by the aesflate*
 * functions in this file. Each literal is 16 characters long; the arrays also
 * hold the terminating NUL.
 * NOTE(review): hard-coded key material offers obfuscation at best — confirm
 * this is intentional.
 */
static unsigned char key[] = "yes_i_want_this_";
static unsigned char iv[] = "ugu_567887654322";

/*
 * A reusable scratch buffer: `buffer` points at heap memory obtained by
 * getTlsBuffer() and `size` records how many bytes were requested for it.
 */
typedef struct {
    char* buffer;
    int size;
} TmpBuffer;

// Per-thread scratch buffers (see getTlsReadBuffer / getTlsCompressBuffer).
static __thread TmpBuffer tlsReadBuffer;
static __thread TmpBuffer tlsCompressBuffer;
// Per-thread zstd contexts, created lazily by the getZstd*Ctx() accessors.
static __thread ZSTD_CCtx* zstdCompressCtx;
static __thread ZSTD_DCtx* zstdDecompressCtx;

// NOTE(review): not referenced in the visible part of this file — confirm use.
static bool hasTcmalloc = false;

/*
 * zlib entry points used by the rest of the file. zlibInit() points them at
 * the regular zlib functions and loadFastZlib() may replace all of them with
 * the `*_fast` symbols from libzlib_fast.so.
 */
compress2Func_t pCompress2;
inflateFunc_t pInflate;
inflateInit_Func_t pInflateInit2;
inflateSetDictionary_Func_t pInflateSetDictionary;
inflateResetFunc_t pInflateReset;
inflateEndFunc_t pInflateEnd;

deflateInit2_Func_t pDeflateInit2;
deflateParams_Func_t pDeflateParams;
deflateSetDictionary_Func_t pDeflateSetDictionary;
deflate_Func_t pDeflate;
deflateReset_Func_t pDeflateReset;
deflateEnd_Func_t pDeflateEnd;

/* tcmalloc extension hooks; assigned by tcmallocInit() once both resolve. */
MallocExtension_ReleaseFreeMemory_t pMallocExtension_ReleaseFreeMemory;
MallocExtension_GetStats_t pMallocExtension_GetStats;

/*
 * Returns the calling thread's zstd decompression context, creating it on
 * first use. The result is whatever ZSTD_createDCtx() returned.
 */
__attribute__((__visibility__("hidden")))
ZSTD_DCtx *getZstdDecompressCtx() {
    ZSTD_DCtx *ctx = zstdDecompressCtx;
    if (ctx != NULL) {
        return ctx;
    }
    ctx = ZSTD_createDCtx();
    zstdDecompressCtx = ctx;
    return ctx;
}

/*
 * Returns the calling thread's zstd compression context, creating it on
 * first use. The result is whatever ZSTD_createCCtx() returned.
 */
__attribute__((__visibility__("hidden")))
ZSTD_CCtx *getZstdCompressCtx() {
    ZSTD_CCtx *ctx = zstdCompressCtx;
    if (ctx != NULL) {
        return ctx;
    }
    ctx = ZSTD_createCCtx();
    zstdCompressCtx = ctx;
    return ctx;
}

/*
 * Reads up to `count` bytes from `fd` starting at `offset`, repeating pread()
 * until the request is satisfied or end of file is reached.
 *
 * Returns the number of bytes read (less than `count` only at end of file),
 * or -1 with errno set by pread() on error — even if some bytes had already
 * been read. A call interrupted by a signal (EINTR) is retried.
 */
__attribute__((__visibility__("hidden")))
ssize_t PreadFull(
    int fd,
    void* buf,
    size_t count,
    off_t offset)
{
    // Byte pointer: arithmetic on void* is a compiler extension.
    char *cursor = buf;
    ssize_t totalRead = 0;
    while (count > 0) {
        ssize_t chunk = pread(fd, cursor, count, offset);
        if (chunk == -1) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        if (chunk == 0) {
            // End of file.
            break;
        }
        totalRead += chunk;
        cursor += chunk;
        offset += chunk;
        count -= (size_t) chunk;
    }
    return totalRead;
}

/*
 * Writes `count` bytes from `buf` to `fd` starting at `offset`, repeating
 * pwrite() until everything is written.
 *
 * Returns the number of bytes written (less than `count` only if pwrite()
 * reported zero progress), or -1 with errno set by pwrite() on error — even
 * if some bytes had already been written. A call interrupted by a signal
 * (EINTR) is retried.
 */
__attribute__((__visibility__("hidden")))
ssize_t PwriteFull(
    int fd,
    const void* buf,
    size_t count,
    off_t offset)
{
    // Byte pointer: arithmetic on void* is a compiler extension.
    const char *cursor = buf;
    ssize_t totalWritten = 0;
    while (count > 0) {
        ssize_t chunk = pwrite(fd, cursor, count, offset);
        if (chunk == -1) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        if (chunk == 0) {
            break;
        }
        totalWritten += chunk;
        cursor += chunk;
        offset += chunk;
        count -= (size_t) chunk;
    }
    return totalWritten;
}

/*
 * Returns a scratch buffer of at least `size` bytes backed by `buffer`.
 *
 * The existing allocation is reused when it is already large enough;
 * otherwise it is freed and replaced, so previous contents are not kept.
 * Returns NULL if the allocation fails; in that case the recorded size is
 * reset to 0 so a later call tries to allocate again instead of returning
 * NULL as though it were a valid buffer.
 */
__attribute__((__visibility__("hidden")))
char *getTlsBuffer(int size, TmpBuffer *buffer) {
    if (buffer->size >= size) {
        return buffer->buffer;
    }
    free(buffer->buffer);
    buffer->buffer = malloc(size);
    buffer->size = buffer->buffer != NULL ? size : 0;
    return buffer->buffer;
}


__attribute__((__visibility__("hidden")))
char *getTlsReadBuffer(int size) {
    return getTlsBuffer(size, &tlsReadBuffer);
}

__attribute__((__visibility__("hidden")))
char *getTlsCompressBuffer(int size) {
    return getTlsBuffer(size, &tlsCompressBuffer);
}

/*
 * Declared here, defined elsewhere. zlibInit() treats a non-zero result as
 * permission to try loading the "fast" zlib build; presumably it reports
 * SSE4.1 support of the CPU — confirm against the definition.
 */
__attribute__((__visibility__("hidden")))
int sse41Available(void);

__attribute__((__visibility__("hidden")))
bool tcmallocInit(
    const char* lib,
    const char *ldLibraryPath)
{
    char *modifiablePath = strdupa(ldLibraryPath);
    void *dl = NULL;

    const char *singlePath;
    const char *err = "success";
    while ((singlePath = strsep(&modifiablePath, ":")) != NULL) {
        int len = snprintf(NULL, 0, "%s/%s", singlePath, lib) + 1;
        char *path = (char*) alloca(len);
        snprintf(path, len, "%s/%s", singlePath, lib);
        fprintf(stderr, "load_tcmalloc: trying path: %s\n", path);
        dl = dlopen(path, RTLD_NOW | RTLD_LOCAL);
        if (dl != NULL) {
            fprintf(
                stderr,
                "load_tcmalloc: loaded library from %s\n",
                path);
            break;
        }
    }
    if (dl == NULL) {
        //try default paths
        dl = dlopen(lib, RTLD_NOW | RTLD_LOCAL);
        if (dl == NULL) {
            err = "Can't load tcmalloc";
            goto perror;
        } else {
            fprintf(
                stderr,
                "load_tcmalloc: loaded library %s from system path\n",
                lib);
        }
    }

    MallocExtension_ReleaseFreeMemory_t pMallocExtension_ReleaseFreeMemoryLocal;
    MallocExtension_GetStats_t pMallocExtension_GetStatsLocal;

    pMallocExtension_ReleaseFreeMemoryLocal =
        (MallocExtension_ReleaseFreeMemory_t) dlsym(
            dl,
            "MallocExtension_ReleaseFreeMemory");
    if (pMallocExtension_ReleaseFreeMemoryLocal == NULL) {
        err = "Can't reslove MallocExtension_ReleaseFreeMemoryLocal function";
        goto perror;
    }

    pMallocExtension_GetStatsLocal =
        (MallocExtension_GetStats_t) dlsym(
            dl,
            "MallocExtension_GetStats");
    if (pMallocExtension_GetStatsLocal == NULL) {
        err = "Can't reslove MallocExtension_GetStatsLocal function";
        goto perror;
    }

    pMallocExtension_ReleaseFreeMemory = pMallocExtension_ReleaseFreeMemoryLocal;
    pMallocExtension_GetStats = pMallocExtension_GetStatsLocal;

    return true;

perror:
    fprintf(stderr, "load_tcmalloc: error: %s: %s\n", err, dlerror());
    fprintf(stderr, "ldLibraryPath=%s\n", ldLibraryPath);
    if (dl != NULL) {
        dlclose(dl);
    }
    return false;
}

/*
 * Loads libzlib_fast.so and switches the global zlib function pointers to its
 * `*_fast` entry points.
 *
 * Every ':'-separated directory of `ldLibraryPath` is tried first; if none of
 * them yields the library, dlopen() is asked to find it on its default search
 * path. The global pointers are replaced only if all symbols resolve, so a
 * partial failure leaves the previous set untouched. Progress and errors are
 * reported on stderr.
 *
 * Returns 1 on success, 0 on failure (the library is closed again if it had
 * been opened).
 */
__attribute__((__visibility__("hidden")))
int loadFastZlib(const char *ldLibraryPath) {
    const char *lib = "libzlib_fast.so";
    // strsep() modifies its argument, so walk a stack copy of the path list.
    char *modifiablePath = strdupa(ldLibraryPath);
    void *dl = NULL;
    compress2Func_t pCompress2Local;
    inflateFunc_t pInflateLocal;
    inflateInit_Func_t pInflateInit2Local;
    inflateSetDictionary_Func_t pInflateSetDictionaryLocal;
    inflateResetFunc_t pInflateResetLocal;
    inflateEndFunc_t pInflateEndLocal;
    deflateInit2_Func_t pDeflateInit2Local;
    deflateParams_Func_t pDeflateParamsLocal;
    deflateSetDictionary_Func_t pDeflateSetDictionaryLocal;
    deflate_Func_t pDeflateLocal;
    deflateReset_Func_t pDeflateResetLocal;
    deflateEnd_Func_t pDeflateEndLocal;

    const char *singlePath;
    const char *err = "success";
    while ((singlePath = strsep(&modifiablePath, ":")) != NULL) {
        int len = snprintf(NULL, 0, "%s/%s", singlePath, lib) + 1;
        char *path = (char*) alloca(len);
        snprintf(path, len, "%s/%s", singlePath, lib);
        fprintf(stderr, "loadFastZlib: trying path: %s\n", path);
        dl = dlopen(path, RTLD_NOW | RTLD_LOCAL);
        if (dl != NULL) {
            fprintf(stderr, "loadFastZlib: loaded library from %s\n", path);
            break;
        }
    }
    if (dl == NULL) {
        // Try the default paths; fail only if this attempt fails as well.
        dl = dlopen(lib, RTLD_NOW | RTLD_LOCAL);
        if (dl == NULL) {
            err = "Can't load zlib_fast";
            goto fail;
        }
        fprintf(
            stderr,
            "loadFastZlib: loaded library %s from system path\n",
            lib);
    }

    pCompress2Local = (compress2Func_t) dlsym(dl, "compress2_fast");
    if (pCompress2Local == NULL) {
        err = "Can't reslove compress2_fast function";
        goto fail;
    }

    pInflateLocal = (inflateFunc_t) dlsym(dl, "inflate2_fast");
    if (pInflateLocal == NULL) {
        err = "Can't reslove inflate2_fast function";
        goto fail;
    }

    pInflateInit2Local = (inflateInit_Func_t) dlsym(dl, "inflateInit2_fast_");
    if (pInflateInit2Local == NULL) {
        err = "Can't reslove inflateInit2_fast_ function";
        goto fail;
    }

    pInflateSetDictionaryLocal =
        (inflateSetDictionary_Func_t) dlsym(dl, "inflateSetDictionary_fast");
    if (pInflateSetDictionaryLocal == NULL) {
        err = "Can't reslove inflateSetDictionary_fast function";
        goto fail;
    }

    pInflateResetLocal =
        (inflateResetFunc_t) dlsym(dl, "inflateReset_fast");
    if (pInflateResetLocal == NULL) {
        err = "Can't reslove inflateReset_fast function";
        goto fail;
    }

    pInflateEndLocal = (inflateEndFunc_t) dlsym(dl, "inflateEnd2_fast");
    if (pInflateEndLocal == NULL) {
        err = "Can't reslove inflateEnd2_fast function";
        goto fail;
    }

    pDeflateInit2Local = (deflateInit2_Func_t) dlsym(dl, "deflateInit2_fast_");
    if (pDeflateInit2Local == NULL) {
        err = "Can't reslove deflateInit2_fast_ function";
        goto fail;
    }

    pDeflateParamsLocal =
        (deflateParams_Func_t) dlsym(dl, "deflateParams_fast");
    if (pDeflateParamsLocal == NULL) {
        err = "Can't reslove deflateParams_fast_ function";
        goto fail;
    }

    pDeflateSetDictionaryLocal =
        (deflateSetDictionary_Func_t) dlsym(dl, "deflateSetDictionary_fast");
    if (pDeflateSetDictionaryLocal == NULL) {
        err = "Can't reslove deflateSetDictionary_fast function";
        goto fail;
    }

    pDeflateLocal =
        (deflate_Func_t) dlsym(dl, "deflate_fast");
    if (pDeflateLocal == NULL) {
        err = "Can't reslove deflate_fast function";
        goto fail;
    }

    pDeflateResetLocal =
        (deflateReset_Func_t) dlsym(dl, "deflateReset_fast");
    if (pDeflateResetLocal == NULL) {
        err = "Can't reslove deflateReset_fast function";
        goto fail;
    }

    pDeflateEndLocal =
        (deflateEnd_Func_t) dlsym(dl, "deflateEnd_fast");
    if (pDeflateEndLocal == NULL) {
        err = "Can't reslove deflateEnd_fast function";
        goto fail;
    }

    // All symbols resolved: switch the whole set at once.
    pCompress2 = pCompress2Local;
    pInflate = pInflateLocal;
    pInflateInit2 = pInflateInit2Local;
    pInflateSetDictionary = pInflateSetDictionaryLocal;
    pInflateReset = pInflateResetLocal;
    pInflateEnd = pInflateEndLocal;
    pDeflateInit2 = pDeflateInit2Local;
    pDeflateParams = pDeflateParamsLocal;
    pDeflateSetDictionary = pDeflateSetDictionaryLocal;
    pDeflate = pDeflateLocal;
    pDeflateReset = pDeflateResetLocal;
    pDeflateEnd = pDeflateEndLocal;

    return 1;

fail:
    fprintf(stderr, "zlib_fast init error: %s: %s\n", err, dlerror());
    fprintf(stderr, "ldLibraryPath=%s\n", ldLibraryPath);
    if (dl != NULL) {
        dlclose(dl);
    }
    return 0;
}

/*
 * Points the global zlib function pointers at the regular zlib
 * implementation, then, if sse41Available() reports support, tries to replace
 * them with the variants from loadFastZlib(). The outcome is logged to stderr.
 */
__attribute__((__visibility__("hidden")))
void zlibInit(const char *ldLibraryPath) {
    // Inflate side.
    pInflateInit2 = inflateInit2_;
    pInflateSetDictionary = inflateSetDictionary;
    pInflate = inflate;
    pInflateReset = inflateReset;
    pInflateEnd = inflateEnd;

    // Deflate side.
    pCompress2 = compress2;
    pDeflateInit2 = deflateInit2_;
    pDeflateParams = deflateParams;
    pDeflateSetDictionary = deflateSetDictionary;
    pDeflate = deflate;
    pDeflateReset = deflateReset;
    pDeflateEnd = deflateEnd;

    if (!sse41Available()) {
        fprintf(stderr, "Using standard zlib\n");
        return;
    }
    // On failure loadFastZlib() has already reported the error itself.
    if (loadFastZlib(ldLibraryPath)) {
        fprintf(stderr, "Using SSE optimized zlib\n");
    }
}

/*
 * Inflates `inputlen` bytes at `input` into the `outputlen`-byte buffer whose
 * address is passed in `output`, in a single Z_FINISH call.
 *
 * Returns the number of bytes produced on success, or a negative code:
 *   -1  stream initialization ran out of memory
 *   -2  stream initialization failed for another reason
 *   -4  Z_OK (stream did not finish)     -5  Z_NEED_DICT
 *   -6  Z_DATA_ERROR                     -7  Z_STREAM_ERROR
 *   -8  Z_MEM_ERROR                      -9  Z_BUF_ERROR
 *   -10 any other zlib result
 */
__attribute__((__visibility__("hidden")))
jint inflateByteArray
(jbyte* input,
jint inputlen,
jlong output,
jint outputlen)
{
    int ret;
    z_stream stream;

    memset(&stream, 0, sizeof stream);
    switch (doInflateInit(&stream, MAX_WBITS)) {
        case Z_OK:
            break;
        case Z_MEM_ERROR:
            return -1;
        default:
            // msg may still be NULL here (the stream was zeroed above), and
            // fputs() must not be given a NULL string.
            if (stream.msg != NULL) {
                fputs(stream.msg, stderr);
            }
            return -2;
    }

    stream.avail_out = outputlen;
    stream.next_out = (Bytef*) output;
    stream.avail_in = inputlen;
    stream.next_in = (Bytef*)input;
    // initially there was Z_PARTIAL_FLUSH which is not allowed in manual:
    // http://www.zlib.net/manual.html
    ret = pInflate(&stream, Z_FINISH);
    switch (ret) {
        case Z_STREAM_END:
            ret = outputlen - stream.avail_out;
            break;
        case Z_OK:
            ret = -4;
            break;
        case Z_NEED_DICT:
            ret = -5;
            break;
        case Z_DATA_ERROR:
            ret = -6;
            break;
        case Z_STREAM_ERROR:
            ret = -7;
            break;
        case Z_MEM_ERROR:
            ret = -8;
            break;
        case Z_BUF_ERROR:
            ret = -9;
            break;
        default:
            ret = -10;
            break;
    }
    pInflateEnd(&stream);
    return ret;
}

/*
 * JNI entry point for NativeCompress.zlibInit: converts the Java string to a
 * C string and runs zlibInit() with it.
 *
 * If the string cannot be converted (GetStringUTFChars returns NULL) nothing
 * is initialized and the function returns immediately instead of passing NULL
 * on to zlibInit().
 */
__attribute__((__visibility__("default")))
JNIEXPORT void JNICALL Java_ru_yandex_compress_NativeCompress_zlibInit
(JNIEnv* env,
jclass class,
jstring ldLibraryPath)
{
    (void) class;
    const char *nativeString =
        (*env)->GetStringUTFChars(env, ldLibraryPath, NULL);
    if (nativeString == NULL) {
        return;
    }
    zlibInit(nativeString);
    (*env)->ReleaseStringUTFChars(env, ldLibraryPath, nativeString);
}


/*
 * JNI entry point: inflates the first `inputlen` bytes of the Java array
 * `input` into the native buffer at address `output`.
 * Returns inflateByteArray()'s result, or -3 if the array could not be pinned.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_inflateByteArray
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    jint result = -3;
    jbyte *pinned = (*env)->GetPrimitiveArrayCritical(env, input, 0);
    if (pinned != NULL) {
        result = inflateByteArray(pinned, inputlen, output, outputlen);
        // The input is only read, so no copy-back is needed.
        (*env)->ReleasePrimitiveArrayCritical(env, input, pinned, JNI_ABORT);
    }
    return result;
}

/*
 * JNI entry point: inflates between two native buffers whose addresses are
 * passed as jlong values. Returns inflateByteArray()'s result.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_inflateBytes
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) env;
    (void) class;
    jbyte *source = (jbyte*) input;
    return inflateByteArray(source, inputlen, output, outputlen);
}

/*
 * JavaCritical_ variant of inflateBytes: same work without the env/class
 * arguments. Returns inflateByteArray()'s result.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_inflateBytes
(jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    jbyte *source = (jbyte*) input;
    return inflateByteArray(source, inputlen, output, outputlen);
}


/*
 * JNI entry point: inflates the first `inputlen` bytes of the Java array
 * `input` into the Java array `output`, using the full length of `output` as
 * the capacity.
 *
 * Returns inflateByteArray()'s result, -3 if `input` could not be pinned or
 * -4 if `output` could not be pinned.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_inflateByteArray2
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jbyteArray output)
{
    (void) class;
    jint ret = 0;
    jbyte *inputArray = NULL;
    jbyte *outputArray = NULL;
    // Query the length before pinning: no other JNI function may be called
    // between GetPrimitiveArrayCritical and ReleasePrimitiveArrayCritical.
    jsize outputlen = (*env)->GetArrayLength(env, output);

    inputArray = (*env)->GetPrimitiveArrayCritical(env, input, 0);
    if (!inputArray) {
        ret = -3;
        goto exit;
    }
    outputArray = (*env)->GetPrimitiveArrayCritical(env, output, 0);
    if (!outputArray) {
        ret = -4;
        goto exit;
    }
    ret = inflateByteArray(inputArray, inputlen, (jlong) outputArray, outputlen);

exit:
    if (inputArray) {
        (*env)->ReleasePrimitiveArrayCritical(
            env,
            input,
            inputArray,
            JNI_ABORT);
    }
    if (outputArray) {
        // Copy back (if the VM handed out a copy) only when data was produced.
        (*env)->ReleasePrimitiveArrayCritical(
            env,
            output,
            outputArray,
            ret > 0 ? 0 : JNI_ABORT);
    }
    return ret;
}

/*
 * JNI entry point: compresses the first `inputlen` bytes of `input` into
 * `output` (capacity `outputlen`) with the given zlib level.
 *
 * Returns the compressed size, -3 / -4 if the input / output array could not
 * be pinned, or (zlib status - 100) if compression failed.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_deflateByteArray
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jbyteArray output,
jint outputlen,
jint level)
{
    (void) class;
    jint ret;
    jbyte *outputArray = NULL;
    jbyte *inputArray = (*env)->GetPrimitiveArrayCritical(env, input, 0);

    if (inputArray == NULL) {
        ret = -3;
    } else {
        outputArray = (*env)->GetPrimitiveArrayCritical(env, output, 0);
        if (outputArray == NULL) {
            ret = -4;
        } else {
            uLongf produced = outputlen;
            ret = pCompress2((Bytef*)outputArray, &produced,
                (Bytef*)inputArray, inputlen, level);
            if (ret != Z_OK) {
                ret = ret - 100;
            } else {
                ret = produced;
            }
        }
    }

    if (inputArray != NULL) {
        (*env)->ReleasePrimitiveArrayCritical(env, input, inputArray, JNI_ABORT);
    }
    if (outputArray != NULL) {
        // Keep the output only when something was actually produced.
        (*env)->ReleasePrimitiveArrayCritical(
            env,
            output,
            outputArray,
            ret > 0 ? 0 : JNI_ABORT);
    }
    return ret;
}

/*
 * JNI entry point: compresses between two native buffers whose addresses are
 * passed as jlong values.
 * Returns the compressed size, or (zlib status - 100) if compression failed.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_deflateRawArray
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen,
jint level)
{
    (void) env;
    (void) class;
    uLongf produced = outputlen;
    jint status = pCompress2((Bytef*)output, &produced,
        (Bytef*)input, inputlen, level);
    if (status != Z_OK) {
        return status - 100;
    }
    return produced;
}

/*
 * JavaCritical_ variant of deflateRawArray: same work without the env/class
 * arguments.
 * Returns the compressed size, or (zlib status - 100) if compression failed.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_deflateRawArray
(jlong input,
jint inputlen,
jlong output,
jint outputlen,
jint level)
{
    uLongf produced = outputlen;
    jint status = pCompress2((Bytef*)output, &produced,
        (Bytef*)input, inputlen, level);
    if (status != Z_OK) {
        return status - 100;
    }
    return produced;
}

/*
 * JavaCritical_ variant of inflateByteArray: the Java array arrives as a
 * (length, pointer) pair. `length` is ignored; `inputlen` bytes at `input`
 * are inflated. Returns inflateByteArray()'s result.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_inflateByteArray
(jint length,
jbyte* input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) length;
    return inflateByteArray(input, inputlen, output, outputlen);
}

/*
 * Decodes one variable-length integer from *buffer.
 *
 * Each byte contributes its low 7 bits, least-significant group first; a set
 * high bit means another byte follows. At most five bytes (enough for 32
 * bits) are accepted.
 *
 * On success stores the value in *vint, advances *buffer past the encoding
 * and returns 0. Returns -1 — leaving *buffer and *vint untouched — if the
 * data ends before the encoding does or if the encoding is longer than five
 * bytes.
 */
__attribute__((__visibility__("hidden")))
int getVInt(void **buffer, void *bufferEnd, int *vint) {
    unsigned char *cursor = (unsigned char *)*buffer;
    const unsigned char *end = (const unsigned char *)bufferEnd;
    // Accumulate unsigned: shifting set bits into an int's sign bit is undefined.
    unsigned int value = 0;
    unsigned int shift = 0;
    unsigned char byte;
    do {
        // shift >= 32 means a sixth byte was requested: reject instead of
        // shifting by more than the width of the type.
        if (cursor >= end || shift >= 32) {
            return -1;
        }
        byte = *cursor++;
        value |= (unsigned int)(byte & 0x7F) << shift;
        shift += 7;
    } while ((byte & 0x80) != 0);
    *vint = (int)value;
    *buffer = cursor;
    return 0;
}

/*
 * Reads the two variable-length integers stored at file offset `fPos` of `fd`
 * (packed size first, then unpacked size) and returns them combined by
 * BLOCKINFO().
 *
 * Returns BLOCKINFO(EOF_ERROR, EOF_ERROR) if nothing could be read or if the
 * bytes read do not contain two complete integers.
 */
jlong readBlockSizes(jint fd, jlong fPos) {
    char header[MAX_VINT_LENGTH << 1];
    ssize_t available = PreadFull(fd, header, sizeof header, fPos);
    if (available <= 0) {
        return BLOCKINFO(EOF_ERROR, EOF_ERROR);
    }
    void *cursor = header;
    // Computed on the char array: arithmetic on void* is a compiler extension.
    void *headerEnd = header + available;
    int packedSize;
    int unpackedSize;
    if (getVInt(&cursor, headerEnd, &packedSize) < 0
        || getVInt(&cursor, headerEnd, &unpackedSize) < 0)
    {
        return BLOCKINFO(EOF_ERROR, EOF_ERROR);
    }
    return BLOCKINFO(packedSize, unpackedSize);
}

/*
 * JNI entry point for NativeCompress.readBlockSizes: forwards to
 * readBlockSizes().
 * NOTE(review): readBlockSizes() returns a 64-bit jlong carrying the unpacked
 * size in its upper 32 bits, while this wrapper is declared to return jint —
 * confirm against the Java declaration.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_readBlockSizes
(JNIEnv* env,
jclass class,
jint fd,
jlong fPos)
{
    (void) env;
    (void) class;
    return readBlockSizes(fd, fPos);
}

/*
 * JavaCritical_ variant of readBlockSizes: forwards to readBlockSizes().
 * NOTE(review): readBlockSizes() returns a 64-bit jlong carrying the unpacked
 * size in its upper 32 bits, while this wrapper is declared to return jint —
 * confirm against the Java declaration.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_readBlockSizes
(jint fd,
jlong fPos)
{
    return readBlockSizes(fd, fPos);
}

/*
 * JNI entry point: reads up to `len` bytes from `fd` at offset `fPos` into
 * the native buffer whose address is `output`.
 * Returns PreadFull()'s result (bytes read, or -1 on error) narrowed to jint.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_pread
(JNIEnv* env,
jclass class,
jint fd,
jlong fPos,
jlong output,
jint len)
{
    (void) env;
    (void) class;
    return PreadFull(fd, (void *)output, len, fPos);
}

/*
 * JavaCritical_ variant of pread: reads up to `len` bytes from `fd` at offset
 * `fPos` into the native buffer whose address is `input` (the destination,
 * despite the parameter name).
 * Returns PreadFull()'s result (bytes read, or -1 on error) narrowed to jint.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_pread
(jint fd,
jlong fPos,
jlong input,
jint len)
{
    return PreadFull(fd, (void *)input, len, fPos);
}

/*
 * JNI entry point: writes `len` bytes from the native buffer at address
 * `output` to `fd` at offset `fPos`, optionally followed by fdatasync().
 *
 * Returns the number of bytes written, or -1 if the write failed or — when
 * `fsync` is set — if fdatasync() failed, so that data which did not reach
 * stable storage is not reported as successfully written.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jlong JNICALL Java_ru_yandex_compress_NativeCompress_pwrite0
(JNIEnv* env,
jclass class,
jint fd,
jlong fPos,
jlong output,
jint len,
jboolean fsync)
{
    (void) env;
    (void) class;
    jlong ret = PwriteFull(fd, (void *)output, len, fPos);
    if (fsync && fdatasync(fd) == -1) {
        ret = -1;
    }
    return ret;
}

/*
 * JavaCritical_ variant of pwrite0: writes `len` bytes from the native buffer
 * at address `output` to `fd` at offset `fPos`, optionally followed by
 * fdatasync().
 *
 * Returns the number of bytes written, or -1 if the write failed or — when
 * `fsync` is set — if fdatasync() failed, so that data which did not reach
 * stable storage is not reported as successfully written.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jlong JNICALL JavaCritical_ru_yandex_compress_NativeCompress_pwrite0
(jint fd,
jlong fPos,
jlong output,
jint len,
jboolean fsync)
{
    jlong ret = PwriteFull(fd, (void *)output, len, fPos);
    if (fsync && fdatasync(fd) == -1) {
        ret = -1;
    }
    return ret;
}

/*
 * Reads `deflatedlen` bytes from `fd` at offset `fPos` into this thread's
 * read buffer and inflates them into the `outputlen`-byte buffer at address
 * `output`.
 *
 * Returns inflateByteArray()'s result, -errno if the read failed, or
 * EOF_ERROR if fewer than `deflatedlen` bytes (or none) were available.
 */
__attribute__((__visibility__("hidden")))
jlong readAndInflateByteArray
(jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    void *readBuffer = getTlsReadBuffer(deflatedlen);
    ssize_t got = PreadFull(fd, readBuffer, deflatedlen, fPos);
    // Check the error first: -1 would also satisfy the short-read test below
    // and be misreported as end of file.
    if (got == -1) {
        return -errno;
    }
    if (got == 0 || got < deflatedlen) {
        return EOF_ERROR;
    }

    return inflateByteArray(readBuffer, deflatedlen, output, outputlen);
}

/*
 * JNI entry point: forwards to readAndInflateByteArray(). The helper's jlong
 * result is narrowed to the jint return type.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_readAndInflateByteArray
(JNIEnv* env,
jclass class,
jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    (void) env;
    (void) class;
    return readAndInflateByteArray(fd, fPos, deflatedlen, output, outputlen);
}

/*
 * JavaCritical_ variant: forwards to readAndInflateByteArray(). The helper's
 * jlong result is narrowed to the jint return type.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_readAndInflateByteArray
(jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    return readAndInflateByteArray(fd, fPos, deflatedlen, output, outputlen);
}
//ZStandard
/*
 * JNI entry point: zstd-compresses the first `inputlen` bytes of `input` into
 * `output` (capacity `outputlen`) at the given level, using this thread's
 * compression context.
 *
 * Returns the compressed size, a negative value if zstd reported an error,
 * or -3 / -4 if the input / output array could not be pinned.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdCompressByteArray
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jbyteArray output,
jint outputlen,
int level)
{
    (void) class;
    jint ret;
    jbyte *outputArray = NULL;
    jbyte *inputArray = (*env)->GetPrimitiveArrayCritical(env, input, 0);

    if (inputArray == NULL) {
        ret = -3;
    } else {
        outputArray = (*env)->GetPrimitiveArrayCritical(env, output, 0);
        if (outputArray == NULL) {
            ret = -4;
        } else {
            ret = ZSTD_compressCCtx(
                getZstdCompressCtx(),
                (char*)outputArray,
                outputlen,
                (char*)inputArray,
                inputlen,
                level);
            // Make sure an error is always reported as a negative number.
            if (ZSTD_isError(ret) && ret > 0) {
                ret = -ret;
            }
        }
    }

    if (inputArray != NULL) {
        (*env)->ReleasePrimitiveArrayCritical(env, input, inputArray, JNI_ABORT);
    }
    if (outputArray != NULL) {
        // Keep the output only when something was actually produced.
        (*env)->ReleasePrimitiveArrayCritical(
            env,
            output,
            outputArray,
            ret > 0 ? 0 : JNI_ABORT);
    }
    return ret;
}

/*
 * JNI entry point: zstd-compresses between two native buffers whose addresses
 * are passed as jlong values, using this thread's compression context.
 * Returns the compressed size, or a negative value if zstd reported an error.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdCompressRawArray
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen,
int level)
{
    (void) env;
    (void) class;
    jint result = ZSTD_compressCCtx(
        getZstdCompressCtx(),
        (char*)output,
        outputlen,
        (char*)input,
        inputlen,
        level);
    // Make sure an error is always reported as a negative number.
    if (ZSTD_isError(result) && result > 0) {
        result = -result;
    }
    return result;
}

/*
 * JavaCritical_ variant of zstdCompressRawArray: same work without the
 * env/class arguments.
 * Returns the compressed size, or a negative value if zstd reported an error.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_zstdCompressRawArray
(jlong input,
jint inputlen,
jlong output,
jint outputlen,
int level)
{
    jint result = ZSTD_compressCCtx(
        getZstdCompressCtx(),
        (char*)output,
        outputlen,
        (char*)input,
        inputlen,
        level);
    // Make sure an error is always reported as a negative number.
    if (ZSTD_isError(result) && result > 0) {
        result = -result;
    }
    return result;
}

/*
 * Reads `deflatedlen` bytes from `fd` at offset `fPos` into this thread's
 * read buffer and zstd-decompresses them into the `outputlen`-byte buffer at
 * address `output`.
 *
 * Returns the decompressed size, a negative value if zstd reported an error,
 * -errno if the read failed, or EOF_ERROR if fewer than `deflatedlen` bytes
 * (or none) were available.
 */
jint zstdReadAndDecompressByteArray
(jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    void *readBuffer = getTlsReadBuffer(deflatedlen);
    ssize_t got = PreadFull(fd, readBuffer, deflatedlen, fPos);
    // Check the error first: -1 would also satisfy the short-read test below
    // and be misreported as end of file.
    if (got == -1) {
        return -errno;
    }
    if (got == 0 || got < deflatedlen) {
        return EOF_ERROR;
    }
    jint ret = ZSTD_decompressDCtx(
        getZstdDecompressCtx(),
        (char*)output,
        outputlen,
        (char*)readBuffer,
        deflatedlen);
    // Make sure an error is always reported as a negative number.
    if (ZSTD_isError(ret)) {
        if (ret > 0) {
            ret = -ret;
        }
    }
    return ret;
}

/* JNI entry point: forwards to zstdReadAndDecompressByteArray(). */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdReadAndDecompressByteArray
(JNIEnv* env,
jclass class,
jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    (void) env;
    (void) class;
    return zstdReadAndDecompressByteArray(fd, fPos, deflatedlen, output, outputlen);
}

/* JavaCritical_ variant: forwards to zstdReadAndDecompressByteArray(). */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_zstdReadAndDecompressByteArray
(jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    return zstdReadAndDecompressByteArray(fd, fPos, deflatedlen, output, outputlen);
}

/*
 * JNI entry point: zstd-decompresses the first `inputlen` bytes of the Java
 * array `input` into the native buffer at address `output`.
 * Returns ZSTD_decompressDCtx()'s result converted to jint, or -1 if the
 * array could not be pinned.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdDecompressByteArray
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    jint result = -1;
    jbyte *pinned = (*env)->GetPrimitiveArrayCritical(env, input, 0);
    if (pinned != NULL) {
        result = ZSTD_decompressDCtx(
            getZstdDecompressCtx(),
            (Bytef*)output,
            outputlen,
            pinned,
            inputlen);
        // The input is only read, so no copy-back is needed.
        (*env)->ReleasePrimitiveArrayCritical(env, input, pinned, JNI_ABORT);
    }
    return result;
}

/*
 * JNI entry point: zstd-decompresses between two native buffers whose
 * addresses are passed as jlong values.
 * Returns ZSTD_decompressDCtx()'s result converted to jint.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdDecompressBytes
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) env;
    (void) class;
    ZSTD_DCtx *ctx = getZstdDecompressCtx();
    return ZSTD_decompressDCtx(
        ctx,
        (Bytef*)output,
        outputlen,
        (Bytef*)input,
        inputlen);
}

/*
 * JavaCritical_ variant of zstdDecompressBytes: same work without the
 * env/class arguments.
 * Returns ZSTD_decompressDCtx()'s result converted to jint.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_zstdDecompressBytes
(jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    ZSTD_DCtx *ctx = getZstdDecompressCtx();
    return ZSTD_decompressDCtx(
        ctx,
        (Bytef*)output,
        outputlen,
        (Bytef*)input,
        inputlen);
}

/*
 * JNI entry point: returns, as a new Java string, the name ZSTD_getErrorName()
 * gives for `code`.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jstring JNICALL Java_ru_yandex_compress_NativeCompress_zstdCodeToString
(JNIEnv* env,
jclass class,
jint code)
{
    (void) class;
    return (*env)->NewStringUTF(env, ZSTD_getErrorName(code));
}

//AES+DEFLATE
/*
 * Decrypts `inputlen` bytes at `input` with AES-128-CBC (file-level key/iv,
 * padding enabled) into this thread's compression scratch buffer, then
 * inflates the plaintext into the `outputlen`-byte buffer at address
 * `output`.
 *
 * Returns the number of inflated bytes, or a negative code:
 *   -100  the cipher context could not be created/initialized, or
 *         EVP_DecryptUpdate failed
 *   -101  EVP_DecryptFinal_ex failed
 *   -1    out of memory (scratch buffer or inflate initialization)
 *   other negative values as returned by inflateByteArray()
 */
jint aesflateDecompress
(jbyte* input,
jint inputlen,
jlong output,
jint outputlen)
{
    int ret;
    int updateLen = 0;
    int finalLen = 0;

    // 32 spare bytes on top of the ciphertext length, as sized originally.
    char *decrypted = getTlsCompressBuffer(inputlen + 32);
    if (decrypted == NULL) {
        return -1;
    }

    EVP_CIPHER_CTX* ctx = EVP_CIPHER_CTX_new();
    if (ctx == NULL) {
        return -100;
    }
    EVP_CIPHER_CTX_init(ctx);
    if (!EVP_DecryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv)) {
        ret = -100;
    } else {
        EVP_CIPHER_CTX_set_padding(ctx, 1);
        if (!EVP_DecryptUpdate(
            ctx,
            (unsigned char*) decrypted,
            &updateLen,
            (const unsigned char*) input,
            inputlen))
        {
            ret = -100;
        } else if (!EVP_DecryptFinal_ex(
            ctx,
            (unsigned char*) decrypted + updateLen,
            &finalLen))
        {
            ret = -101;
        } else {
            ret = updateLen + finalLen;
        }
    }
    EVP_CIPHER_CTX_cleanup(ctx);
    EVP_CIPHER_CTX_free(ctx);
    if (ret < 0) {
        return ret;
    }

    // Same single-shot inflate (and error codes) as for unencrypted blocks.
    return inflateByteArray((jbyte*) decrypted, ret, output, outputlen);
}

/*
 * JNI entry point: decrypts and inflates the first `inputlen` bytes of the
 * Java array `input` into the native buffer at address `output`.
 * Returns aesflateDecompress()'s result, or -3 if the array could not be
 * pinned.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_aesflateDecompress
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    jint result = -3;
    jbyte *pinned = (*env)->GetPrimitiveArrayCritical(env, input, 0);
    if (pinned != NULL) {
        result = aesflateDecompress(pinned, inputlen, output, outputlen);
        // The input is only read, so no copy-back is needed.
        (*env)->ReleasePrimitiveArrayCritical(env, input, pinned, JNI_ABORT);
    }
    return result;
}

/*
 * JNI entry point: decrypts and inflates between two native buffers whose
 * addresses are passed as jlong values. Returns aesflateDecompress()'s result.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_aesflateDecompressBytes
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) env;
    (void) class;
    jbyte *source = (jbyte*) input;
    return aesflateDecompress(source, inputlen, output, outputlen);
}

/*
 * JavaCritical_ variant of aesflateDecompressBytes: same work without the
 * env/class arguments. Returns aesflateDecompress()'s result.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_aesflateDecompressBytes
(jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    jbyte *source = (jbyte*) input;
    return aesflateDecompress(source, inputlen, output, outputlen);
}

/*
 * Reads `deflatedlen` bytes from `fd` at offset `fPos` into this thread's
 * read buffer, then decrypts and inflates them into the `outputlen`-byte
 * buffer at address `output`.
 *
 * Returns aesflateDecompress()'s result, -errno if the read failed, or
 * EOF_ERROR if fewer than `deflatedlen` bytes (or none) were available.
 */
__attribute__((__visibility__("hidden")))
jlong aesflateReadAndDecompress
(jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    void *readBuffer = getTlsReadBuffer(deflatedlen);
    ssize_t got = PreadFull(fd, readBuffer, deflatedlen, fPos);
    // Check the error first: -1 would also satisfy the short-read test below
    // and be misreported as end of file.
    if (got == -1) {
        return -errno;
    }
    if (got == 0 || got < deflatedlen) {
        return EOF_ERROR;
    }

    return aesflateDecompress(readBuffer, deflatedlen, output, outputlen);
}

/*
 * JNI entry point: forwards to aesflateReadAndDecompress(). The helper's
 * jlong result is narrowed to the jint return type.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_aesflateReadAndDecompress
(JNIEnv* env,
jclass class,
jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    (void) env;
    (void) class;
    return aesflateReadAndDecompress(fd, fPos, deflatedlen, output, outputlen);
}

/*
 * JavaCritical_ variant: forwards to aesflateReadAndDecompress(). The
 * helper's jlong result is narrowed to the jint return type.
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_aesflateReadAndDecompress
(jint fd,
jlong fPos,
jint deflatedlen,
jlong output,
jint outputlen)
{
    return aesflateReadAndDecompress(fd, fPos, deflatedlen, output, outputlen);
}

/*
 * JNI entry point: deflates the first `inputlen` bytes of `input` at the
 * given level, then encrypts the result with AES-128-CBC (file-level key/iv,
 * padding enabled) into `output`, whose capacity is `outputlen`.
 *
 * Padding always extends the ciphertext to the next whole cipher block, so
 * the compressed data is limited to a size whose padded ciphertext still fits
 * into `outputlen`; anything larger is reported as a buffer error instead of
 * being written past the end of `output`.
 *
 * Returns the ciphertext length, or a negative code:
 *   -3 / -4        input / output array could not be pinned
 *   status - 100   compression failed with that zlib status (Z_BUF_ERROR when
 *                  the output is too small, Z_MEM_ERROR when the scratch
 *                  buffer could not be allocated)
 *   -200           the cipher context could not be created/initialized, or
 *                  EVP_EncryptUpdate failed
 *   -201           EVP_EncryptFinal_ex failed
 */
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_aesflateCompress
(JNIEnv* env,
jclass class,
jbyteArray input,
jint inputlen,
jbyteArray output,
jint outputlen,
jint level)
{
    (void) class;
    jint ret = 0;
    jbyte *inputArray = NULL;
    jbyte *outputArray = NULL;
    EVP_CIPHER_CTX* ctx = NULL;
    char *compressed;
    uLongf compressedLen;
    int blockSize;
    int encrypted = 0;
    int padding = 0;

    inputArray = (*env)->GetPrimitiveArrayCritical(env, input, 0);
    if (!inputArray) {
        ret = -3;
        goto exit;
    }
    outputArray = (*env)->GetPrimitiveArrayCritical(env, output, 0);
    if (!outputArray) {
        ret = -4;
        goto exit;
    }

    blockSize = EVP_CIPHER_block_size(EVP_aes_128_cbc());
    if (blockSize <= 0 || outputlen < blockSize) {
        // Not even one padded block fits into the output.
        ret = Z_BUF_ERROR - 100;
        goto exit;
    }
    // n plaintext bytes encrypt to (n / blockSize + 1) * blockSize bytes, so
    // the largest plaintext that fits is one byte short of the last whole
    // block boundary inside outputlen.
    compressedLen = (uLongf) (outputlen / blockSize) * blockSize - 1;

    compressed = getTlsCompressBuffer((int) compressedLen);
    if (compressed == NULL) {
        ret = Z_MEM_ERROR - 100;
        goto exit;
    }
    ret = pCompress2((Bytef*)compressed, &compressedLen,
        (Bytef*)inputArray, inputlen, level);
    if (ret == Z_OK) {
        ret = compressedLen;
    } else {
        ret = ret - 100;
    }

    if (ret < 0) {
        goto exit;
    }

    ctx = EVP_CIPHER_CTX_new();
    if (ctx == NULL) {
        ret = -200;
        goto exit;
    }
    EVP_CIPHER_CTX_init(ctx);
    if (!EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv)) {
        ret = -200;
        goto exit;
    }
    EVP_CIPHER_CTX_set_padding(ctx, 1);
    if (!EVP_EncryptUpdate(
            ctx,
            (unsigned char *) outputArray,
            &encrypted,
            (const unsigned char*)compressed,
            ret))
    {
        ret = -200;
    } else if (!EVP_EncryptFinal_ex(
            ctx,
            (unsigned char *) (outputArray) + encrypted,
            &padding))
    {
        ret = -201;
    } else {
        ret = encrypted + padding;
    }

exit:
    if (ctx != NULL) {
        EVP_CIPHER_CTX_cleanup(ctx);
        EVP_CIPHER_CTX_free(ctx);
    }
    if (inputArray) {
        (*env)->ReleasePrimitiveArrayCritical(
            env,
            input,
            inputArray,
            JNI_ABORT);
    }
    if (outputArray) {
        // Keep the output only when something was actually produced.
        (*env)->ReleasePrimitiveArrayCritical(
            env,
            output,
            outputArray,
            ret > 0 ? 0 : JNI_ABORT);
    }
    return ret;
}

// JNI entry point: deflates `inputlen` bytes at native address `input` at
// `level` into a temporary buffer, then encrypts the deflated bytes with
// AES-128-CBC (padding enabled, file-level `key` / `iv`) into the memory at
// native address `output`. `env` and `class` are not used.
//
// Returns the number of ciphertext bytes written to `output`, or a negative
// code:
//   <zlib code> - 100 compress2 failed; this is Z_BUF_ERROR - 100 when
//                     `outputlen` cannot hold the padded ciphertext;
//   -200              cipher context setup or EVP_EncryptUpdate failed;
//   -201              EVP_EncryptFinal_ex failed.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_aesflateRawCompress
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen,
jint level)
{
    // AES block size in bytes; padded CBC output is a whole number of blocks.
    enum { AESFLATE_BLOCK = 16 };

    (void) class;
    (void) env;
    jint ret = 0;
    int encrypted = 0;
    int padding = 0;

    // With padding enabled, n plaintext bytes become
    // (n / AESFLATE_BLOCK + 1) * AESFLATE_BLOCK ciphertext bytes, and OpenSSL
    // does not bound-check the destination. Cap the deflated size so the
    // ciphertext can never run past outputlen; an output too small for even
    // one block is reported the same way compress2 reports a short buffer.
    if (outputlen < AESFLATE_BLOCK) {
        return Z_BUF_ERROR - 100;
    }
    uLongf compressedLen =
        (uLongf) (outputlen / AESFLATE_BLOCK) * AESFLATE_BLOCK - 1;
    char *compressed = getTlsCompressBuffer(compressedLen);
    ret = pCompress2((Bytef*)compressed, &compressedLen,
        (Bytef*)input, inputlen, level);
    if (ret != Z_OK) {
        return ret - 100;
    }

    // EVP_CIPHER_CTX_new() hands back an initialized context and
    // EVP_CIPHER_CTX_free() cleans it up (and accepts NULL), so no separate
    // init / cleanup calls are needed.
    EVP_CIPHER_CTX* ctx = EVP_CIPHER_CTX_new();
    if (!ctx || !EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv)) {
        ret = -200;
    } else {
        EVP_CIPHER_CTX_set_padding(ctx, 1);
        if (!EVP_EncryptUpdate(
                ctx,
                (unsigned char *) output,
                &encrypted,
                (const unsigned char*)compressed,
                (int) compressedLen))
        {
            ret = -200;
        } else if (!EVP_EncryptFinal_ex(
                ctx,
                (unsigned char *) (output) + encrypted,
                &padding))
        {
            ret = -201;
        } else {
            ret = encrypted + padding;
        }
    }
    EVP_CIPHER_CTX_free(ctx);
    return ret;
}

// JavaCritical entry point: deflates `inputlen` bytes at native address
// `input` at `level` into a temporary buffer, then encrypts the deflated
// bytes with AES-128-CBC (padding enabled, file-level `key` / `iv`) into the
// memory at native address `output`.
//
// Returns the number of ciphertext bytes written to `output`, or a negative
// code:
//   <zlib code> - 100 compress2 failed; this is Z_BUF_ERROR - 100 when
//                     `outputlen` cannot hold the padded ciphertext;
//   -200              cipher context setup or EVP_EncryptUpdate failed;
//   -201              EVP_EncryptFinal_ex failed.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL JavaCritical_ru_yandex_compress_NativeCompress_aesflateRawCompress
(jlong input,
jint inputlen,
jlong output,
jint outputlen,
jint level)
{
    // AES block size in bytes; padded CBC output is a whole number of blocks.
    enum { AESFLATE_BLOCK = 16 };

    jint ret = 0;
    int encrypted = 0;
    int padding = 0;

    // With padding enabled, n plaintext bytes become
    // (n / AESFLATE_BLOCK + 1) * AESFLATE_BLOCK ciphertext bytes, and OpenSSL
    // does not bound-check the destination. Cap the deflated size so the
    // ciphertext can never run past outputlen; an output too small for even
    // one block is reported the same way compress2 reports a short buffer.
    if (outputlen < AESFLATE_BLOCK) {
        return Z_BUF_ERROR - 100;
    }
    uLongf compressedLen =
        (uLongf) (outputlen / AESFLATE_BLOCK) * AESFLATE_BLOCK - 1;
    char *compressed = getTlsCompressBuffer(compressedLen);
    ret = pCompress2((Bytef*)compressed, &compressedLen,
        (Bytef*)input, inputlen, level);
    if (ret != Z_OK) {
        return ret - 100;
    }

    // EVP_CIPHER_CTX_new() hands back an initialized context and
    // EVP_CIPHER_CTX_free() cleans it up (and accepts NULL), so no separate
    // init / cleanup calls are needed.
    EVP_CIPHER_CTX* ctx = EVP_CIPHER_CTX_new();
    if (!ctx || !EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv)) {
        ret = -200;
    } else {
        EVP_CIPHER_CTX_set_padding(ctx, 1);
        if (!EVP_EncryptUpdate(
                ctx,
                (unsigned char *) output,
                &encrypted,
                (const unsigned char*)compressed,
                (int) compressedLen))
        {
            ret = -200;
        } else if (!EVP_EncryptFinal_ex(
                ctx,
                (unsigned char *) (output) + encrypted,
                &padding))
        {
            ret = -201;
        } else {
            ret = encrypted + padding;
        }
    }
    EVP_CIPHER_CTX_free(ctx);
    return ret;
}


// Maps an aesflate error code to a human-readable name. The four cipher
// codes (-100, -101, -200, -201) have dedicated names; every other value is
// reported as "Deflate error".
const char *aesflate_getErrorName(int code) {
    static const struct {
        int value;
        const char *text;
    } cipherErrors[] = {
        { -100, "DecryptUpdate error" },
        { -101, "DecryptFinal error" },
        { -200, "EncryptUpdate error" },
        { -201, "EncryptFinal error" },
    };
    const unsigned int count = sizeof cipherErrors / sizeof cipherErrors[0];

    for (unsigned int idx = 0; idx < count; idx++) {
        if (cipherErrors[idx].value == code) {
            return cipherErrors[idx].text;
        }
    }
    return "Deflate error";
}

//Testing
// JNI entry point: compresses `inputlen` bytes at native address `input`
// into the `outputlen`-byte buffer at native address `output`, using the
// context from getZstdCompressCtx() and compression level 1.
// Returns the ZSTD_compressCCtx() result converted to jint. `env` and
// `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdCompressBlock
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    (void) env;

    char *dst = (char*) output;
    char *src = (char*) input;
    return (jint) ZSTD_compressCCtx(
        getZstdCompressCtx(), dst, outputlen, src, inputlen, 1);
}

// JNI entry point: compresses `inputlen` bytes at native address `input`
// into the `outputlen`-byte buffer at native address `output` with zlib
// compress2() at level 1.
// Returns the compressed size, or -1 when compress2() does not return Z_OK.
// `env` and `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zlibCompressBlock
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    (void) env;

    // In: capacity of the destination; out: bytes actually produced.
    uLongf written = outputlen;
    if (compress2((Bytef*)output, &written, (Bytef*)input, inputlen, 1) == Z_OK) {
        return (jint)written;
    }
    return -1;
}

// JNI entry point: decompresses `inputlen` bytes at native address `input`
// into the `outputlen`-byte buffer at native address `output`, using the
// context from getZstdDecompressCtx().
// Returns the ZSTD_decompressDCtx() result converted to jint. `env` and
// `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zstdDecompressBlock
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    (void) env;

    char *dst = (char*)output;
    char *src = (char*)input;
    return (jint) ZSTD_decompressDCtx(
        getZstdDecompressCtx(), dst, outputlen, src, inputlen);
}

// JNI entry point: forwards to inflateByteArray(), passing `input`
// reinterpreted as a jbyte pointer and the remaining arguments unchanged.
// Returns the inflateByteArray() result as jint. `env` and `class` are not
// used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_zlibDecompressBlock
(JNIEnv* env,
jclass class,
jlong input,
jint inputlen,
jlong output,
jint outputlen)
{
    (void) class;
    (void) env;
    return inflateByteArray((jbyte*)input, inputlen, output, outputlen);
}

// JNI entry point: calls setpriority(PRIO_PROCESS, tid, prio).
// Returns 0 on success, otherwise the errno value left by setpriority().
// `env` and `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_msearch_util_Compress_setThreadPriority0
(JNIEnv* env,
jclass class,
jint tid,
jint prio)
{
    (void) class;
    (void) env;

    // Clear errno so a stale value is never reported.
    errno = 0;
    const int rc = setpriority(PRIO_PROCESS, tid, prio);
    return rc == -1 ? errno : 0;
}

// JNI entry point: issues the ioprio_set syscall for IOPRIO_WHO_PROCESS /
// `tid`, with the priority word built by IOPRIO_PRIO_VALUE(ioclass, prio).
// Returns 0 on success, otherwise the errno value left by the syscall.
// `env` and `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_msearch_util_Compress_setThreadIOPriority0
(JNIEnv* env,
jclass class,
jint tid,
jint ioclass,
jint prio)
{
    (void) class;
    (void) env;

    // Clear errno so a stale value is never reported.
    errno = 0;
    const long rc = syscall(
        __NR_ioprio_set,
        IOPRIO_WHO_PROCESS,
        tid,
        IOPRIO_PRIO_VALUE((int) ioclass, (int)prio));
    if (rc != -1) {
        return 0;
    }
    return errno;
}

// JNI entry point: returns the result of the gettid syscall, converted to
// jint. `env` and `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_msearch_util_Compress_getCurrentThreadId
(JNIEnv* env,
jclass class)
{
    (void) class;
    (void) env;

    const long tid = syscall(__NR_gettid);
    return (jint) tid;
}

// JNI entry point: when `hasTcmalloc` is set, logs a line to stderr and calls
// pMallocExtension_ReleaseFreeMemory(); otherwise does nothing. `env` and
// `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT void JNICALL Java_ru_yandex_msearch_util_Compress_releaseFreeMemory
(JNIEnv* env,
jclass class)
{
    (void) class;
    (void) env;

    if (!hasTcmalloc) {
        return;
    }
    fprintf(stderr, "MallocExctension.ReleaseFreeMemory()\n");
    pMallocExtension_ReleaseFreeMemory();
}

// JNI entry point: converts `ldLibraryPath` to a UTF-8 C string and calls
// tcmallocInit() with "libtcmalloc_minimal.so", falling back to
// "libtcmalloc_minimal.so.4" when the first attempt fails.
//
// The outcome is stored in `hasTcmalloc` and returned. If the path string
// cannot be obtained from the JVM, no initialization is attempted,
// `hasTcmalloc` is left untouched and JNI_FALSE is returned.
// `class` is not used.
__attribute__((__visibility__("default")))
JNIEXPORT jboolean JNICALL Java_ru_yandex_msearch_util_Compress_tcmallocInit
(JNIEnv* env,
jclass class,
jstring ldLibraryPath)
{
    (void) class;
    const char *nativeString = (*env)->GetStringUTFChars(env, ldLibraryPath, NULL);
    if (!nativeString) {
        // GetStringUTFChars returns NULL on failure; the pointer must not be
        // passed on to tcmallocInit() or ReleaseStringUTFChars().
        return JNI_FALSE;
    }
    bool success = tcmallocInit("libtcmalloc_minimal.so", nativeString)
        || tcmallocInit("libtcmalloc_minimal.so.4", nativeString);
    (*env)->ReleaseStringUTFChars(env, ldLibraryPath, nativeString);
    hasTcmalloc = success;
    return success ? JNI_TRUE : JNI_FALSE;
}

// JNI entry point: when `hasTcmalloc` is set, asks
// pMallocExtension_GetStats() to fill a 2028-byte stack buffer and returns
// its contents as a new Java string; otherwise returns NULL.
// `class` is not used.
__attribute__((__visibility__("default")))
JNIEXPORT jstring JNICALL Java_ru_yandex_msearch_util_Compress_allocatorStats
(JNIEnv* env,
jclass class)
{
    enum { STATS_BUFFER_SIZE = 2028 };

    (void) class;
    if (!hasTcmalloc) {
        return NULL;
    }

    char statsBuffer[STATS_BUFFER_SIZE];
    // Start from an empty string in case nothing gets written.
    statsBuffer[0] = '\0';
    pMallocExtension_GetStats(statsBuffer, STATS_BUFFER_SIZE);
    return (*env)->NewStringUTF(env, statsBuffer);
}

// JNI entry point: applies POSIX_FADV_RANDOM to `fd` with offset 0 and
// length 0, and returns the posix_fadvise() result. `env` and `class` are
// not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_fadviseRandom
(JNIEnv* env,
jclass class,
jint fd)
{
    (void) class;
    (void) env;

    const int rc = posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
    return rc;
}

// JNI entry point: applies POSIX_FADV_DONTNEED to `fd` with offset 0 and
// length 0, and returns the posix_fadvise() result. `env` and `class` are
// not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_fadviseDontneed
(JNIEnv* env,
jclass class,
jint fd)
{
    (void) class;
    (void) env;

    const int rc = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
    return rc;
}

// JNI entry point: applies POSIX_FADV_WILLNEED to the `len` bytes of `fd`
// starting at `offset`, and returns the posix_fadvise() result. `env` and
// `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_fadvisePrefetch
(JNIEnv* env,
jclass class,
jint fd,
jlong offset,
jlong len)
{
    (void) class;
    (void) env;

    const int rc = posix_fadvise(fd, offset, len, POSIX_FADV_WILLNEED);
    return rc;
}

// JNI entry point: sets (`direct` != 0) or clears (`direct` == 0) the
// O_DIRECT status flag on `fd` with fcntl(F_GETFL) / fcntl(F_SETFL).
// Returns -1 when the current flags cannot be read, otherwise the
// fcntl(F_SETFL) result. `env` and `class` are not used.
__attribute__((__visibility__("default")))
JNIEXPORT jint JNICALL Java_ru_yandex_compress_NativeCompress_setODirect
(JNIEnv* env,
jclass class,
jint fd,
jboolean direct)
{
    (void) class;
    (void) env;

    int flags = fcntl(fd, F_GETFL);
    if (flags == -1) {
        return -1;
    }

    flags = direct ? (flags | O_DIRECT) : (flags & ~O_DIRECT);

    const int ret = fcntl(fd, F_SETFL, flags);
    // Note: the value logged as "old" is the already-updated flag set.
    fprintf(stderr, "FCNTL: old: %d, ret=%d\n", flags, ret);
    return ret;
}

