cimport numpy as np
import cython
import numpy as np

from cpython.string cimport PyString_AsString
from cython.operator cimport address
from libc.stdio cimport FILE, fopen, fclose
from libc.stdlib cimport malloc, free
from libc.string cimport strlen
from libc.stdlib cimport rand, srand
from jafar_yt.utils.structarrays import Series

cdef char ** to_cstring_array(list_str) except? NULL:
    """Build a malloc'd array of C string pointers from a sequence of strings.

    Each slot points at the internal buffer of the corresponding Python
    string (via ``PyString_AsString``), so the pointers are only valid while
    the Python strings in ``list_str`` stay alive.  The caller owns the
    returned array and must release it with ``free``.

    Raises ``MemoryError`` when the allocation fails and propagates the
    error raised by ``PyString_AsString`` for an element it rejects.  NULL
    may legitimately be returned without an error for an empty input,
    because ``malloc(0)`` is allowed to return NULL.
    """
    cdef Py_ssize_t i
    cdef Py_ssize_t n = len(list_str)
    cdef char **result = <char **>malloc(n * sizeof(char *))
    if result == NULL and n > 0:
        raise MemoryError("could not allocate the C string pointer array")
    try:
        for i in xrange(n):
            result[i] = PyString_AsString(list_str[i])
    except BaseException:
        # Do not leak the partially filled array when an element is rejected.
        free(result)
        raise
    return result

# C declarations taken from cmph.h so the rest of this module can call the
# CMPH library directly.
cdef extern from "cmph.h":
    ctypedef unsigned int cmph_uint32

    # Members of the library's CMPH_ALGO enum; generate_hash() below passes
    # CMPH_BDZ and CMPH_CHD to cmph_config_set_algo().
    ctypedef enum CMPH_ALGO:
        CMPH_BMZ
        CMPH_BMZ8
        CMPH_CHM
        CMPH_BRZ
        CMPH_FCH
        CMPH_BDZ
        CMPH_BDZ_PH
        CMPH_CHD_PH
        CMPH_CHD
        CMPH_COUNT

    # Key-source adapter: an opaque data pointer, the number of keys and the
    # read / dispose / rewind callbacks.
    ctypedef struct cmph_io_adapter_t:
        void *data;
        cmph_uint32 nkeys;
        int (*read)(void *, char **, cmph_uint32 *)
        void (*dispose)(void *, char *, cmph_uint32)
        void (*rewind)(void *)

    # Configuration handle created by cmph_config_new() and released by
    # cmph_config_destroy().
    ctypedef struct cmph_config_t:
        CMPH_ALGO algo
        cmph_io_adapter_t *key_source
        cmph_uint32 verbosity
        double c
        void *data

    # Hash-function handle returned by cmph_new() / cmph_load() and released
    # by cmph_destroy().
    ctypedef struct cmph_t:
        CMPH_ALGO algo
        cmph_uint32 size
        cmph_io_adapter_t *key_source
        void *data

    # Adapter over an in-memory array of C strings.
    cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys);
    cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
    void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
    cmph_t *cmph_new(cmph_config_t *mph)
    # Declared nogil so that it can be called from nogil code (cmph_cmap).
    cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen) nogil
    void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
    # Serialisation of a hash function to / from an open C FILE stream.
    int cmph_dump(cmph_t *mphf, FILE *f)
    cmph_t *cmph_load(FILE *f)
    void cmph_config_destroy(cmph_config_t *mph)
    void cmph_destroy(cmph_t *mphf)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void cmph_cmap(np.int32_t[:] result, cmph_t *mphf, char** keys_cstring_array, int count) nogil:
    # Hash the first `count` NUL-terminated strings of `keys_cstring_array`
    # with `mphf` and store each value at the same position in `result`.
    # Bounds checking and negative-index wraparound are switched off by the
    # decorators, so `result` must hold at least `count` elements.
    cdef int pos
    cdef char* current
    cdef cmph_uint32 hashed
    for pos in range(count):
        current = keys_cstring_array[pos]
        hashed = cmph_search(mphf, current, <cmph_uint32> strlen(current))
        result[pos] = hashed
from pandas import DataFrame, Series

cdef class MPH:
    """Owner of a CMPH hash function and, optionally, the config that built it.

    ``mph_`` is the hash-function handle used for lookups; ``config_`` is the
    configuration handle kept alongside it.  Either may be NULL: a fresh
    instance holds neither, and an instance filled by :meth:`load` has no
    config.  Both handles are released in ``__dealloc__``.
    """

    cdef cmph_t *mph_
    cdef cmph_config_t *config_

    def __cinit__(self):
        # Make the "nothing owned yet" state explicit.
        self.mph_ = NULL
        self.config_ = NULL

    def __call__(self, const char* key):
        """Return the hash value of a single key.

        Raises ``RuntimeError`` when no hash function has been set yet.
        """
        if self.mph_ == NULL:
            raise RuntimeError("MPH holds no hash function")
        return cmph_search(self.mph_, key, <cmph_uint32> strlen(key))

    cpdef map(self, keys):
        """Hash every key and return the values as an ``int32`` numpy array.

        ``keys`` is a sequence of strings; when it is a ``Series`` its
        ``_array`` attribute is used instead.  Raises ``RuntimeError`` when
        no hash function has been set yet.
        """
        cdef char **keys_cstring_array
        cdef int count
        cdef np.int32_t[:] result_view

        if self.mph_ == NULL:
            raise RuntimeError("MPH holds no hash function")
        # NOTE(review): `Series` is imported from jafar_yt near the top of the
        # module and again from pandas further down, so the pandas name is
        # the one bound at call time -- confirm which one is intended.
        if isinstance(keys, Series):
            keys = keys._array
        count = len(keys)
        result = np.empty(count, dtype=np.int32)
        result_view = result
        keys_cstring_array = to_cstring_array(keys)
        if keys_cstring_array == NULL and count > 0:
            raise MemoryError("could not build the C key array")
        try:
            cmph_cmap(result_view, self.mph_, keys_cstring_array, count)
        finally:
            # The pointer array is malloc'd for every call; release it so that
            # repeated calls do not leak.
            free(keys_cstring_array)
        return result

    cpdef dump(self, const char* name):
        """Write the hash function to the file ``name``.

        Raises ``RuntimeError`` when there is nothing to dump and ``IOError``
        when the file cannot be opened.
        """
        cdef FILE *fd
        if self.mph_ == NULL:
            raise RuntimeError("MPH holds no hash function")
        # Binary mode: the dump is not text and must not be newline-translated.
        fd = fopen(name, 'wb')
        if fd == NULL:
            raise IOError("Failed to open %r for writing" % (name,))
        # An instance filled by load() has no config; skip the call then.
        if self.config_ != NULL:
            cmph_config_set_mphf_fd(self.config_, fd)
        cmph_dump(self.mph_, fd)
        fclose(fd)

    cpdef load(self, const char* name):
        """Replace the hash function with the one stored in the file ``name``.

        Raises ``IOError`` when the file cannot be opened and
        ``RuntimeError`` when no hash function could be read from it; the
        previously held hash function is kept in both cases.
        """
        cdef FILE *fd
        cdef cmph_t *loaded
        fd = fopen(name, 'rb')
        if fd == NULL:
            raise IOError("Failed to open %r for reading" % (name,))
        loaded = cmph_load(fd)
        fclose(fd)
        if loaded == NULL:
            raise RuntimeError("Failed to load a hash function from %r" % (name,))
        # Release the function being replaced so that reloading does not leak.
        if self.mph_ != NULL:
            cmph_destroy(self.mph_)
        self.mph_ = loaded

    def __dealloc__(self):
        # Either handle may be NULL (fresh instance, or filled by load()).
        if self.config_ != NULL:
            cmph_config_destroy(self.config_)
            self.config_ = NULL
        if self.mph_ != NULL:
            cmph_destroy(self.mph_)
            self.mph_ = NULL


cpdef set_seed(value):
    """Seed the C library's ``rand`` generator with ``value`` via ``srand``."""
    cdef unsigned int seed = value
    srand(seed)


cpdef MPH generate_hash(keys):
    """Build a hash function over ``keys`` and return it wrapped in an MPH.

    ``keys`` is a sequence of strings.  ``CMPH_BDZ`` is tried first and
    ``CMPH_CHD`` second; the first algorithm for which ``cmph_new`` returns a
    function wins.  The returned wrapper owns both the function and the
    config that produced it.

    Raises ``RuntimeError`` when neither algorithm yields a function and
    ``MemoryError`` when a required C allocation fails.
    """
    cdef char **array
    cdef cmph_io_adapter_t *source
    cdef cmph_config_t *config = NULL
    cdef cmph_t *mph = NULL
    cdef cmph_uint32 nkeys = len(keys)
    cdef MPH wrapper

    array = to_cstring_array(keys)
    if array == NULL and nkeys > 0:
        raise MemoryError("could not build the C key array")
    try:
        # NOTE(review): the adapter is never released in this module; no
        # matching destroy function is declared from cmph.h here -- confirm
        # whether one should be declared and called.
        source = cmph_io_vector_adapter(array, nkeys)
        if source == NULL:
            raise MemoryError("could not create the key source adapter")
        for algorithm in (CMPH_BDZ, CMPH_CHD):
            config = cmph_config_new(source)
            if config == NULL:
                raise MemoryError("could not create the cmph config")
            cmph_config_set_algo(config, algorithm)
            mph = cmph_new(config)
            if mph != NULL:
                break
            # This attempt failed: release its config before the next one
            # (or before giving up) instead of overwriting the pointer.
            cmph_config_destroy(config)
            config = NULL
    finally:
        # The pointer array is only needed while the function is being built.
        free(array)

    if mph == NULL:
        raise RuntimeError("Failed to generate a hash function")
    wrapper = MPH()
    wrapper.mph_ = mph
    wrapper.config_ = config
    return wrapper


cpdef MPH load_hash(name):
    """Create an MPH, fill it from the file ``name`` via ``MPH.load`` and return it."""
    cdef MPH loaded = MPH()
    loaded.load(name)
    return loaded
