from util.generic.vector cimport TVector
from util.generic.hash cimport THashMap
from util.generic.string cimport TString
from libcpp cimport bool
from util.system.types cimport ui32
import numpy as np

# Declarations imported from the C++ header vectorize_email.h (namespace
# NEmailGender). They must stay in sync with that header.
cdef extern from 'crypta/lib/nirvana/email_gender/vectorize/vectorize_email.h' \
        namespace 'NEmailGender':
    # Takes a string and a vector of floats and returns a bool.
    # Presumably fills the vector with the encoded string and reports
    # success -- confirm against the header.
    # NOTE(review): the vector is declared const here, yet vectorize_one reads
    # it after the call as if it were an output; verify the real signature.
    bool ConvertString(const TString&, const TVector[float]&)
    # The quoted strings are the C++ names; the leading-underscore identifiers
    # are the names used inside this Cython module.
    ui32 _MAX_LEN 'NEmailGender::MAX_LEN'
    ui32 _DICT_SIZE 'NEmailGender::DICT_SIZE'

# Re-export the C++ constants as ordinary module-level Python attributes so
# that Python callers can read them.
DICT_SIZE = _DICT_SIZE
MAX_LEN = _MAX_LEN


def vectorize_one(email):
    """Vectorize a single email string into a zero-padded float16 array.

    Args:
        email: The value to vectorize. Only ``str`` instances (including
            subclasses) are passed to the C++ ``ConvertString``; any other
            value is skipped and yields an all-zero result.

    Returns:
        A NumPy ``float16`` array holding the values produced by
        ``ConvertString``, right-padded with zeros up to ``MAX_LEN`` elements.

    Raises:
        ValueError: Expected from ``np.pad`` if the vector holds more than
            ``MAX_LEN`` values (negative pad width). Whether ``ConvertString``
            can produce that many values is not visible from this file.
    """
    cdef TString cpp_email
    cdef TVector[float] cpp_vectorized
    # isinstance instead of an exact type comparison, so str subclasses are
    # vectorized rather than silently mapped to an all-zero vector.
    if isinstance(email, str):
        cpp_email = email
        # NOTE(review): the bool returned by ConvertString is ignored, as in
        # the original code; whatever ends up in cpp_vectorized is used as-is.
        ConvertString(cpp_email, cpp_vectorized)
    result = np.asarray(cpp_vectorized, dtype=np.float16)
    # 'constant' mode with the default fill value pads with zeros on the right.
    result = np.pad(result, (0, MAX_LEN - result.size), 'constant')
    return result
