from util.generic.vector cimport TVector
from util.generic.string cimport TString
from libcpp cimport bool
from util.system.types cimport ui32
import numpy as np

# C++ declarations imported from the email vectorizer header
# (namespace NEmailOrganization).
cdef extern from 'crypta/lib/nirvana/email_organization/vectorize/vectorize_email.h' \
        namespace 'NEmailOrganization':
    # Takes the email string and two float vectors and returns a bool.
    # NOTE(review): both vectors are declared const here, yet vectorize_one
    # reads them back after the call as if they were filled in by it --
    # confirm against the real C++ signature.
    bool ConvertString(const TString&, const TVector[float]&, const TVector[float]&);
    # C++ constants, bound to underscore-prefixed names on the Cython side.
    ui32 _MAX_LEN 'NEmailOrganization::MAX_LEN'
    ui32 _DICT_SIZE 'NEmailOrganization::DICT_SIZE'
    ui32 _DOMAIN_DICT_SIZE 'NEmailOrganization::DOMAIN_DICT_SIZE'

# Copy the C-level constants into ordinary module attributes so they are
# also readable from Python code that imports this module.
MAX_LEN = _MAX_LEN
DICT_SIZE = _DICT_SIZE
DOMAIN_DICT_SIZE = _DOMAIN_DICT_SIZE


def filter_ascii(email):
    """Strip non-ASCII characters from *email* and return it ASCII-encoded.

    Every character whose code point is 128 or greater is dropped; the
    remaining characters are joined in their original order and encoded
    with the 'ascii' codec.
    """
    ascii_only = (ch for ch in email if ord(ch) < 128)
    cleaned = ''.join(ascii_only)
    return cleaned.encode('ascii')

def vectorize_one(email):
    """Vectorize a single email into login and domain float16 arrays.

    The email is first reduced to its ASCII characters, then handed to the
    C++ ConvertString routine together with two float vectors. Both vectors
    are converted to numpy float16 arrays afterwards.

    Returns:
        A tuple ``(login, domain)``. ``login`` is right-padded with the
        default constant (zero) up to MAX_LEN elements; ``domain`` is
        returned with whatever length the C++ side produced.

    Note:
        The boolean result of ConvertString is discarded, so a failed
        conversion is not reported to the caller.
    """
    cdef:
        TString ascii_email
        TVector[float] login_buf
        TVector[float] domain_buf

    ascii_email = filter_ascii(email)
    ConvertString(ascii_email, login_buf, domain_buf)

    domain = np.asarray(domain_buf, dtype=np.float16)
    login = np.asarray(login_buf, dtype=np.float16)
    # Assumes the login vector never exceeds MAX_LEN entries; np.pad
    # rejects a negative pad width.
    missing = MAX_LEN - login.size
    login = np.pad(login, (0, missing), 'constant')
    return login, domain
