import numpy as np
import yt.wrapper as yt
try:
    from crypta.lib.nirvana.email_gender.vectorize import DICT_SIZE  # noqa
    from crypta.lib.nirvana.email_gender.vectorize import (
        vectorize_one,
        MAX_LEN,
    )
except ImportError:
    import sys
    sys.path.append('vectorize/pymodule')
    from vectorize import (MAX_LEN, vectorize_one)
    from vectorize import DICT_SIZE  # noqa


def generator_wrapper(path, batch_size=25):
    """Endlessly yield ``(X, y)`` batches built from the rows of a YT table.

    The table at ``path`` is read with ``yt.read_table(path, raw=False)``
    over and over (one full read per pass), so the generator never
    terminates by itself.  Rows whose ``email`` field does not contain
    exactly one ``'@'`` are skipped.

    Args:
        path: Table path handed to ``yt.read_table``.
        batch_size: Maximum number of samples per yielded batch.

    Yields:
        Tuple ``(X, y)`` of ``float32`` arrays.  ``X`` has shape
        ``(n, MAX_LEN)`` and ``y`` has shape ``(n,)``, where ``n`` equals
        ``batch_size`` except for the last batch of a pass, which holds
        the remaining ``1 <= n < batch_size`` samples.  Only rows that were
        actually filled are yielded: never an empty or all-padding batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if a complete
            pass over the table produced no valid row (looping again could
            never yield anything).
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    while True:
        # Fresh buffers for every pass so that nothing left over from the
        # previous pass can leak into the first batch of this one.
        X = np.zeros((batch_size, MAX_LEN), dtype=np.float32)
        y = np.zeros(batch_size, dtype=np.float32)
        batch_idx = 0
        produced = False

        for row in yt.read_table(path, raw=False):
            email = row['email']
            if email.count('@') != 1:
                # Not of the form "<local>@<domain>": skip the row.
                continue

            # NOTE(review): assumes vectorize_one returns a sequence of
            # length MAX_LEN (it is assigned to one row of X) - confirm.
            X[batch_idx] = vectorize_one(email)
            y[batch_idx] = row['gender']
            batch_idx += 1

            if batch_idx == batch_size:
                yield X, y
                produced = True
                # Allocate new arrays instead of reusing the yielded ones:
                # the consumer may still hold references to them.
                X = np.zeros((batch_size, MAX_LEN), dtype=np.float32)
                y = np.zeros(batch_size, dtype=np.float32)
                batch_idx = 0

        if batch_idx:
            # Flush the incomplete tail of this pass (slices of the buffers).
            yield X[:batch_idx], y[:batch_idx]
            produced = True

        if not produced:
            raise ValueError(
                'no valid rows found in table {!r}'.format(path)
            )
