import yt.wrapper as yt
import numpy as np
from sklearn.neighbors import NearestNeighbors
import logging
from copy import deepcopy
import random

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# YT path template of the login -> vector tables; the `%d` placeholder is
# filled with the integer model version (see nn_estimation).
TABLE_TEMPLATE = '//home/crypta/team/ezep/site2vec/Processes/CND/login2vec_v%d'
# Nearest-neighbour results keyed by version label (e.g. 'v7'); each value is
# the tuple returned by nn_estimation. Populated by setup().
DATA = {}
# login -> list of id strings, populated by setup() from the login2vk_ids
# table.
LOGIN2VK = {}
# login -> list of id strings, populated by setup() from the login2ok_ids
# table.
LOGIN2OK = {}
# Number of neighbour columns (after the first one) that
# get_reverse_friends inspects.
TOPNREVERSE = 3


class UnknownLogin(Exception):
    """Raised when a requested login is missing from the loaded vectors."""


def binary_to_numpy(binary):
    """Decode a packed buffer of little-endian float32 values.

    Args:
        binary: bytes-like object whose length is a multiple of 4.

    Returns:
        A new, writable 1-D ``numpy.ndarray`` of dtype ``'<f4'``.

    Raises:
        ValueError: if the buffer length is not a multiple of the item size.
    """
    # np.fromstring on binary input is deprecated in favour of np.frombuffer.
    # frombuffer returns a read-only view over the input, so copy it to keep
    # the old contract of returning an independent, writable array.
    return np.frombuffer(binary, dtype='<f4').copy()


def nn_estimation(version, dim, n_neighbors=21):
    """Load one version of the login vectors and find each login's neighbours.

    Reads the table ``TABLE_TEMPLATE % version`` from YT, decodes every row's
    ``'vector'`` column into one row of a matrix, fits a brute-force cosine
    ``NearestNeighbors`` model on that matrix and queries it with the same
    matrix.

    Args:
        version: integer substituted into TABLE_TEMPLATE.
        dim: number of columns allocated per vector.
        n_neighbors: number of neighbours requested per login.
            NOTE(review): the query set equals the fitted set, so each login
            presumably shows up among its own neighbours -- confirm.

    Returns:
        Tuple ``(login2index, index2login, distances, indices)``: the two
        login <-> row-number dicts and the two arrays returned by
        ``kneighbors``.
    """
    table_path = TABLE_TEMPLATE % version
    logger.info('Loading table %s', table_path)
    logger.info('Vector size is %d', dim)

    # Allocate the whole matrix up front from the table's row count.
    row_total = yt.row_count(table_path)
    embeddings = np.empty((row_total, dim))

    login2index = {}
    index2login = {}
    for position, record in enumerate(yt.read_table(table_path)):
        embeddings[position, :] = binary_to_numpy(record['vector'])
        login = record['login']
        login2index[login] = position
        index2login[position] = login
    logger.info('%d logins loaded', len(login2index))

    model = NearestNeighbors(
        n_neighbors=n_neighbors,
        metric='cosine',
        algorithm='brute'
    )
    model.fit(embeddings)

    distances, indices = model.kneighbors(embeddings)
    return login2index, index2login, distances, indices


def setup():
    """Configure the YT client and load all module-level caches.

    Side effects:
        * sets the YT proxy URL and disables read retries in ``yt.config``;
        * fills ``DATA['v7']`` and ``DATA['v8']`` via ``nn_estimation`` with
          vector size 256;
        * fills ``LOGIN2VK`` and ``LOGIN2OK`` with ``login -> list of ids``,
          splitting each row's comma-separated ``'ids'`` column.
    """
    yt.config['proxy']['url'] = 'hahn.yt.yandex.net'
    yt.config['read_retries']['enable'] = False
    DATA['v7'] = nn_estimation(7, 256)
    DATA['v8'] = nn_estimation(8, 256)

    # (table path, cache to fill, log message) for each social network.
    social_sources = (
        ('//home/crypta/team/ezep/DA908_social_links/login2vk_ids',
         LOGIN2VK, 'VK links loaded: %d'),
        ('//home/crypta/team/ezep/DA908_social_links/login2ok_ids',
         LOGIN2OK, 'OK links loaded: %d'),
    )
    for path, cache, message in social_sources:
        cache.update({row['login']: row['ids'].split(',')
                      for row in yt.read_table(path, raw=False)})
        # Pass the count as a logging argument (lazy formatting), matching
        # the logger calls in nn_estimation.
        logger.info(message, len(cache))


def get_user_context(login, version):
    """Return the neighbour data of ``version`` plus the row index of ``login``.

    Args:
        login: login to look up.
        version: key into ``DATA`` (e.g. ``'v7'``).

    Returns:
        Tuple ``(login2index, index2login, distances, indices, idx)`` where
        the first four items are ``DATA[version]`` and ``idx`` is
        ``login2index[login]``.

    Raises:
        UnknownLogin: if ``login`` is not present in ``login2index``; the
            message is also logged at error level.
        KeyError: if ``version`` is not a key of ``DATA``.
    """
    login2index, index2login, distances, indices = DATA[version]
    if login in login2index:
        return login2index, index2login, distances, indices, login2index[login]

    message = "Unknown login {}".format(login)
    logger.error(message)
    raise UnknownLogin(message)


def get_user_id(login, version):
    """Return the row index of ``login`` in the data of ``version``.

    Propagates whatever ``get_user_context`` raises (``UnknownLogin`` for a
    missing login).
    """
    context = get_user_context(login, version)
    # The row index is the fifth and last item of the context tuple.
    _login2index, _index2login, _distances, _indices, user_idx = context
    return user_idx


def get_friends(login, version):
    """List the precomputed neighbours of ``login``, excluding the login itself.

    Args:
        login: login whose neighbours are requested.
        version: key into ``DATA``.

    Returns:
        List of dicts, in the order stored in the neighbour arrays, each with
        keys ``'login'``, ``'distance'`` (``1.0`` minus the stored distance)
        and ``'vk_ids'`` (always a fresh empty list here).

    Raises:
        UnknownLogin: propagated from ``get_user_context``.
    """
    _, index2login, distances, indices, idx = get_user_context(login, version)

    # Resolve every neighbour index to its login once, keeping the distance.
    candidates = (
        (index2login[neighbor_idx], distance)
        for distance, neighbor_idx in zip(distances[idx], indices[idx])
    )
    return [
        {
            'login': neighbor_login,
            'distance': 1.0 - distance,
            'vk_ids': [],
        }
        for neighbor_login, distance in candidates
        if neighbor_login != login
    ]


def get_reverse_friends(login, version):
    """Return logins that list ``login`` among their closest neighbours.

    A login qualifies when the row index of ``login`` appears in columns
    ``1 .. TOPNREVERSE`` of its row in the neighbour-index array.

    NOTE(review): column 0 is skipped, presumably because it holds the row's
    own index; if it does not (e.g. duplicate vectors), ``login`` itself could
    show up in the result -- confirm.

    Raises:
        UnknownLogin: propagated from ``get_user_context``.
    """
    _, index2login, _, indices, idx = get_user_context(login, version)

    top_columns = indices[:, 1:(TOPNREVERSE + 1)]
    # First element of the nonzero() result holds the row numbers of matches.
    matching_rows = np.nonzero(top_columns == idx)[0]
    return [index2login[row] for row in matching_rows]


def enrich_user_data(unriched_users):
    """Return a deep copy of the user dicts with ``'vk_ids'`` filled in.

    Args:
        unriched_users: iterable container of dicts, each with a ``'login'``
            key. It is not modified.

    Returns:
        A deep copy of ``unriched_users`` in which every dict's ``'vk_ids'``
        is a new list holding the ids stored in ``LOGIN2VK`` for that login,
        or an empty list when the login has no entry.
    """
    # It's more efficient to modify the original structure, but modifying and
    # returning a copy is more general and the overhead is small.
    users = deepcopy(unriched_users)
    for user in users:
        # Copy the cached list: handing out the LOGIN2VK list itself would
        # let a caller that mutates the result corrupt the shared cache.
        user['vk_ids'] = list(LOGIN2VK.get(user['login'], []))
    return users


def get_random_params():
    """Pick a random loaded version and a random login from that version.

    Returns:
        Tuple ``(version, login)`` where ``version`` is a key of ``DATA`` and
        ``login`` is a key of that version's ``login2index`` dict.

    Raises:
        IndexError: from ``random.choice`` when ``DATA`` (or the chosen
            version's ``login2index``) is empty.
    """
    # random.choice needs an indexable sequence; on Python 3 dict.keys() is a
    # view and cannot be indexed, so materialise the keys first. This also
    # works unchanged on Python 2.
    version = random.choice(list(DATA))
    login2index = DATA[version][0]
    login = random.choice(list(login2index))
    return version, login
