import datetime
import json
import logging
import os
import urllib
import urllib.request
import zlib
from collections import defaultdict

import numpy as np
import yt.wrapper as yt
from sklearn.neighbors import NearestNeighbors

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# YT table that get_login2vec() reads the per-login embedding rows from.
LOGIN2VEC_TABLE = '//home/crypta/production/profiles/cnd/login2vec'

# Staff API request used by get_staff_data(): one page holding every person
# that is not dismissed, not a robot and not a homeworker, restricted to the
# attributes listed in `_fields`.
STAFF_API = 'https://staff-api.yandex-team.ru/v3/persons?' + '&'.join([
    '_query=',
    'official.is_dismissed=false',
    'official.is_robot=false',
    'official.is_homeworker=false',
    '_fields=' + ','.join([
        'login',
        'personal.gender',
        'personal.birthday',
        'personal.family_status',
        'name.first.ru',
        'official.position.ru',
        'location.office.city.name.en',
    ]),
    '_limit=1000000',
])

# OAuth token sent in the Authorization header; None when the variable is unset.
AUTH_TOKEN = os.getenv('AUTH_TOKEN')

# Upper bound on the number of friends returned by get_friends().
TOP_FRIENDS = 104


def binary_to_numpy(binary):
    """Decode a raw byte string into a 1-D array of little-endian float32.

    Args:
        binary: bytes-like object whose length is a multiple of 4.

    Returns:
        A new, writable ``numpy.ndarray`` with dtype ``'<f4'``.

    Raises:
        ValueError: if the buffer size is not a multiple of the item size.
    """
    # np.fromstring() in binary mode is deprecated in favour of frombuffer().
    # frombuffer() returns a read-only view over immutable bytes, so copy to
    # keep the result writable (callers accumulate vectors in place with +=).
    return np.frombuffer(binary, dtype='<f4').copy()


def calculate_age_from_birthday(birthday):
    """Return the age in full years for a ``YYYY-MM-DD`` birthday string.

    A falsy ``birthday`` (``None`` or an empty string) yields the string
    ``'unknown'`` instead of a number. A string that does not match the
    ``%Y-%m-%d`` format makes ``strptime`` raise ``ValueError``.
    """
    if not birthday:
        return 'unknown'

    birth = datetime.datetime.strptime(birthday, '%Y-%m-%d')
    now = datetime.date.today()

    years = now.year - birth.year
    # The birthday has not happened yet this year: one year less.
    if (now.month, now.day) < (birth.month, birth.day):
        years -= 1
    return years


def get_staff_data():
    """Download the staff list and index the selected attributes by login.

    Sends a single request to ``STAFF_API`` authorised with ``AUTH_TOKEN``.
    If the response reports anything other than exactly one page, an error
    is logged and the data that did arrive is still parsed.

    Returns:
        ``defaultdict(dict)`` mapping login to a dict with the keys
        ``'name'``, ``'gender'``, ``'martial'`` (the API's family status),
        ``'age'`` (see ``calculate_age_from_birthday``), ``'position'`` and
        ``'city'``.
    """
    logger.info('Reading staff')
    req = urllib.request.Request(STAFF_API)
    req.add_header('Authorization', 'OAuth %s' % AUTH_TOKEN)
    # Close the HTTP response deterministically instead of leaking the socket.
    with urllib.request.urlopen(req) as resp:
        content = json.loads(resp.read())
    if content['pages'] != 1:
        logger.error('Not all staff data is downloaded')

    logger.info('Parsing staff data')
    staff = defaultdict(dict)
    for employee in content['result']:
        personal = employee['personal']
        record = staff[employee['login']]
        record['name'] = employee['name']['first']['ru']
        record['gender'] = personal['gender']
        # NOTE: the key is spelled 'martial' on purpose; other code reads it.
        record['martial'] = personal['family_status']
        record['age'] = calculate_age_from_birthday(personal['birthday'])
        record['position'] = employee['official']['position']['ru']
        record['city'] = employee['location']['office']['city']['name']['en']

    logger.info('%d logins parsed', len(staff))

    return staff


def get_login2vec(good_logins):
    """Load per-login vectors from YT and make sure every good login has one.

    Rows of ``LOGIN2VEC_TABLE`` whose login is in ``good_logins`` are summed
    per login; other rows are logged and skipped. Every good login without
    a vector then borrows the vector of an already known login and is put
    on the blacklist, so callers can tell real vectors from stand-ins.

    Note: this sets the YT proxy and disables read retries in the global
    ``yt.config``.

    Args:
        good_logins: container of logins (supports ``in`` and iteration).

    Returns:
        ``(login2vec, blacklist)``: a dict login -> vector and the set of
        logins whose vector is a borrowed stand-in.

    Raises:
        ValueError: if some good login needs a stand-in vector but no row
            of the table matched any good login.
    """
    yt.config['proxy']['url'] = 'hahn.yt.yandex.net'
    yt.config['read_retries']['enable'] = False

    logger.info('Downloading login2vec')
    login2vec = {}
    for row in yt.read_table(LOGIN2VEC_TABLE):
        login = row['login']
        vector = binary_to_numpy(yt.yson.get_bytes(row['vector']))
        if login not in good_logins:
            logger.info('Strange login %s', login)
            continue
        if login in login2vec:
            login2vec[login] += vector
        else:
            login2vec[login] = vector

    blacklist = set()
    # Snapshot of logins with a real vector; stand-ins are drawn only from it.
    known_logins = list(login2vec)
    for login in good_logins:
        if login in login2vec:
            continue
        if not known_logins:
            raise ValueError(
                'No vectors were loaded from %s; cannot assign a stand-in '
                'vector for login %r' % (LOGIN2VEC_TABLE, login)
            )
        # Built-in hash() of a string is randomised per process, which made
        # the borrowed vector differ between runs. CRC32 is stable.
        raw = login if isinstance(login, bytes) else login.encode('utf-8')
        donor = known_logins[zlib.crc32(raw) % len(known_logins)]
        login2vec[login] = login2vec[donor]
        blacklist.add(login)

    logger.info('Done. There are %d logins', len(login2vec))
    return login2vec, blacklist


def get_neighbours_data(login2vec):
    """Compute the neighbour ordering of every login's vector.

    Stacks the vectors into one matrix (row order follows the iteration
    order of ``login2vec``), fits a brute-force cosine ``NearestNeighbors``
    model on it and queries it with the same matrix, asking for
    ``len(login2vec) - 1`` neighbours per row.

    Args:
        login2vec: non-empty dict login -> vector; every vector must have
            the length of the first one.

    Returns:
        ``(login2index, index2login, distances, indeces)``: the two
        login <-> row-index mappings plus the distance and index arrays
        returned by ``kneighbors``.
    """
    total = len(login2vec)
    width = len(next(iter(login2vec.values())))
    vectors = np.empty((total, width))

    index2login = dict(enumerate(login2vec))
    login2index = {}
    for position, login in index2login.items():
        login2index[login] = position
        vectors[position, :] = login2vec[login]

    logger.info('Calculating neighbours')
    model = NearestNeighbors(
        n_neighbors=total - 1,
        metric='cosine',
        algorithm='brute',
    )
    model.fit(vectors)

    distances, indeces = model.kneighbors(vectors)
    return login2index, index2login, distances, indeces


def get_friends(login, login2index, index2login, distances, indeces,
                filter_mask, lover, all_lovers, online, onliners, blacklist):
    """Collect up to ``TOP_FRIENDS`` nearest neighbours of ``login``.

    Neighbours are taken in the order stored in ``indeces`` for the login's
    row, keeping only those selected by ``filter_mask``. The login itself
    is always skipped.

    Args:
        login: login to find friends for; must be a key of ``login2index``.
        login2index: dict login -> row index.
        index2login: dict row index -> login.
        distances: per-row neighbour distances, aligned with ``indeces``.
        indeces: per-row neighbour indices.
        filter_mask: array indexed by neighbour index; presumably a boolean
            mask over all logins -- TODO confirm against the caller.
        lover: ``"ANY"`` for no filter, ``"1"`` to keep only members of
            ``all_lovers``, ``"0"`` to keep only non-members. Any other
            value rejects every candidate.
        all_lovers: container of logins used by the ``lover`` filter.
        online: same three-state filter, applied against ``onliners``.
        onliners: container of logins used by the ``online`` filter.
        blacklist: logins to exclude; consulted only when ``lover`` is
            ``"ANY"``.

    Returns:
        List of dicts ``{'login': ..., 'distance': 1.0 - distance}``.
    """
    def _passes(flag, candidate, members):
        # "1" keeps members, "0" keeps non-members, anything else rejects.
        return (
            (flag == "1" and candidate in members)
            or (flag == "0" and candidate not in members)
        )

    logger.info('Getting friends for %s', login)
    row = login2index[login]

    keep = filter_mask[indeces[row]]
    candidates = zip(indeces[row][keep], distances[row][keep])

    friends = []
    for candidate_index, candidate_distance in candidates:
        candidate = index2login[candidate_index]
        if candidate == login:
            continue

        if lover == "ANY":
            # NOTE(review): the blacklist is applied only on this branch;
            # with lover "0"/"1" blacklisted logins can pass. Confirm that
            # this is intended.
            if candidate in blacklist:
                continue
        elif not _passes(lover, candidate, all_lovers):
            continue

        if online != "ANY" and not _passes(online, candidate, onliners):
            continue

        friends.append({
            'login': candidate,
            'distance': 1.0 - candidate_distance,
        })
        if len(friends) >= TOP_FRIENDS:
            break

    return friends


def age_bin(age):
    """Map an age to the label of its age bucket.

    The string ``'unknown'`` is passed through unchanged. Otherwise the
    label of the first bucket whose exclusive upper bound is greater than
    ``age`` is returned, or ``'45_'`` when ``age`` is 45 or more.
    """
    if age == 'unknown':
        return 'unknown'

    # (exclusive upper bound, label), in ascending order.
    buckets = (
        (18, '_18'),
        (21, '18_21'),
        (25, '21_25'),
        (30, '25_30'),
        (35, '30_35'),
        (40, '35_40'),
        (45, '40_45'),
    )
    for upper_bound, label in buckets:
        if age < upper_bound:
            return label
    return '45_'


def rank_login_in_other_list(login, other, indeces, login2index):
    """Return the position of ``other``'s index within ``login``'s row.

    Looks up the row of ``indeces`` that belongs to ``login`` and returns
    the position of the first entry equal to the index of ``other``.
    Because the result comes from ``argmax`` over a boolean array, it is 0
    when ``other`` does not occur in the row at all.
    """
    own_row = indeces[login2index[login]]
    target = login2index[other]
    matches = own_row == target
    return matches.argmax()


def get_staff_filtering_vectors(staff, index2login):
    """Build per-index attribute arrays used to filter staff members.

    For every ``index -> login`` pair, the matching staff record is read
    and its upper-cased gender, age bucket (``age_bin``), upper-cased
    ``'martial'`` value and city are stored at position ``index`` of four
    parallel arrays.

    Args:
        staff: mapping login -> record with the keys ``'gender'``,
            ``'age'``, ``'martial'`` and ``'city'``.
        index2login: dict index -> login; indices must be valid positions
            in arrays of length ``len(index2login)``.

    Returns:
        ``(genders, ages, martials, cities)``: four arrays of dtype
        ``'|S20'`` (fixed-width byte strings of at most 20 bytes).
    """
    size = len(index2login)
    genders, ages, martials, cities = (
        np.empty(size, dtype='|S20') for _ in range(4)
    )

    for position in index2login:
        person = staff[index2login[position]]
        genders[position] = str(person['gender'].upper())
        ages[position] = str(age_bin(person['age']))
        martials[position] = str(person['martial'].upper())
        cities[position] = str(person['city'])

    return genders, ages, martials, cities
