#!/usr/bin/env python
# -*- coding: utf-8 -*-
import collections
import os
import random
import time

from requests.exceptions import ConnectionError
from retry import retry
import six

from crypta.lib.python.custom_ml.tools.training_utils import get_item_with_max_value
from crypta.lib.python.sandbox.client import SandboxClient
from crypta.lib.python.solomon.reporter import report_metrics_to_solomon

from crypta.profile.utils.config import config
from crypta.profile.utils.config.secrets import get_secrets

# Largest value that fits into an unsigned 64-bit integer. Used as the upper bound in
# is_valid_uint64() to reject anomalous uids that overflow uint64,
# such as "50874460171463943491" and "25211945661439982192".

MAX_INT = 2 ** 64 - 1

# Module-level cache of socdem thresholds; filled by get_socdem_thresholds_from_api().
socdem_thresholds = {}

# Unix timestamp captured once, when the module is imported. is_valid_yandexuid() uses it
# as the upper bound for the timestamp embedded in a yandexuid; it is not refreshed later.
now_timestamp = int(time.time())

# Maps numeric bb keyword ids to field names.
bb_keyword_id_to_field_name = {
    174: 'gender',
    175: 'age_segments',
    176: 'income_segments',
    198: 'top_common_site_ids',
    216: 'heuristic_segments',
    217: 'probabilistic_segments',
    220: 'yandex_loyalty',
    281: 'marketing_segments',
    543: 'user_age_6s',
    544: 'lal_common',
    545: 'lal_private',
    546: 'lal_internal',
    547: 'heuristic_common',
    548: 'heuristic_private',
    549: 'heuristic_internal',
    557: 'audience_segments',
    569: 'exact_socdem',
    595: 'affinitive_site_ids',
    601: 'longterm_interests',
    602: 'shortterm_interests',
    614: 'income_5_segments',
}
# Reverse lookup: field name -> keyword id.
field_name_to_bb_keyword_id = {v: k for k, v in bb_keyword_id_to_field_name.items()}
# 'interests_composite' has no entry in the forward mapping; it is assigned keyword 217,
# the same id that 'probabilistic_segments' maps to.
field_name_to_bb_keyword_id['interests_composite'] = 217


# Schemas below map a field name to a type-name string
# (presumably YT column types -- TODO confirm against the code that consumes them).

# Fields shared by the socdem classification schemas.
socdem_classification_basic_schema = {
    'gender': 'any',
    'user_age_6s': 'any',
    'income_5_segments': 'any',
    'update_time': 'uint64',
}

# Basic socdem fields plus an ('id', 'id_type') pair of string fields.
devid_classification_schema = socdem_classification_basic_schema.copy()
devid_classification_schema['id'] = 'string'
devid_classification_schema['id_type'] = 'string'

# Basic socdem fields plus a uint64 'yandexuid' field.
yandexuid_classification_schema = socdem_classification_basic_schema.copy()
yandexuid_classification_schema['yandexuid'] = 'uint64'

# Segment fields shared by all segment storage schemas; every field has type 'any'.
segment_fields_schema = {
    'heuristic_segments': 'any',
    'probabilistic_segments': 'any',
    'marketing_segments': 'any',

    'interests_composite': 'any',

    'shortterm_interests': 'any',
    'longterm_interests': 'any',

    'lal_common': 'any',
    'lal_private': 'any',
    'lal_internal': 'any',

    'heuristic_common': 'any',
    'heuristic_private': 'any',
    'heuristic_internal': 'any',

    'audience_segments': 'any',
}

# Segment fields keyed by an ('id', 'id_type') pair, with an update time.
segment_storage_by_id_schema = {
    'id': 'string',
    'id_type': 'string',
    'update_time': 'uint64',
}
segment_storage_by_id_schema.update(segment_fields_schema)

# Same as segment_storage_by_id_schema, with an additional 'crypta_id' field.
segment_storage_by_id_with_crypta_id_schema = {
    'crypta_id': 'uint64',
}
segment_storage_by_id_with_crypta_id_schema.update(segment_storage_by_id_schema)

# Segment fields plus 'yandexuid' (note: no 'update_time' here).
segment_storage_by_yandexuid_schema = {
    'yandexuid': 'uint64',
}
segment_storage_by_yandexuid_schema.update(segment_fields_schema)

# Segment fields plus 'crypta_id' (note: no 'update_time' here).
segment_storage_by_crypta_id_schema = {
    'crypta_id': 'uint64',
}
segment_storage_by_crypta_id_schema.update(segment_fields_schema)

# Extra non-segment attributes for yandexuid profiles: identifiers, loyalty and site lists.
additional_yandexuid_attributes_schema = {
    'yandexuid': 'uint64',
    'icookie': 'uint64',
    'crypta_id': 'uint64',

    'yandex_loyalty': 'double',

    'top_common_site_ids': 'any',
    'top_common_sites': 'any',
    'affinitive_site_ids': 'any',
    'affinitive_sites': 'any',
}

# Extra non-segment attributes for crypta_id profiles: site lists only.
additional_crypta_id_attributes_schema = {
    'top_common_site_ids': 'any',
    'top_common_sites': 'any',
    'affinitive_site_ids': 'any',
    'affinitive_sites': 'any',
}


# Full yandexuid profile schema: additional attributes + segment storage fields + socdem fields.
# 'yandexuid' is already present in both source schemas; it is repeated here with the same type.
crypta_yandexuid_profiles_schema = additional_yandexuid_attributes_schema.copy()
crypta_yandexuid_profiles_schema.update(segment_storage_by_yandexuid_schema)
crypta_yandexuid_profiles_schema.update({
    'yandexuid': 'uint64',
    'update_time': 'uint64',
    'exact_socdem': 'any',

    'gender': 'any',
    'user_age_6s': 'any',
    'income_5_segments': 'any',

    'age_segments': 'any',
    'income_segments': 'any',
})

# Full crypta_id profile schema: segment storage fields + additional attributes + socdem fields.
# 'crypta_id' is already present in segment_storage_by_crypta_id_schema; repeated with the same type.
crypta_id_profiles_schema = segment_storage_by_crypta_id_schema.copy()
crypta_id_profiles_schema.update(additional_crypta_id_attributes_schema)
crypta_id_profiles_schema.update({
    'crypta_id': 'uint64',
    'update_time': 'uint64',
    'exact_socdem': 'any',

    'gender': 'any',
    'user_age_6s': 'any',
    'income_5_segments': 'any',

    'income_segments': 'any',
    'age_segments': 'any',
})

# Daily app metrica record, keyed by an ('id', 'id_type') pair.
daily_app_metrica_schema = {
    'id': 'string',
    'id_type': 'string',
    'ua_profile': 'string',
    'apps': 'any',
    'model': 'string',
    'manufacturer': 'string',
    'main_region_obl': 'uint64',
    'region_ids': 'any',
    'categories': 'any',
}

# Monthly variant: the daily fields plus 'days_active'.
monthly_app_metrica_schema = daily_app_metrica_schema.copy()
monthly_app_metrica_schema.update({'days_active': 'any'})

# Daily vector record keyed by an ('id', 'id_type') pair; the vector is stored as a string.
daily_devid_vector_schema = {
    'id': 'string',
    'id_type': 'string',
    'vector': 'string',
}

# Monthly vector record keyed by yandexuid, with 'days_active'.
monthly_yandexuid_vector_schema = {
    'yandexuid': 'uint64',
    'days_active': 'any',
    'vector': 'string',
}

# Monthly variant of the devid vector schema: the daily fields plus 'days_active'.
monthly_devid_vector_schema = daily_devid_vector_schema.copy()
monthly_devid_vector_schema.update({'days_active': 'any'})


def get_exact_socdem_dict(record, thresholds):
    """Pick, for each socdem type, the segment that beats its threshold by the largest factor.

    For every one of 'gender', 'user_age_6s', 'income_segments' and 'income_5_segments'
    that is present in ``record``, each segment value is divided by
    ``thresholds[data_type][segment]``. Segments whose ratio is strictly greater than 1.0
    are candidates, and the winner is selected with ``get_item_with_max_value``.

    :param record: mapping of socdem type -> {segment: value}
    :param thresholds: mapping of socdem type -> {segment: threshold}
    :return: dict with a subset of the keys 'gender', 'age_segment', 'income_segment',
        'income_5_segment'; a key is present only if some segment exceeded its threshold
    """
    # (field name in the record, key name in the result)
    field_to_output_key = (
        ('gender', 'gender'),
        ('user_age_6s', 'age_segment'),
        ('income_segments', 'income_segment'),
        ('income_5_segments', 'income_5_segment'),
    )

    result = {}
    for field, output_key in field_to_output_key:
        if field not in record:
            continue

        # Only segments strictly above their threshold take part in the selection.
        ratios_above_threshold = {}
        for segment, value in record[field].items():
            ratio = value / thresholds[field][segment]
            if ratio > 1.0:
                ratios_above_threshold[segment] = ratio

        if ratios_above_threshold:
            best_segment, _ = get_item_with_max_value(ratios_above_threshold)
            result[output_key] = best_segment

    return result


@retry(ConnectionError, tries=10, delay=60)
def get_socdem_thresholds_from_api():
    """Return socdem thresholds, reusing the module-level cache when it is non-empty.

    When the ``socdem_thresholds`` module global is empty, the thresholds are requested
    through ``get_api().lab.getSocdemThresholds()`` and stored in that global.
    The call is wrapped in ``retry`` for ``ConnectionError`` (tries=10, delay=60).
    """
    # The API helper is imported inside the function, not at module level.
    from crypta.profile.utils.api import get_api
    global socdem_thresholds

    if socdem_thresholds:
        return socdem_thresholds

    socdem_thresholds = get_api().lab.getSocdemThresholds().result()
    return socdem_thresholds


@retry(ConnectionError, tries=10, delay=60)
def update_socdem_threshold(socdem, segment, threshold):
    """Send a new threshold for the given socdem type and segment to the API.

    Calls ``get_api().lab.updateSocdemThreshold`` and waits for its ``result()``.
    The call is wrapped in ``retry`` for ``ConnectionError`` (tries=10, delay=60).
    Returns nothing.
    """
    # The API helper is imported inside the function, not at module level.
    from crypta.profile.utils.api import get_api

    lab_api = get_api().lab
    request = lab_api.updateSocdemThreshold(socdem=socdem, segment=segment, threshold=threshold)
    request.result()


def is_valid_uint64(x):
    """Tell whether ``x`` is an integer, or a digit-only string, within [0, MAX_INT].

    :param x: value to check; strings are accepted only if ``str.isdigit()`` holds
        and ``int()`` can parse them
    :return: True if the (parsed) value is between 0 and MAX_INT inclusive, False otherwise
        (including for values of any other type)
    """
    if isinstance(x, six.string_types):
        if not x.isdigit():
            return False
        try:
            x = int(x)
        except ValueError:
            # isdigit() is also True for characters int() cannot parse (e.g. superscript
            # digits), and int() may refuse extremely long digit strings; neither is a
            # valid uint64, so report False instead of propagating the error.
            return False

    if isinstance(x, six.integer_types):
        return 0 <= x <= MAX_INT
    return False


def is_valid_yandexuid(yandexuid):
    """Tell whether ``yandexuid`` is a valid uint64 whose trailing timestamp looks plausible.

    The last ten decimal digits of the value are treated as a Unix timestamp, which must lie
    strictly between 1000000000 and ``now_timestamp`` (captured when the module was imported).
    """
    if is_valid_uint64(yandexuid):
        embedded_timestamp = int(yandexuid) % 10000000000
        # lower bound is 09.09.2001
        return 1000000000 < embedded_timestamp < now_timestamp
    return False


def normalize_weights(weights):
    """Rescale ``weights`` so that they sum to ``len(weights)``.

    ``weights`` must support ``len()``, ``.sum()`` and elementwise arithmetic
    (presumably a numpy array or pandas Series -- confirm against callers).
    """
    count = len(weights)
    total = weights.sum()
    return weights * count / total


def normalize_dict(dictionary):
    """Return a new dict with values divided by their total, so that they sum to one.

    If the total is zero (including the empty dict), the input dict itself is returned unchanged.
    """
    total = sum(dictionary.values())
    if not total:
        return dictionary
    return dict((key, float(value) / total) for key, value in dictionary.items())


def get_random_table_ranges_from_yt(yt, table_name, sample_chunk_count=100, sample_chunk_size=100):
    """Build up to ``sample_chunk_count`` random row ranges of ``sample_chunk_size`` rows each.

    The table's 'row_count' attribute is read through ``yt.get_attribute``. If the table has
    no more than ``sample_chunk_count * sample_chunk_size`` rows, an empty list is returned.
    Otherwise distinct random start rows are drawn and each is turned into a
    ``yt.TablePath(table_name, start_index=..., end_index=...)``.

    :return: list of ``yt.TablePath`` objects (possibly fewer than ``sample_chunk_count``;
        ranges may overlap because only the start rows are required to be distinct)
    """
    row_count = yt.get_attribute(table_name, 'row_count')
    if row_count <= sample_chunk_count * sample_chunk_size:
        return []

    start_rows = set()
    # avoiding endless loop for small tables: the number of draws is capped
    attempts_left = sample_chunk_count * 2
    while attempts_left > 0 and len(start_rows) < sample_chunk_count:
        start_rows.add(random.randint(0, row_count - sample_chunk_size))
        attempts_left -= 1

    return [
        yt.TablePath(table_name, start_index=start, end_index=start + sample_chunk_size)
        for start in start_rows
    ]


def partition(list_to_split, number_of_chunks):
    """Split a sequence into ``number_of_chunks`` consecutive slices.

    Every chunk except the last has ``len(list_to_split) // number_of_chunks`` items;
    the last chunk takes everything that remains, so it may be longer than the others.

    :param list_to_split: any sliceable sequence with ``len()``
    :param number_of_chunks: number of chunks to produce
    :return: list of slices of ``list_to_split``
    :raises ZeroDivisionError: if ``number_of_chunks`` is 0
    """
    # Floor division keeps chunk_size an integer on Python 3 as well; true division would
    # produce a float, which cannot be used as a slice index.
    chunk_size = len(list_to_split) // number_of_chunks

    chunks = []
    for i in range(number_of_chunks):
        start = i * chunk_size
        if i == number_of_chunks - 1:
            # the last chunk absorbs the remainder
            chunks.append(list_to_split[start:])
        else:
            chunks.append(list_to_split[start:start + chunk_size])
    return chunks


def targets_to_tables(targets):
    """Collect the ``table`` attribute of every target, preserving order."""
    tables = []
    for item in targets:
        tables.append(item.table)
    return tables


def get_matching_table(from_id_type, to_id_type, indevice=False):
    """Build the path of a matching table under ``config.CRYPTA_PUBLIC_MATCHING_DIR``.

    :param from_id_type: source id type, used as the first path component
    :param to_id_type: target id type, used as the last path component
    :param indevice: when true, a 'direct' component is inserted between the two id types
    :return: the formatted path string
    """
    if indevice:
        relative_template = '{from_id_type}/direct/{to_id_type}'
    else:
        relative_template = '{from_id_type}/{to_id_type}'

    path_template = os.path.join(config.CRYPTA_PUBLIC_MATCHING_DIR, relative_template)
    return path_template.format(from_id_type=from_id_type, to_id_type=to_id_type)


def revert_dict(mapping):
    """Return a new dict with keys and values swapped.

    If several keys share a value, the one that comes last in iteration order wins.
    """
    return dict((value, key) for key, value in mapping.items())


def get_sandbox_client():
    """Create a ``SandboxClient`` using the 'ROBOT_CRYPTA_SANDBOX_OAUTH' secret."""
    oauth_token = get_secrets().get_secret('ROBOT_CRYPTA_SANDBOX_OAUTH')
    return SandboxClient(oauth_token)


@retry(Exception, tries=10, delay=60)
def upload_to_sandbox(path, resource_type, description, owner, attributes, logger, is_folder=False, alias_path=None):
    """Upload ``path`` to Sandbox through a freshly created client and return the client's result.

    All arguments are forwarded unchanged to ``SandboxClient.upload_to_sandbox``.
    The call is wrapped in ``retry`` for any ``Exception`` (tries=10, delay=60).
    """
    sandbox = get_sandbox_client()
    return sandbox.upload_to_sandbox(
        path,
        resource_type,
        description,
        owner,
        attributes,
        logger,
        is_folder=is_folder,
        alias_path=alias_path,
    )


@retry(ConnectionError, tries=10, delay=60)
def download_file_from_sandbox(resource_type, released, path, path_to_save, resource_id=None):
    """Download a Sandbox resource (or a path inside it) to ``path_to_save``.

    When ``resource_id`` is not given, it is looked up by ``resource_type`` and ``released``
    via the client's ``_get_last_released_resource_id_for_status``.
    The call is wrapped in ``retry`` for ``ConnectionError`` (tries=10, delay=60).
    """
    sandbox = get_sandbox_client()

    target_resource_id = resource_id
    if target_resource_id is None:
        target_resource_id = sandbox._get_last_released_resource_id_for_status(resource_type, released)

    sandbox.load_resource(target_resource_id, path_to_save, resource_path=path)


def do_nothing(row):
    """Generator that yields ``row`` unchanged, exactly once."""
    yield row


def report_ml_metrics_to_solomon(service, metrics_to_send):
    """Send ``metrics_to_send`` to Solomon for the given ``service``.

    The project is ``config.SOLOMON_CRYPTA_ML_PROJECT``, the cluster is ``config.environment``,
    and the OAuth token is read from the 'SOLOMON_OAUTH' secret.
    """
    report_kwargs = dict(
        project=config.SOLOMON_CRYPTA_ML_PROJECT,
        cluster=config.environment,
        service=service,
        metrics=metrics_to_send,
        oauth_token=get_secrets().get_secret('SOLOMON_OAUTH'),
    )
    report_metrics_to_solomon(**report_kwargs)
