#!/usr/bin/python -tt
# coding=utf-8

import requests
import os
import socket
import struct
import time
import sys
import traceback
from datetime import datetime, date, timedelta
from math import radians, cos, sin, asin, sqrt
from multiprocessing.dummy import Pool as ThreadPool
from collections import defaultdict, Counter
from yt.common import format_error
import re

import yt.wrapper as yt

from rtcconf import config

# Do not delete this call! It is a workaround for the "AttributeError: _strptime"
# failure that otherwise shows up when strptime is first used from the threads
# started by run_parallel; calling it once here, at import time, avoids that.
datetime.strptime('', '')

# Un-anchored patterns: usable with search()/sub() to find a value inside a longer string.
RE_NOT_DIGITS = re.compile(r'[^\d]')
RE_HASH = re.compile(r'[0-9a-zA-Z]{32}')
RE_TIMESTAMP = re.compile(r'[0-9]{10}')
RE_MAC_ADDRESS_COLON = re.compile(r'([0-9A-Fa-f]{2}[:]){5}([0-9A-Fa-f]{2})')  # recommended!
RE_MAC_ADDRESS_HYPHEN = re.compile(r'([0-9A-Fa-f]{2}[-]){5}([0-9A-Fa-f]{2})')
RE_MAC_ADDRESS_COLON_HYPHEN = re.compile(r'([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})')
RE_YUID = re.compile(r'[0-9]{1,20}')
RE_UUID = re.compile(r'[0-9a-z]{32}')
RE_AGE = re.compile(r'[0-9]{1,3}')

# Anchored variants: each one is derived from the pattern of the same name above
# and matches only when the whole string is the value.
RE_NOT_DIGITS_STRING = re.compile(r'^' + RE_NOT_DIGITS.pattern + '$')
RE_HASH_STRING = re.compile(r'^' + RE_HASH.pattern + '$')
RE_TIMESTAMP_STRING = re.compile(r'^' + RE_TIMESTAMP.pattern + '$')
RE_MAC_ADDRESS_COLON_STRING = re.compile(r'^' + RE_MAC_ADDRESS_COLON.pattern + '$')
RE_MAC_ADDRESS_HYPHEN_STRING = re.compile(r'^' + RE_MAC_ADDRESS_HYPHEN.pattern + '$')
RE_MAC_ADDRESS_COLON_HYPHEN_STRING = re.compile(r'^' + RE_MAC_ADDRESS_COLON_HYPHEN.pattern + '$')
RE_YUID_STRING = re.compile(r'^' + RE_YUID.pattern + '$')
# Built from RE_UUID (lower-case only); it used to be built from RE_HASH by mistake,
# which let upper-case strings pass as UUIDs.
RE_UUID_STRING = re.compile(r'^' + RE_UUID.pattern + '$')
RE_AGE_STRING = re.compile(r'^' + RE_AGE.pattern + '$')

# Status codes written as the first field of every line in the monrun result files
# (see monrun_ok / monrun_error and their luigi counterparts).
MONRUN_OK_CODE = 0
MONRUN_ERROR_CODE = 2


def run_parallel(lambdas):
    """Run all callables concurrently, one pool thread per callable.

    Blocks until every callable has finished. Results are discarded, but an
    exception raised inside a callable is re-raised here (the first one found
    when walking the callables in order).

    :param lambdas: sized collection of zero-argument callables
    """
    if len(lambdas) == 0:
        return

    workers = ThreadPool(len(lambdas))
    pending = [workers.apply_async(task) for task in lambdas]
    workers.close()
    workers.join()

    # get() re-raises whatever the corresponding callable raised
    for async_result in pending:
        async_result.get()


def wait_all(ops):
    """Block until every operation in ``ops`` reaches the 'completed' state.

    States are polled every 10 seconds via ``op.get_state()``. As soon as any
    operation is seen in the 'aborted' or 'failed' state, ``op.wait()`` is called
    on it so that the operation's own error is raised to the caller.

    :param ops: iterable of operation objects; falsy entries (e.g. None) are ignored
    """
    if not ops:
        return

    failed_states = ('aborted', 'failed')
    completed_state = 'completed'

    pending = [op for op in ops if op]

    while pending:
        op_states = []
        for op in pending:
            try:
                op_states.append((op.get_state(), op))
            except yt.errors.YtHttpResponseError:
                # The state could not be fetched. Keep the operation in the pending
                # list with an unknown state and retry on the next poll; dropping it
                # here would let wait_all return while it is still running.
                op_states.append((None, op))

        # will raise 1st exception
        for op_state, op in op_states:
            if op_state in failed_states:
                op.wait()

        pending = [op for op_state, op in op_states if op_state != completed_state]
        if pending:
            time.sleep(10)


def norm_id(id_to_norm):
    """Strip an identifier and blank out the well-known "empty" placeholders.

    :param id_to_norm: identifier string (may be None or empty)
    :return: the falsy input unchanged, '' for placeholder values
             (all-zero ids, '0', '!', 'null'), otherwise the stripped identifier
    """
    placeholders = (
        '00000000000000000000000000000000',
        '00000000-0000-0000-0000-000000000000',
        '0',
        '!',
        'null',
    )

    if not id_to_norm:
        return id_to_norm

    stripped = id_to_norm.strip()
    if not stripped:
        return stripped

    return '' if stripped in placeholders else stripped


def ip2int(addr):
    """Convert a dotted-quad IPv4 string into its unsigned 32-bit integer value."""
    packed = socket.inet_aton(addr)
    (value,) = struct.unpack("!I", packed)  # network byte order
    return value


def int2ip(addr):
    """Convert an unsigned 32-bit integer (or its string form) to dotted-quad IPv4.

    :return: the textual address, or '' when ``addr`` is falsy (None, 0, '')
    """
    if addr:
        packed = struct.pack("!I", int(addr))
        return socket.inet_ntoa(packed)
    return ''


def int2ip_p(addr):
    """Render a packed binary IPv6 address as text.

    :param addr: packed address as accepted by ``socket.inet_ntop(AF_INET6, ...)``
    :return: textual IPv6 address, or '' when ``addr`` is falsy or
             ``inet_ntop`` rejects it with ValueError
    """
    if addr:
        try:
            return socket.inet_ntop(socket.AF_INET6, addr)
        except ValueError:
            pass
    return ''


def to_ipv6(ip):
    """Clean up a textual IP address.

    Removes the '::ffff:' IPv4-mapped prefix and blanks out the loopback and
    unspecified IPv4 addresses.

    :return: '' for falsy input, '127.0.0.1' and '0.0.0.0'; otherwise the
             address without the '::ffff:' prefix
    """
    mapped_prefix = '::ffff:'
    ignored_addresses = ('127.0.0.1', '0.0.0.0')

    if not ip:
        return ''

    if ip.startswith(mapped_prefix):
        ip = ip.replace(mapped_prefix, '')

    return '' if ip in ignored_addresses else ip


def get_ts_format(timestamp, timezone, dateformat):
    """Convert a local wall-clock time plus a UTC offset into a unix timestamp.

    Side effect: when the TZ environment variable is unset or empty it is set to
    'GMT' for the whole process (and ``time.tzset()`` is called), so that
    ``time.mktime`` treats the parsed time as GMT.

    :param timestamp: date/time string in ``dateformat``
    :param timezone: UTC offset such as '+0300', '-05:00' or '+05:30'
    :param dateformat: ``strptime`` format of ``timestamp``
    :return: unix timestamp as int
    """
    if not os.environ.get('TZ', ''):
        os.environ['TZ'] = 'GMT'
        time.tzset()

    local_dt = datetime.strptime(timestamp, dateformat)

    # The offset is written as [+-]HHMM. Split hours and minutes explicitly:
    # dividing the HHMM number by 100 treats "30 minutes" as 0.30 of an hour and
    # rounds differently on Python 2 and 3.
    raw_offset = int(timezone.replace(':', ''))
    hours, minutes = divmod(abs(raw_offset), 100)
    offset_seconds = (hours * 60 + minutes) * 60
    if raw_offset < 0:
        offset_seconds = -offset_seconds

    return int(time.mktime(local_dt.timetuple())) - offset_seconds


def get_ts(timestamp, timezone):
    """Return only the unix timestamp computed by get_ts_tz (None for empty input)."""
    ts, _tz_offset = get_ts_tz(timestamp, timezone)
    return ts


def get_ts_tz(timestamp, timezone):
    """Convert 'YYYY-MM-DD HH:MM:SS' local time plus a UTC offset to a unix timestamp.

    Side effect: when the TZ environment variable is unset or empty it is set to
    'GMT' for the whole process (and ``time.tzset()`` is called), so that
    ``time.mktime`` treats the parsed time as GMT.

    :param timestamp: local wall-clock time, '%Y-%m-%d %H:%M:%S'
    :param timezone: UTC offset such as '+0300', '-05:00' or '+05:30'
    :return: tuple ``(unix_timestamp, offset_in_seconds)``;
             ``(None, None)`` when either argument is empty
    """
    if not timestamp or not timezone:
        return (None, None)

    if not os.environ.get('TZ', ''):
        os.environ['TZ'] = 'GMT'
        time.tzset()

    local_dt = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')

    # The offset is written as [+-]HHMM. Split hours and minutes explicitly:
    # scaling the HHMM number by 3600 / 100 treats "30 minutes" as 0.30 of an hour
    # and yields a float on Python 3.
    raw_offset = int(timezone.replace(':', ''))
    hours, minutes = divmod(abs(raw_offset), 100)
    offset_seconds = (hours * 60 + minutes) * 60
    if raw_offset < 0:
        offset_seconds = -offset_seconds

    ts = int(time.mktime(local_dt.timetuple())) - offset_seconds
    return (ts, offset_seconds)


def has_intersection(min_device_ts, max_device_ts, min_yuid_ts, max_yuid_ts):
    """Tell whether the closed intervals [min_device, max_device] and [min_yuid, max_yuid] overlap.

    The check is: one interval's start lies inside the other interval.
    """
    yuid_starts_inside_device = min_device_ts <= min_yuid_ts <= max_device_ts
    device_starts_inside_yuid = min_yuid_ts <= min_device_ts <= max_yuid_ts
    return yuid_starts_inside_device or device_starts_inside_yuid


def not_far_away(min_device_ts, max_device_ts, min_yuid_ts, max_yuid_ts):
    if 0 < max_device_ts - min_yuid_ts < 7200:
        return True
    if 0 < max_yuid_ts - min_device_ts < 7200:
        return True

    return False


def get_date_before(date_str, days_before):
    """Return the date ``days_before`` days before ``date_str``, as 'YYYY-MM-DD'.

    NOTE(review): a second ``get_date_before`` defined further down in this module
    rebinds the name, so this definition is effectively dead code. It is kept
    correct here; consider removing one of the two.

    :param date_str: reference date, 'YYYY-MM-DD'
    :param days_before: number of days to go back
    """
    # The subtraction must start from the parsed date. Calling .today() on the
    # parsed value returns the current date and silently ignores date_str.
    reference_day = datetime.strptime(date_str, '%Y-%m-%d')
    return (reference_day - timedelta(days_before)).strftime('%Y-%m-%d')


def get_previous_day(delta):
    """Return the date ``delta`` days before today (local time) as 'YYYY-MM-DD'."""
    shifted_day = date.today() - timedelta(delta)
    return shifted_day.strftime('%Y-%m-%d')


def get_yesteday():
    """Return yesterday's date (local time) as 'YYYY-MM-DD'."""
    one_day = timedelta(1)
    return (date.today() - one_day).strftime('%Y-%m-%d')


def get_dates_before(date, days):
    """List ``days`` consecutive dates ending at ``date``, newest first.

    :param date: last (newest) date, 'YYYY-MM-DD'; it is included in the result
    :param days: how many dates to return
    :return: list of 'YYYY-MM-DD' strings in descending order
    """
    day_format = '%Y-%m-%d'
    newest = datetime.strptime(date, day_format)
    shifts = (timedelta(days=back) for back in range(days))
    return [(newest - shift).strftime(day_format) for shift in shifts]


def get_date_before(date, days):
    """Return the date ``days`` days before ``date``; both are 'YYYY-MM-DD' strings."""
    day_format = '%Y-%m-%d'
    shifted = datetime.strptime(date, day_format) - timedelta(days=days)
    return shifted.strftime(day_format)


def ts_to_date_str(ts):
    """Format a unix timestamp as a 'YYYY-MM-DD' date in the process-local time zone."""
    moment = datetime.fromtimestamp(ts)
    return moment.strftime('%Y-%m-%d')


def date_str_to_ts(date_str):
    """Return the unix timestamp of local midnight at the start of ``date_str``.

    :param date_str: date as 'YYYY-MM-DD'
    :return: unix timestamp as int, computed in the process-local time zone
    """
    # time.mktime replaces strftime("%s"): the "%s" directive is not part of
    # Python's documented strftime directives and only works where the platform
    # C library happens to support it.
    return int(time.mktime(time.strptime(date_str, '%Y-%m-%d')))


def date_str_to_date(date_str):
    """Parse a 'YYYY-MM-DD' string into a ``datetime`` (time part is midnight)."""
    day_format = '%Y-%m-%d'
    return datetime.strptime(date_str, day_format)


def ts_to_datetime_str(ts):
    """Format a unix timestamp as 'YYYY-MM-DD HH:MM:SS' in the process-local time zone."""
    moment = datetime.fromtimestamp(ts)
    return moment.strftime('%Y-%m-%d %H:%M:%S')


# One escape sequence: a backslash followed by one of the recognised characters.
_PROTO_ESCAPE_RE = re.compile(r'\\([0tnr="\\])')
_PROTO_ESCAPE_MAP = {
    '0': '\0',
    't': '\t',
    'n': '\n',
    'r': '\r',
    '=': '=',
    '"': '"',
    '\\': '\\',
}


def convert_proto_string(data):
    """Strip ``data`` and decode its backslash escape sequences.

    Recognised escapes: ``\\0``, ``\\t``, ``\\n``, ``\\r``, ``\\=``, ``\\"`` and
    an escaped backslash. Any other backslash is left untouched.

    The string is decoded in a single left-to-right pass. Chained ``replace()``
    calls mis-decode an escaped backslash that is followed by ``0/t/n/r``: the
    input characters ``\\ \\ n`` must become backslash + 'n', not backslash + newline.

    :param data: escaped string
    :return: decoded string
    """
    return _PROTO_ESCAPE_RE.sub(lambda found: _PROTO_ESCAPE_MAP[found.group(1)], data.strip())


# One escape sequence: a backslash followed by one of the recognised characters.
_IP_ESCAPE_RE = re.compile(r'\\([0tnr\\])')
_IP_ESCAPE_MAP = {
    '0': '\0',
    't': '\t',
    'n': '\n',
    'r': '\r',
    '\\': '\\',
}


def convert_ip_string(ip):
    """Decode the backslash escape sequences in an escaped IP value.

    Recognised escapes: ``\\0``, ``\\t``, ``\\n``, ``\\r`` and an escaped
    backslash. Any other backslash is left untouched. Unlike
    convert_proto_string the value is not stripped.

    The string is decoded in a single left-to-right pass. Chained ``replace()``
    calls mis-decode an escaped backslash that is followed by ``0/t/n/r``.

    :param ip: escaped string
    :return: decoded string
    """
    return _IP_ESCAPE_RE.sub(lambda found: _IP_ESCAPE_MAP[found.group(1)], ip)


def write_monrun(monrun_file_name, msg):
    """Overwrite the file ``monrun_file_name`` with ``msg``."""
    with open(monrun_file_name, 'w') as out:
        out.write(msg)


def monrun_ok(monrun_dir=config.MONRUN_DATE_FOLDER):
    """Append an OK status line to this script's monrun result file.

    The file is ``<monrun_dir><script name>.result``. The directory and the
    script name are concatenated as-is, so ``monrun_dir`` has to end with a
    path separator.
    """
    result_path = monrun_dir + os.path.basename(sys.argv[0]) + '.result'
    with open(result_path, 'a') as result_file:
        result_file.write('{}; OK\n'.format(MONRUN_OK_CODE))


def monrun_error(prefix='', monrun_dir=config.MONRUN_DATE_FOLDER):
    """Append an error status line with the current traceback to the monrun result file.

    The traceback comes from ``traceback.format_exc()``, i.e. it describes the
    exception being handled at the time of the call. Newlines are replaced with
    a literal backslash-n so the record stays on one line.

    The file is ``<monrun_dir><script name>.result``. The directory and the
    script name are concatenated as-is, so ``monrun_dir`` has to end with a
    path separator.

    :param prefix: text put in front of the traceback
    """
    one_line_trace = traceback.format_exc().replace('\n', '\\n')
    result_path = monrun_dir + os.path.basename(sys.argv[0]) + '.result'
    with open(result_path, 'a') as result_file:
        result_file.write('{}; {}\n'.format(MONRUN_ERROR_CODE, prefix + one_line_trace))


def monrun_ok_nodate():
    """Report OK into ``config.MONRUN_FOLDER`` instead of the default dated folder."""
    return monrun_ok(config.MONRUN_FOLDER)


def monrun_error_nodate():
    """Report the current error into ``config.MONRUN_FOLDER`` instead of the default dated folder."""
    return monrun_error('', config.MONRUN_FOLDER)


def luigi_get_result_file(task):
    """Return the task id and the path of the monrun result file for a luigi task.

    The file lives in ``<config.MONRUN_DATE_FOLDER>/<script name>/`` and is named
    ``<task_id>.result``; the directory is created when missing.

    :param task: object exposing ``task_id``
    :return: tuple ``(task_id, result_file_path)``
    """
    import errno

    task_id = task.task_id
    result_dir = os.path.join(config.MONRUN_DATE_FOLDER, os.path.basename(sys.argv[0]))

    # Create-and-ignore-EEXIST, not exists()+makedirs(): several tasks may report
    # at the same moment, and a check-then-create sequence lets the losing one
    # crash with "File exists".
    try:
        os.makedirs(result_dir)
    except OSError as error:
        if error.errno != errno.EEXIST:
            raise

    result_file = os.path.join(result_dir, ".".join([task_id, 'result']))
    return task_id, result_file


def monrun_luigi_error(task, exception):
    """Append an error record for a luigi task to its monrun result file.

    The record is one line of ';'-joined fields: error code, ' <task_id>: Error',
    the current traceback and, for YT errors, the fully formatted YT error text.
    Newlines inside the record are replaced with a literal backslash-n.

    :param task: luigi task (must expose ``task_id``)
    :param exception: the exception that made the task fail
    """
    yt_exception_text = ""
    if isinstance(exception, yt.YtError):
        # default pretty formatting is cutting too much
        yt_exception_text = format_error(exception, attribute_length_limit=None)

    trace = traceback.format_exc().replace('\n', '\\n')
    task_id, result_file = luigi_get_result_file(task)

    with open(result_file, 'a') as f:
        fields = (MONRUN_ERROR_CODE, ' {}: Error'.format(task_id), trace, yt_exception_text)
        record = ";".join(str(field) for field in fields)
        f.write(record.replace('\n', '\\n') + '\n')


def monrun_luigi_ok(task):
    """Append an OK record ('<code>; <task_id>: OK') for a luigi task to its monrun result file."""
    task_id, result_file = luigi_get_result_file(task)
    with open(result_file, 'a') as f:
        record = '{}; {}: OK'.format(MONRUN_OK_CODE, task_id)
        f.write(record.replace('\n', '\\n') + '\n')


def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in meters between two points given in decimal degrees.

    Uses the haversine formula with a sphere radius of 6367 km.
    """
    radius_km = 6367

    phi1, lam1, phi2, lam2 = (radians(angle) for angle in (lat1, lon1, lat2, lon2))
    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    hav = sin(delta_phi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(delta_lam / 2) ** 2
    central_angle = 2 * asin(sqrt(hav))
    return radius_km * central_angle * 1000


def bytes_to_long(bytes_array):
    """Combine a sequence of byte values into one integer, least significant byte first."""
    total = 0
    for position, byte_value in enumerate(bytes_array):
        total += byte_value << (position * 8)
    return total


def default_to_regular(d):
    """Recursively turn defaultdict/Counter structures into plain dicts.

    Keys of converted mappings are stringified and their values are converted
    recursively; a ``set`` becomes a ``list``. Anything else, including a plain
    ``dict``, is returned unchanged.

    :param d: value to convert
    :return: converted value
    """
    if isinstance(d, (defaultdict, Counter)):
        # items() instead of iteritems(): works on both Python 2 and 3
        return {str(key): default_to_regular(value) for key, value in d.items()}
    if isinstance(d, set):
        return list(d)
    return d


import operator


def combine_dicts(a, b, op=operator.add):
    """Merge two dicts, combining the values of shared keys with ``op``.

    :param a: first dict
    :param b: second dict
    :param op: two-argument callable applied as ``op(a[k], b[k])`` for every key
               present in both dicts (default: addition)
    :return: new dict; the inputs are not modified
    """
    # Built without list-concatenating items() and without viewkeys(), which
    # exist only on Python 2.
    combined = dict(a)
    combined.update(b)
    for key in b:
        if key in a:
            combined[key] = op(a[key], b[key])
    return combined


def murmur(data):
    """Compute a 64-bit hash of ``data``.

    Inputs shorter than 32 characters are left-padded with '0' up to 32
    characters before hashing. The multiplier ``m`` and shift ``r`` are the
    constants of MurmurHash64A, but the mixing below is not a byte-for-byte
    copy of the reference algorithm (see the notes in the body).

    NOTE(review): callers presumably compare the result with hashes produced
    elsewhere, so the arithmetic is left untouched; confirm before "fixing" it.

    :param data: string to hash
    :return: hash value in the range [0, 2**64)
    """
    if len(data) < 32:
        data = ''.join(['0'] * (32 - len(data))) + data

    seed = 0xe17a1465
    m = 0xc6a4a7935bd1e995
    r = 47
    MASK = 2 ** 64 - 1  # keeps intermediate values within 64 bits
    data_as_bytes = bytearray(data)
    h = seed ^ ((m * len(data_as_bytes)) & MASK)

    # Mix the input in 8-byte little-endian chunks. The last slice may be shorter
    # than 8 bytes, so a trailing partial chunk is mixed here as well.
    for ll in range(0, len(data_as_bytes), 8):
        k = bytes_to_long(data_as_bytes[ll:ll + 8])
        k = (k * m) & MASK
        k = k ^ ((k >> r) & MASK)
        k = (k * m) & MASK
        h = (h ^ k)
        h = (h * m) & MASK

    # Number of bytes beyond the last full 8-byte chunk (0 when the length is a
    # multiple of 8, e.g. for every input padded to exactly 32 characters).
    l = len(data_as_bytes) & 7

    # NOTE(review): the tail handling below indexes from the START of the data,
    # not from the start of the tail, and uses index 4 for the three lowest
    # shifts (where 2, 1 and 0 would be expected). It only runs when the length
    # is not a multiple of 8. Looks like a bug, but changing it changes the hash.
    if l >= 7:
        h = (h ^ (data_as_bytes[6] << 48))

    if l >= 6:
        h = (h ^ (data_as_bytes[5] << 40))

    if l >= 5:
        h = (h ^ (data_as_bytes[4] << 32))

    if l >= 4:
        h = (h ^ (data_as_bytes[3] << 24))

    if l >= 3:
        h = (h ^ (data_as_bytes[4] << 16))

    if l >= 2:
        h = (h ^ (data_as_bytes[4] << 8))

    if l >= 1:
        h = (h ^ data_as_bytes[4])
        h = (h * m) & MASK

    # Final avalanche
    h = h ^ ((h >> r) & MASK)
    h = (h * m) & MASK
    h = h ^ ((h >> r) & MASK)

    return h


from itertools import chain
import types


def flatten(obj, list_types=(list, tuple, set, types.GeneratorType)):
    """Create a flat list from all elements, descending into nested containers.

    :param obj: any value; a non-container yields a one-element list
    :param list_types: types that are treated as containers and flattened
    :return: flat list of the leaf elements
    """
    if not isinstance(obj, list_types):
        return [obj]
    # list_types is passed down explicitly: without it the nested levels would
    # fall back to the default container types and ignore the caller's choice.
    return list(chain.from_iterable(flatten(item, list_types) for item in obj))


def top_dict(orig_dict, top_n, top_func):
    """
    Keep only the ``top_n`` highest-priority items of a dict.

    :type orig_dict: dict
    :param top_n: how many items to keep
    :type top_n: int
    :param top_func: callable extracting the priority from a ``(key, value)`` item
    :return: dict consisting of top_n dict items according to top_func
    """
    # items() instead of iteritems(): works on both Python 2 and 3
    ranked = sorted(orig_dict.items(), key=top_func, reverse=True)
    return dict(ranked[:top_n])


def md5(value):
    """Return the hexadecimal MD5 digest of ``value``."""
    from hashlib import md5 as _hashlib_md5
    digest = _hashlib_md5(value)
    return digest.hexdigest()


def prepare_phone_md5(phone):
    """Hash a phone number and extract its leading digits.

    Every non-digit character is removed before hashing.

    :param phone: raw phone number
    :return: tuple ``(id_prefix, id_hash)``: the first four digits ('' when
             fewer than four digits remain) and the MD5 hex digest of the digits
    :raises Exception: when ``phone`` already looks like a 32-character hash
    """
    if RE_HASH_STRING.match(phone) is not None:
        raise Exception("Already a 32-chars length hash representation")

    digits = re.sub('[^\\d]', '', phone)
    id_prefix = digits[:4] if len(digits) > 3 else ''
    return id_prefix, md5(digits)


# TODO: make better email normalization
# Substrings on which a dirty value is split; the part that contains '@' is kept.
splitter_symbols = [" ", "/", '"', "'", "\t", "\r", "\n", "%3a", ":", "%", "=", ';', '(', ')', ',']
# Substrings that are simply deleted from a dirty value.
remove_symbols = ["&lt;", "&nbsp;", "%"]
# A value containing any of these is treated as dirty and goes through the cleanup.
bad_symbols = splitter_symbols + remove_symbols

# Deliberately simple shape check (login@domain.tld), applied to the cleaned-up value.
email_simple_regex = re.compile(r"^([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9]+)$")


def remove_custom_garbage_from_email(orig_email):
    """Try to extract a clean e-mail address from a value polluted with junk symbols.

    A value without any of ``bad_symbols`` is returned untouched (and unchecked).
    Otherwise the value is stripped, ``remove_symbols`` are deleted, and for each
    splitter in turn the value is cut down to its last piece containing '@'.

    :param orig_email: raw e-mail string
    :return: ``orig_email`` when it has no bad symbols; the cleaned address when
             it passes ``email_simple_regex``; otherwise None
    """
    if not any(symbol in orig_email for symbol in bad_symbols):
        return orig_email

    candidate = orig_email.strip()
    for garbage in remove_symbols:
        candidate = candidate.replace(garbage, '')

    for splitter in splitter_symbols:
        if splitter in candidate:
            pieces_with_at = [piece for piece in candidate.split(splitter) if "@" in piece]
            if pieces_with_at:
                candidate = pieces_with_at[-1]  # keep only one part

    return candidate if email_simple_regex.match(candidate) else None


def norm_login(orig_login):
    """Normalise a login: strip it, replace dots with hyphens and lower-case it.

    A login containing '@' is normalised as an e-mail via norm_email
    (CRYPTR-223 pdd: e-mails can be used as logins, e.g. org logins at
    standalone domains).
    """
    if '@' not in orig_login:
        return orig_login.strip().replace('.', '-').lower()
    return norm_email(orig_login)


def norm_mailru(email):
    """Rewrite an e-mail address to the lower-cased login at 'mail.ru'.

    Everything after the first '@' is discarded.

    :param email: e-mail address
    :return: ``<lower-cased login>@mail.ru``
    """
    login = email.split('@')[0]
    # The expression used to be evaluated and thrown away (no ``return``), so the
    # function always returned None.
    return login.lower() + '@' + 'mail.ru'


def login_to_email(orig_login):
    """Build the e-mail address that corresponds to a login.

    :param orig_login: raw login
    :return: the normalised login itself when it already is an e-mail
             (CRYPTR-223 pdd), ``<login>@yandex.ru`` for ordinary logins, or None
             when the login normalises to nothing or is a synthetic phone/social
             login (CRYPTAIS-1582)
    """
    normed_login = norm_login(orig_login)
    if not normed_login:
        return None

    # CRYPTAIS-1582: synthetic phone and social auth
    is_synthetic = 'phne-' in normed_login or 'uid-' in normed_login
    if is_synthetic:
        return None

    # CRYPTR-223 pdd
    if '@' in normed_login:
        return normed_login

    return normed_login + '@yandex.ru'


def denorm_email(email):
    """List the equivalent spellings of a normalised e-mail address.

    :param email: e-mail address
    :return: list starting with ``email`` itself, followed by the ya.ru and
             dotted-login variants for yandex.ru addresses, or the
             googlemail.com variant for gmail.com addresses
    """
    variants = [email]

    pieces = email.split('@')
    if len(pieces) != 2:
        return variants
    login, domain = pieces

    if domain == 'yandex.ru':
        variants.append(login + '@ya.ru')
        dotted_login = login.replace('-', '.')
        if dotted_login != login:
            variants.extend([dotted_login + '@yandex.ru', dotted_login + '@ya.ru'])
    elif domain == 'gmail.com':
        variants.append(login + '@googlemail.com')

    return variants


def norm_email(email):
    """Normalise an e-mail address.

    The value is stripped and cleaned with remove_custom_garbage_from_email, the
    domain is lower-cased and known aliases are mapped to their main domain
    (ya.ru -> yandex.ru, googlemail.com -> gmail.com). The login is lower-cased;
    yandex.ru logins additionally go through norm_login.

    :param email: raw e-mail string
    :return: normalised e-mail, the cleaned value unchanged when it has no '@',
             or None when the input is empty or cannot be cleaned
    """
    if not email:
        return None

    # for mail.ru for example bk.ru and inbox.ru is different emails
    domain_aliases = {
        # yandex
        'ya.ru': 'yandex.ru',
        # google
        'googlemail.com': 'gmail.com'
    }

    cleaned = remove_custom_garbage_from_email(email.strip())
    if not cleaned:
        return None

    parts = cleaned.split('@')
    if len(parts) <= 1:
        return cleaned

    login = parts[0]
    domain = parts[1].lower()
    domain = domain_aliases.get(domain, domain)

    login = norm_login(login) if domain == 'yandex.ru' else login.lower()
    return login + '@' + domain


def get_yuid_creation_date(yuid):
    """Extract the creation date encoded in the last ten digits of a yuid.

    :param yuid: yuid as a string of digits
    :return: creation date as 'YYYY-MM-DD', or None when the yuid is not numeric
             or the encoded timestamp is implausible (not after 2000-01-01 or
             more than a week in the future)
    """
    try:
        int(yuid)  # the whole yuid must be numeric, the value itself is not used
        yuid_ts = int(yuid[-10:])
    except ValueError:
        return None

    too_old = 946684800  # 2000-01-01 00:00:00 UTC
    too_new = int(time.time()) + 604800  # now + one week
    if not too_old < yuid_ts < too_new:
        return None
    return ts_to_date_str(yuid_ts)


def get_yuid_activity_type(activity_dates, yuid_creation_date):
    """Classify a yuid by the set of dates on which it was seen.

    :param activity_dates: collection of activity dates (may be empty or None)
    :param yuid_creation_date: creation date of the yuid
    :return: 'not_active' for no dates, 'private' when the only activity date is
             the creation date, 'one_day' for any other single date, 'active'
             for more than one date
    """
    if not activity_dates:
        return 'not_active'
    if len(activity_dates) != 1:
        return 'active'

    only_date = list(activity_dates)[0]
    return 'private' if only_date == yuid_creation_date else 'one_day'


def get_iis_crypta_id(id_value, id_type='yuid'):
    """Ask the idserv 'identify' HTTP endpoint for the crypta id of an identifier.

    :param id_value: identifier value
    :param id_type: identifier kind, used as the ``body.web_fp.<id_type>`` query key
    :return: ``body.crypta_id`` from the JSON response, encoded as UTF-8
    """
    query = {
        'header.type': 'IDENTIFY',
        'header.version': 'VERSION_4_10',
        'body.inspect': 'true',
    }
    query['body.web_fp.%s' % id_type] = id_value

    response = requests.get('http://idserv.rtcrypta.yandex.net:8080/json/identify', params=query)
    crypta_id = response.json()['body']['crypta_id']
    return crypta_id.encode('UTF-8')


import urllib2
import json

# Defaults for get_bb_info: the host goes into the 'Host' header and into the URL.
YABS_HOST = 'yabs.yandex.ru'
# Alternative URL kept for reference (adds the call-bsbts=1 parameter):
# YABS_URL = 'http://%s/bigb?operation=6&call-bsbts=1' % YABS_HOST
YABS_URL = 'http://%s/bigb?operation=6' % YABS_HOST


def get_bb_info(yandexuid, host=YABS_HOST, url=YABS_URL):
    """Fetch the JSON that ``url`` returns for a yandexuid and pretty-print it to stdout.

    The yandexuid is sent as the ``yandexuid`` cookie. Nothing is returned.

    :param yandexuid: yandexuid value
    :param host: value of the 'Host' request header
    :param url: URL to request
    """
    headers = {'Cookie': 'yandexuid={}'.format(yandexuid), 'Host': host}
    request = urllib2.Request(url, None, headers)

    response = urllib2.urlopen(request)
    try:
        resp = json.load(response)
    finally:
        # release the connection even when the body is not valid JSON
        response.close()

    # single-argument parenthesised form: same output on Python 2, valid syntax on Python 3
    print(json.dumps(resp, sort_keys=True, indent=4))


def parse_dict_field(value):
    """Parse a 'key:value,key:value' string into a dict of strings.

    Each comma-separated piece is split on its first ':' only.

    :return: the parsed dict, or {} when any piece has no ':' (including '')
    """
    pairs = (piece.split(':', 1) for piece in value.split(','))
    try:
        return dict(pairs)
    except ValueError:
        return {}


def convert_to_dict_field(value_dict):
    """Serialise a dict into a 'key:value,key:value' string (inverse of parse_dict_field).

    :param value_dict: dict to serialise; keys and values are formatted with ``%s``
    :return: comma-joined 'key:value' pairs in dict iteration order
    """
    # items() instead of iteritems(): works on both Python 2 and 3
    return ','.join('%s:%s' % (conn_type, conn_count) for conn_type, conn_count in value_dict.items())


def is_true(rec_value):
    """Tell whether a record value means "true": the string 'true' or the bool True."""
    # TODO: this method is only needed during yt 0.6 -> 0.7 migration for compatibility reasons
    if rec_value == 'true':
        return True
    return rec_value is True


def is_false(rec_value):
    """Tell whether a record value means "false": the string 'false' or the bool False."""
    # TODO: this method is only needed during yt 0.6 -> 0.7 migration for compatibility reasons
    if rec_value == 'false':
        return True
    return rec_value is False


def top(dict_counter):
    """
    :param dict_counter: collections.Counter
    :return: single most common element if it exists or None
    """
    ranked = dict_counter.most_common()
    if not ranked:
        return None
    element, _count = ranked[0]
    return element


def top_activity(activity_dict):
    """
    Pick the id with the largest number of activity dates.

    :param activity_dict: dict(id -> dates collection)
    :return: single most active element if it exists or None
    """
    # items() instead of iteritems(): works on both Python 2 and 3
    dates_per_id = Counter({key: len(dates) for key, dates in activity_dict.items()})
    return top(dates_per_id)


def union(several_iterables):
    """Return a list of the distinct elements found across all given iterables."""
    distinct = {element for iterable in several_iterables for element in iterable}
    return list(distinct)
