#!/usr/bin/env python


from collections import defaultdict
import math


# Tunable thresholds used by the household (hh) helpers. TODO: move to config
# NOTE(review): presumably a cap on events seen from one IP per day -- confirm at call sites.
max_events_on_IP_per_day = 50000
# NOTE(review): presumably a cap on the number of ids in one daily household -- confirm at call sites.
max_ids_in_daily_hh = 100
# check_hh_size() accepts a member list only while it has fewer entries than this.
merge_max_hh_members = 50
# check_single_hh() requires at least this many home + work events from a one-member list.
min_single_hh_events = 2


# TODO: is it used anywhere?
def parse_tskv(s):
    """
    Parse a tab-separated line of ``key=value`` fields into a dict.

    Fields without '=' are skipped. Only the first '=' of a field separates
    key from value, so values may contain '='. When a key occurs more than
    once, the last occurrence wins.

    :param s: tab-separated string of key=value fields
    :return: dict mapping key -> value (both strings)
    """
    record = {}
    for field in s.split('\t'):
        key, separator, value = field.partition('=')
        if separator:   # empty separator means the field had no '='
            record[key] = value
    return record


def subdict(d, keys):
    """
    Return a new dict holding only those entries of `d` whose key is in `keys`.

    Keys listed in `keys` but absent from `d` are silently ignored.

    :param d: source mapping
    :param keys: iterable of keys to pick
    :return: new dict with the picked entries
    """
    picked = {}
    for key in keys:
        if key in d:
            picked[key] = d[key]
    return picked


time_histogram_bin_size = 15*60     # 15 min, in seconds
# Number of bins needed to cover one day: 24 hours of 3600 seconds each.
time_histogram_nbins = 24*3600 // time_histogram_bin_size


class TEventHistogram(object):
    """
    Histogram of event counts over 15-minute time bins, plus running
    "home" / "work" totals.

    `hist` maps bin index -> event count. `home` and `work` are updated only
    by add(); they are NOT part of save()/load(), so a histogram restored
    from a saved dict starts with both totals at 0.
    """

    def __init__(self, d=None, holiday=False):
        """
        :param d: optional dict produced by save(), used to pre-fill the bins
        :param holiday: when true, every added event counts as "home"
        """
        self.work = 0
        self.home = 0
        self.hist = defaultdict(int)
        self.holiday = holiday
        if d:
            self.load(d)

    @staticmethod
    def _bin_index(ts):
        """Integer index of the bin that `ts` falls into."""
        # Floor division keeps the key an int under both Python 2 and
        # Python 3 (plain '/' would produce float keys on Python 3).
        return int(ts // time_histogram_bin_size)

    def add(self, ts, count=1):
        """
        Register `count` events at time `ts`.

        :param ts: event time; presumably seconds since local midnight
                   (the thresholds below are hour-of-day offsets) -- confirm
        :param count: number of events to add
        """
        self.hist[self._bin_index(ts)] += count
        # Home: any time on a holiday, or up to 09:00 / from 19:00.
        # Work: 09:15 .. 18:45 on a non-holiday.
        # The two 15-minute gaps in between count towards neither total.
        if self.holiday or ts <= 9*3600 or ts >= 19*3600:
            self.home += count
        elif (9*3600 + 15*60) <= ts <= (18*3600 + 45*60):
            self.work += count

    def ts_in_hist(self, ts):
        """Return True if the bin containing `ts` has a positive count."""
        # .get() is used so that probing does not insert a key into the defaultdict.
        return self.hist.get(self._bin_index(ts), 0) > 0

    def save(self):
        """Return the bins as a plain dict with string keys (bin index -> count)."""
        return dict((str(k), v) for k, v in self.hist.items())

    def load(self, d):
        """Replace the bins with the content of `d` (as produced by save())."""
        self.hist = defaultdict(int, ((int(k), v) for k, v in d.items()))


def distance_sq(a, b):
    """Squared Euclidean distance between points `a` and `b` (first two coordinates)."""
    return ((a[0] - b[0])**2) + ((a[1] - b[1])**2)


def _mean_point(points):
    """Coordinate-wise mean of a non-empty list of points, as a tuple of floats."""
    count = float(len(points))  # float: avoid integer truncation on Python 2
    return tuple(total / count for total in map(sum, zip(*points)))


def find_center(geo_points):
    """
    Pick the input point that lies closest to the outlier-free mean.

    Distances are plain Euclidean distances on the raw coordinates.

    :param geo_points: list of (lat, lon)
    :return: (lat, lon) of center -- always one of the input points;
             None for an empty list
    """
    N = len(geo_points)
    if N == 0:
        return None
    if N == 1:
        return geo_points[0]

    # find average and (sample) variance of the distance to the average
    avg_point = _mean_point(geo_points)
    sigma_sq = sum(distance_sq(x, avg_point) for x in geo_points) / float(N - 1)
    # get rid of outliers: keep points within two sigmas of the average
    good_points = [x for x in geo_points if distance_sq(avg_point, x) <= 4*sigma_sq]
    if not good_points:
        # Not expected for finite coordinates; fall back to all points
        # instead of failing on an empty min() below.
        good_points = geo_points
    # find average point again (without outliers); the divisor must be the
    # number of remaining points, not the original N
    avg_point = _mean_point(good_points)
    # find closest point to average
    return min(good_points, key=lambda x: distance_sq(x, avg_point))


def id_with_type(member):
    """
    Build the (id, id_type) pair of a member.

    :param member: mapping with 'id' and 'id_type' keys
    :return: tuple (member['id'], member['id_type'])
    """
    member_id = member['id']
    member_id_type = member['id_type']
    return (member_id, member_id_type)


# util functions for uniting
# Keys that must agree when both members carry them.
single_value_keys = ['id', 'id_type', 'device_type', 'merge_weight']
# Keys whose values are summed (a missing value counts as 0).
additive_keys = ['events_home', 'events_work']
# Keys whose values are concatenated (a missing value counts as an empty list).
list_keys = ['geo_pts']


def merge_member(member1, member2):
    """
    Combine two member records into a new dict.

    Only the keys listed in single_value_keys, additive_keys and list_keys
    are carried over, and only when at least one of the inputs has them.
    The inputs are not modified.

    :param member1: first member dict
    :param member2: second member dict
    :return: new merged dict
    :raises ValueError: if both members have a single-value key with
                        different values
    """
    merged = {}

    for key in single_value_keys:
        in_first = key in member1
        if in_first and key in member2 and member1[key] != member2[key]:
            raise ValueError(
                'merge_member:: member1[{k}] != member2[{k}]; member1={m1}, member2={m2}'
                .format(k=key, m1=member1, m2=member2))
        if in_first:
            merged[key] = member1[key]
        elif key in member2:
            merged[key] = member2[key]

    for key in additive_keys:
        if key not in member1 and key not in member2:
            continue
        merged[key] = member1.get(key, 0) + member2.get(key, 0)

    for key in list_keys:
        if key not in member1 and key not in member2:
            continue
        merged[key] = member1.get(key, []) + member2.get(key, [])

    return merged


def get_weight(member):
    """
    Build the comparison key used to rank a member.

    The result is a 4-tuple, compared element by element:
      1. 1 for a 'tv' device, 0 otherwise, so a tv outranks everything else
         when households are merged
      2. the member's merge weight (num of home events)
      3. the id type  } tie-breakers, so that different ids
      4. the id       } never get equal weights

    :param member: mapping with 'device_type', 'merge_weight', 'id_type', 'id'
    :return: tuple (tv_flag, merge_weight, id_type, id)
    """
    if member['device_type'] == 'tv':
        tv_flag = 1
    else:
        tv_flag = 0
    return (tv_flag, member['merge_weight'], member['id_type'], member['id'])


def check_hh_size(members):
    """
    Tell whether a member list is still below the merge size limit.

    :param members: list of members
    :return: True while len(members) is strictly less than merge_max_hh_members
    """
    member_count = len(members)
    return merge_max_hh_members > member_count


def check_single_hh(members):
    """
    Tell whether a member list qualifies as a household.

    A list with more than one member always qualifies. A single member
    qualifies only if its home + work event count reaches
    min_single_hh_events. An empty list never qualifies.

    :param members: list of member dicts
    :return: bool
    """
    if len(members) == 0:
        # Nothing to inspect; previously this raised IndexError on members[0].
        return False
    if len(members) > 1:
        return True
    only_member = members[0]
    # The event counters are optional in a member dict (merge_member only sets
    # them when present), so a missing counter counts as 0.
    events = only_member.get('events_home', 0) + only_member.get('events_work', 0)
    return events >= min_single_hh_events


def is_holiday(dt):
    """
    Ask the Yandex calendar holidays export whether `dt` is a holiday.

    Performs a blocking HTTP request (no timeout is set) and reports whether
    the response body contains the marker ``is-holiday="1"``.

    :param dt: value substituted into the ``start_date`` query parameter;
               presumably a date string such as 'YYYY-MM-DD' -- confirm
               against callers
    :return: True if the marker is present in the response, else False
    :raises: whatever urlopen()/read() raise on network or HTTP errors
    """
    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3

    url = 'https://calendar.yandex.ru/export/holidays.xml?start_date=%s&country_id=225&who_am_i=test' % dt
    resp = urlopen(url)
    try:
        xml = resp.read()
    finally:
        # Close the connection even when read() fails.
        resp.close()
    # read() returns bytes on Python 3 (str on Python 2); a bytes literal
    # matches both.
    return b'is-holiday="1"' in xml
