from __future__ import division
import logging
import itertools
from collections import Counter, defaultdict, namedtuple

import six


class CustomCounter(Counter):
    """Counter whose ``+`` operator keeps keys with a zero total.

    The addition implemented here retains every key whose resulting count is
    non-negative (``>= 0``); only negative totals are dropped.
    """

    def __add__(self, other):
        """Return a new CustomCounter holding the per-key sum of both operands.

        Keys present in ``self`` are summed with ``other`` (a missing key in
        ``other`` counts as 0, as for any Counter). Keys present only in
        ``other`` are copied over. In both cases the key is kept only when the
        resulting count is ``>= 0``.

        Returns ``NotImplemented`` when ``other`` is not a Counter.
        """
        if not isinstance(other, Counter):
            return NotImplemented

        total = CustomCounter()

        # First pass: everything we already know about.
        for key, own_count in self.items():
            combined = own_count + other[key]
            if combined >= 0:
                total[key] = combined

        # Second pass: keys that exist only on the right-hand side.
        for key, their_count in other.items():
            if key not in self and their_count >= 0:
                total[key] = their_count

        return total


def merge_dict_pair(a, b):
    """Recursively merge two dicts into a new dict.

    For every key found in either input:

    * present in only one dict -> that dict's value is used as-is;
    * present in both, both values dicts -> the values are merged recursively;
    * present in both, both values lists -> the lists are concatenated
      (``a``'s items first);
    * anything else -> ``TypeError``.

    Neither ``a`` nor ``b`` is modified.

    :param a: first dict
    :param b: second dict
    :return: new merged dict
    :raises TypeError: if a shared key holds values that are not both dicts
        or both lists
    """
    # list(...) instead of ``a.keys() + b.keys()``: dict views cannot be
    # concatenated on Python 3, while this form works on both 2 and 3.
    merger = dict.fromkeys(list(a) + list(b))

    # Only values of existing keys are reassigned below, so iterating the
    # dict while updating it is safe.
    for key in merger:
        if key not in a:
            merger[key] = b[key]
        elif key not in b:
            merger[key] = a[key]
        else:
            left, right = a[key], b[key]
            if isinstance(left, dict) and isinstance(right, dict):
                # Merging exactly two dicts is precisely what this function
                # does, so recurse directly.
                merger[key] = merge_dict_pair(left, right)
            elif isinstance(left, list) and isinstance(right, list):
                merger[key] = left + right
            else:
                raise TypeError('Items to merge should be either dicts or lists')

    return merger


def merge_dicts(*dicts):
    """Merge any number of dicts using ``merge_dict_pair``.

    The dicts are merged pairwise in rounds (1st with 2nd, 3rd with 4th, ...)
    until a single dict remains. When a round has an odd number of dicts, the
    last one is merged with an empty dict.

    :param dicts: dicts to merge
    :return: ``{}`` when called without arguments; the very same object when
        called with a single dict; otherwise a new merged dict
    :raises TypeError: propagated from ``merge_dict_pair`` for unmergeable
        values
    """
    if not dicts:
        return {}

    pending = list(dicts)
    while len(pending) > 1:
        if len(pending) % 2:
            # Pad odd rounds so every dict has a partner.
            pending.append({})
        # Index-based pairing avoids ``itertools.izip_longest``, which does
        # not exist on Python 3.
        pending = [
            merge_dict_pair(pending[i], pending[i + 1])
            for i in range(0, len(pending), 2)
        ]

    return pending[0]


def get_dicts_diff(a, b, diff_function):
    """Build a recursive, per-key diff of two dicts.

    For every key found in either input:

    * present in only one dict -> that dict's value is copied;
    * both values are dicts -> they are diffed recursively;
    * both values are ``int``/``float`` -> ``diff_function(a[key], b[key])``;
    * both values are ``None`` -> the diff value stays ``None``;
    * anything else -> the string ``'Incomparable'``.

    :param a: first dict
    :param b: second dict
    :param diff_function: callable taking the two numeric values
    :return: new dict with the diff
    """
    # list(...) instead of ``a.keys() + b.keys()``: dict views cannot be
    # concatenated on Python 3, while this form works on both 2 and 3.
    diff = dict.fromkeys(list(a) + list(b))

    # Only values of existing keys are reassigned, so iterating while
    # updating is safe.
    for key in diff:
        if key not in a:
            diff[key] = b[key]
        elif key not in b:
            diff[key] = a[key]
        else:
            left, right = a[key], b[key]
            if isinstance(left, dict) and isinstance(right, dict):
                diff[key] = get_dicts_diff(left, right, diff_function)
            elif isinstance(left, (int, float)) and isinstance(right, (int, float)):
                diff[key] = diff_function(left, right)
            elif left is None and right is None:
                # Keep the ``None`` placed there by ``dict.fromkeys``.
                continue
            else:
                diff[key] = 'Incomparable'

    return diff


# Agg: a single aggregation -- the column (`field`) and the `operation`
# applied to it; both are handed to pandas' `agg` in `_build_aggregated`.
Agg = namedtuple('Agg', ['field', 'operation'])
# TODO: Implement all of these field types as regular Python classes with __hash__
# and rewrite the _get_iterable_by_* functions accordingly.
# GroupBy: group-by key(s) `by`, one Agg or a sequence of Aggs in `aggs`, and
# the `name` used when the field is rendered.
GroupBy = namedtuple('GroupBy', ['by', 'aggs', 'name'])
# Func: a `function` called with the whole log dict, the `name` used when the
# field is rendered, and the log columns (`used_fields`) it needs.
Func = namedtuple('Func', ['function', 'name', 'used_fields'])


def _collect_fields_of_interest(fields):
    """Return the set of log column names needed to evaluate ``fields``."""
    wanted = set()
    for field in fields:
        if isinstance(field, six.string_types):
            wanted.add(field)
        elif isinstance(field, GroupBy):
            if isinstance(field.by, six.string_types):
                # A single column name: ``update`` would split it into
                # individual characters.
                wanted.add(field.by)
            else:
                wanted.update(field.by)
            aggs = field.aggs
            if isinstance(aggs, Agg):
                wanted.add(aggs.field)
            else:
                wanted.update(agg.field for agg in aggs)
        elif isinstance(field, Func):
            wanted.update(field.used_fields)
        elif all(isinstance(part, six.string_types) for part in field):
            # Plain iterable of column names (e.g. a tuple field).
            wanted.update(field)
        else:
            logging.debug('Unknown field in config: %s', field)
    return wanted


def calculate_log_statistics(log, config):
    """Collect row counts and the column values requested by ``config``.

    :param log: mapping of log name -> sequence of rows. The last element of
        each sequence is dropped before processing (presumably a trailing
        hash entry, judging by the variable name -- confirm with the
        producer). ``None`` rows are skipped when collecting values but still
        count towards the size.
    :param config: mapping of log name -> iterable of field descriptors
        (column name, ``GroupBy``, ``Func`` or an iterable of column names).
    :return: dict with two keys: ``'counts'``, a ``CustomCounter`` of rows
        per log name, and ``'data'``, a dict of log name -> column name ->
        list of values read with ``row.get(column)``.
    """
    counts = CustomCounter()
    data = defaultdict(lambda: defaultdict(list))

    fields_of_interest = {}
    for log_name in config:
        fields_of_interest[log_name] = _collect_fields_of_interest(config[log_name])

    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for log_name, log_content_with_hash in log.items():
        log_content = log_content_with_hash[:-1]
        counts[log_name] += len(log_content)

        if log_name in config:
            for log_field in fields_of_interest[log_name]:
                data[log_name][log_field] += [
                    item.get(log_field) for item in log_content if item is not None
                ]

    # Only the outer mapping is turned into a plain dict, so a missing log
    # name raises KeyError for consumers.
    return {
        'counts': counts,
        'data': dict(data),
    }


def calculate_response_statistics(response, config):
    """Run ``calculate_log_statistics`` on ``response['logs_data']['diff_data']``.

    :param response: mapping containing ``'logs_data'`` -> ``'diff_data'``
    :param config: passed through unchanged
    :return: whatever ``calculate_log_statistics`` returns
    :raises KeyError: if either key is missing from ``response``
    """
    return calculate_log_statistics(response['logs_data']['diff_data'], config)


def diff_in_percents(a, b):
    """Return the change from ``a`` to ``b`` as a percentage of ``a``.

    Equal inputs give ``0``. If the division raises ``ZeroDivisionError``
    (``a`` is zero and ``b`` differs), positive infinity is returned
    regardless of the sign of ``b``.
    """
    if a == b:
        return 0

    delta = b - a
    try:
        return delta * 100 / a
    except ZeroDivisionError:
        return float("inf")


def _generate_groupby_aggs(aggs):
    """Turn one ``Agg`` or an iterable of them into ``{field: [operations]}``.

    Operations for the same field are listed in the order they appear.
    """
    agg_items = [aggs] if isinstance(aggs, Agg) else aggs

    operations_by_field = {}
    for item in agg_items:
        operations_by_field.setdefault(item.field, []).append(item.operation)
    return operations_by_field


def _build_aggregated(log_dict, groupby_field):
    """Group ``log_dict`` by ``groupby_field.by`` and apply its aggregations.

    :param log_dict: column name -> list of values, used to build a DataFrame
    :param groupby_field: ``GroupBy`` describing keys and aggregations
    :return: the result of ``DataFrame.groupby(...).agg(...)``
    """
    import pandas as pd

    frame = pd.DataFrame(log_dict)

    keys = groupby_field.by
    if isinstance(keys, tuple):
        # Pass several key columns to pandas as a list rather than a tuple.
        keys = list(keys)

    grouped = frame.groupby(keys)
    return grouped.agg(_generate_groupby_aggs(groupby_field.aggs))


def _get_iterable_by_groupby(log_dict, groupby_field):
    """
    Return one aggregated column for a GroupBy field.

    log_dict: dict with log columns as fields, lists of values as values, like
    {
        'RequestID': [1, 2, 3],
        'HitLogID': [4, 5, 6]
    }

    All aggregations of groupby_field are computed, but only one column is
    returned: the single Agg itself, or the first Agg when several are given.
    """
    aggregated = _build_aggregated(log_dict, groupby_field)

    aggs = groupby_field.aggs
    # For now only the first agg is supported when several are given, because
    # this function must return a single iterable (a Series in this case).
    selected = aggs if isinstance(aggs, Agg) else aggs[0]
    return aggregated[selected.field][selected.operation]


def _get_iterable_by_tuple(log_dict, tuple_field):
    """Return the element-wise product of the columns named in ``tuple_field``.

    Each member of ``tuple_field`` is resolved with ``_get_iterable_by_field``
    and the results are multiplied left to right with ``numpy.multiply``.

    ``numpy.multiply`` itself takes exactly two inputs -- a third positional
    argument is its ``out`` buffer -- so the columns are folded pairwise
    instead of being unpacked into a single call. For two columns the result
    is identical to ``np.multiply(a, b)``; a one-element tuple yields that
    column unchanged.

    :raises TypeError: if ``tuple_field`` is empty
    """
    import functools

    import numpy as np

    columns = [_get_iterable_by_field(log_dict, f) for f in tuple_field]
    return functools.reduce(np.multiply, columns)


def _get_iterable_by_function(log_dict, func_field):
    try:
        return func_field.function(log_dict)
    except Exception as e:
        logging.error(e, exc_info=True)
        return []


def _get_iterable_by_field(log_dict, field):
    """Resolve a field descriptor to its values.

    Dispatch order matters: ``Func`` and ``GroupBy`` are namedtuples, so they
    must be checked before the generic ``tuple`` case. Anything else is
    looked up as a plain column name in ``log_dict``; an unknown column is
    logged at debug level and yields an empty list.
    """
    if isinstance(field, Func):
        values = _get_iterable_by_function(log_dict, field)
    elif isinstance(field, GroupBy):
        values = _get_iterable_by_groupby(log_dict, field)
    elif isinstance(field, tuple):
        values = _get_iterable_by_tuple(log_dict, field)
    elif field in log_dict:
        values = log_dict[field]
    else:
        logging.debug('Did not find matching function for field %s', field)
        values = []
    return values


def _render_field_name(field):
    """Return the display name of a field descriptor.

    ``Func`` and ``GroupBy`` carry their own ``name``. A plain tuple is
    rendered as ``'dot'`` followed by its ``str()`` with single quotes
    removed. Everything else is rendered with ``str()``.
    """
    # Named descriptors first: they are tuples too.
    if isinstance(field, (Func, GroupBy)):
        return field.name
    if not isinstance(field, tuple):
        return str(field)
    return 'dot' + str(field).replace('\'', '')


def _render_field(field, operation):
    """Return the result key ``'<field name>.<operation name>'``.

    ``operation`` must expose ``__name__``.
    """
    operation_name = operation.__name__
    field_name = _render_field_name(field)
    return '{value}.{operation}'.format(value=field_name, operation=operation_name)


def aggregate_statistics(statistics, config):
    """Apply the configured operations to collected log statistics.

    :param statistics: dict with ``'data'`` (log name -> column -> values) and
        ``'counts'`` (log name -> number of rows)
    :param config: mapping of log name -> {field descriptor: iterable of
        operations}; every operation is a callable exposing ``__name__``
    :return: ``defaultdict(dict)`` mapping log name ->
        ``{'<field>.<operation>': value, ..., 'size': row count}``. An
        operation that raises ``ValueError`` yields ``None`` for its key; a
        field whose resolution raises ``KeyError`` is logged and skipped.
    :raises Exception: any other exception raised by an operation is
        re-raised as the same type with a message describing the key, value
        and operation.
    """
    result = defaultdict(dict)
    data = statistics['data']
    counts = statistics['counts']

    # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for log_name, fields in config.items():
        for field, operations in fields.items():
            try:
                value = _get_iterable_by_field(data[log_name], field)
            except KeyError as e:
                logging.error('Got KeyError in log %s on field %s: %s', log_name, field, e)
                continue
            for operation in operations:
                key = _render_field(field, operation)
                try:
                    result[log_name][key] = operation(value)
                except ValueError as e:
                    logging.error('Got value error in log %s on field %s, operation %s: %s', log_name, field, operation, e)
                    result[log_name][key] = None
                except Exception as e:
                    # ``str(e)`` rather than ``e.message``: the attribute does
                    # not exist on Python 3 and would mask the real error
                    # with an AttributeError.
                    msg = 'key={key}\nvalue={value}\noperation={operation}\nmessage={message}'.format(
                        key=key,
                        value=value,
                        operation=operation.__name__,
                        message=str(e)
                    )
                    raise type(e)(msg)

    for log_name, size in counts.items():
        result[log_name]['size'] = size

    return result


def hitlogid_bannerid_eventcost_rank(d):
    """Per (HitLogID, BannerID) group, return max(EventCost) * max(Rank).

    :param d: column name -> list of values; must contain the columns
        'HitLogID', 'BannerID', 'EventCost' and 'Rank'
    """
    grouping = GroupBy(
        ('HitLogID', 'BannerID'),
        (Agg('EventCost', 'max'), Agg('Rank', 'max')),
        None,
    )
    maxima = _build_aggregated(d, grouping)
    return maxima['EventCost']['max'] * maxima['Rank']['max']


def hitlogid_bannerid_billcost_rank(d):
    """Per (HitLogID, BannerID) group, return max(BillCost) * max(Rank).

    :param d: column name -> list of values; must contain the columns
        'HitLogID', 'BannerID', 'BillCost' and 'Rank'
    """
    grouping = GroupBy(
        ('HitLogID', 'BannerID'),
        (Agg('BillCost', 'max'), Agg('Rank', 'max')),
        None,
    )
    maxima = _build_aggregated(d, grouping)
    return maxima['BillCost']['max'] * maxima['Rank']['max']


def hitlogid_bannerid_shows(d):
    """Count rows with ``CounterType == 1`` per (HitLogID, BannerID) group.

    ``CounterType == 1`` presumably marks "show" events, judging by the
    function name -- confirm against the log schema.

    :param d: column name -> list of values
    :return: DataFrame produced by ``groupby(...).count()``
    """
    import pandas as pd

    frame = pd.DataFrame(d)
    selected = frame['CounterType'] == 1
    grouped = frame[selected].groupby(by=['HitLogID', 'BannerID'])
    return grouped.count()


def hitlogid_bannerid_clicks(d):
    """Count rows with ``CounterType == 2`` per (HitLogID, BannerID) group.

    ``CounterType == 2`` presumably marks "click" events, judging by the
    function name -- confirm against the log schema.

    :param d: column name -> list of values
    :return: DataFrame produced by ``groupby(...).count()``
    """
    import pandas as pd

    frame = pd.DataFrame(d)
    selected = frame['CounterType'] == 2
    grouped = frame[selected].groupby(by=['HitLogID', 'BannerID'])
    return grouped.count()
