from datetime import datetime, timedelta
from collections import defaultdict, namedtuple
import pytz


def group_by(iterables, key_func):
    """Bucket items by the value ``key_func`` returns for each of them.

    Returns a list of ``(key, items)`` pairs; ``items`` is a list holding
    every element that produced ``key``, in the order they were encountered.
    Keys must be hashable because they are used as dictionary keys.
    """
    buckets = {}
    for element in iterables:
        buckets.setdefault(key_func(element), []).append(element)
    return list(buckets.items())


class ActivityHistReducer(object):
    """Reducer that builds per-hour activity histograms for a single key.

    For every group of records it collects the distinct hour buckets in which
    activity was seen, keeps those that fall into the
    ``[retro_date - days_to_take, retro_date]`` window, splits them into
    ``'holiday'`` / ``'working'`` days and reports, per category:

    * ``total_activity_days`` - number of distinct active days;
    * ``hist_activity_count`` - for each hour of day, the number of distinct
      days with activity in that hour;
    * ``hist_activity_rate`` - the same counts divided by the category's
      ``total_activity_days``.
    """

    # Timezone used when no record of the group carries a 'timezone_name'.
    DEFAULT_TIMEZONE = 'Europe/Moscow'
    # ``datetime.weekday()`` values treated as days off (Saturday, Sunday).
    WEEKENDS_DAYS = (5, 6)
    # Size of one activity bucket, in seconds.
    HOUR = 3600

    def __init__(self, holidays, days_to_take, date_format, ext_id_key, retro_date=None):
        """Store the reducer configuration.

        :param holidays: container of date strings (formatted with
            ``date_format``) that count as holidays besides weekends.
        :param days_to_take: length of the look-back window, in days.
        :param date_format: ``strftime``/``strptime`` format used both for
            parsing the retro date and for the produced date strings.
        :param ext_id_key: name of the key field holding the external id.
        :param retro_date: optional retro date string; when falsy, the date
            is taken from the part of the external id after its last ``'_'``.
        """
        self.holidays = holidays
        self.days_to_take = days_to_take
        self.date_format = date_format
        self.ext_id_key = ext_id_key
        self.retro_date = retro_date

    def __call__(self, key, recs):
        """Reduce the records of one key into a single histogram record.

        Records containing a ``'timezone_name'`` field only supply the
        timezone (the last such record wins); every other record must have a
        numeric ``'timestamp'`` field, which is rounded down to the hour.

        :param key: mapping that contains ``ext_id_key``.
        :param recs: iterable of record mappings for that key.
        :raises ValueError: if the retro date does not match ``date_format``.
        :raises IndexError: if no retro date was configured and the external
            id contains no ``'_'``.
        """
        retro_source = self.retro_date or key[self.ext_id_key].rsplit('_', 1)[1]
        retro_date = datetime.strptime(retro_source, self.date_format)

        timezone_name = None
        hour_starts = set()
        for rec in recs:
            if 'timezone_name' in rec:
                timezone_name = rec['timezone_name']
            else:
                timestamp = rec['timestamp']
                hour_starts.add(timestamp - timestamp % self.HOUR)

        tz = pytz.timezone(timezone_name or self.DEFAULT_TIMEZONE)
        total_activity_days, hist_activity_count, hist_activity_rate = (
            self._build_histograms(hour_starts, tz, retro_date)
        )

        yield {
            self.ext_id_key: key[self.ext_id_key],
            'hist_activity_count': hist_activity_count,
            'total_activity_days': total_activity_days,
            'hist_activity_rate': hist_activity_rate,
            # Reported as received: stays None when the default zone was used.
            'timezone_name': timezone_name
        }

    def _date_category(self, dt, date_string):
        """Return 'holiday' for weekends and listed holidays, else 'working'."""
        if dt.weekday() in self.WEEKENDS_DAYS or date_string in self.holidays:
            return 'holiday'
        return 'working'

    def _build_histograms(self, hour_starts, tz, retro_date):
        """Aggregate hour buckets into (total days, hour counts, hour rates).

        :param hour_starts: iterable of distinct POSIX timestamps.
        :param tz: ``tzinfo`` used to convert timestamps to local time.
        :param retro_date: naive ``datetime`` marking the end of the window.
        :returns: tuple ``(total_activity_days, hist_activity_count,
            hist_activity_rate)`` of plain dicts keyed by date category.
        """
        window_start = retro_date - timedelta(days=self.days_to_take)

        days_by_category = defaultdict(set)
        days_by_category_hour = defaultdict(lambda: defaultdict(set))
        for ts in hour_starts:
            local_dt = datetime.fromtimestamp(ts, tz)
            # Compare wall-clock time in the *same* timezone that is used for
            # the date/hour below.  A bare ``datetime.fromtimestamp(ts)``
            # would use the host machine's local zone, making the window
            # depend on where the job happens to run.
            if not window_start <= local_dt.replace(tzinfo=None) <= retro_date:
                continue
            date_string = local_dt.strftime(self.date_format)
            category = self._date_category(local_dt, date_string)
            days_by_category[category].add(date_string)
            days_by_category_hour[category][str(local_dt.hour)].add(date_string)

        total_activity_days = {
            category: len(days) for category, days in days_by_category.items()
        }
        hist_activity_count = {
            category: {hour: len(days) for hour, days in by_hour.items()}
            for category, by_hour in days_by_category_hour.items()
        }
        # A category only exists if it has at least one active day, so the
        # divisor is never zero.
        hist_activity_rate = {
            category: {
                hour: float(count) / total_activity_days[category]
                for hour, count in by_hour.items()
            }
            for category, by_hour in hist_activity_count.items()
        }
        return total_activity_days, hist_activity_count, hist_activity_rate
