#!/usr/bin/env python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, sys
from tempfile import NamedTemporaryFile
from datetime import time, datetime, timedelta
from collections import defaultdict
from socket import gethostname

# Names reported for the one-letter codes looked up by get_footype();
# the "U" entry is the one returned when the flags contain "_UN".
TYPES = {
    "H": "antifoo",
    "S": "foo",
    "U": "unsubscribe",
}
#COMPLLOG_FILEMASK = "/u0/" + gethostname() + "/usr/local/www/logs/compl.log-%Y-%m-%d"
# Path of the current log on this host; the per-day loop appends ".N" to it
# for older days.
COMPLLOG = "".join(("/u0/", gethostname(), "/logs/compl.log"))

def get_footype(foo, flags):
    """Map a log record's type code and flags to a reported type name.

    If *flags* contains the substring "_UN" the "U" entry of TYPES is
    returned regardless of *foo*. Otherwise *foo* is looked up in TYPES,
    and "-" is returned for codes that are not listed there.
    """
    unsubscribed = "_UN" in flags
    return TYPES["U"] if unsubscribed else TYPES.get(foo, "-")

def get_daystart(dt):
    """Return a copy of *dt* with hour, minute, second and microsecond set to 0."""
    midnight_fields = dict.fromkeys(("hour", "minute", "second", "microsecond"), 0)
    return dt.replace(**midnight_fields)

def get_monthstart(dt):
    """Return, as an int, the "%s" timestamp of the first day of *dt*'s month.

    The datetime is moved back to day 1 of its month, reduced to a date
    (dropping the time of day) and formatted with strftime("%s").
    NOTE(review): "%s" is handled by the platform's strftime, not by Python
    itself; presumably it yields local-midnight epoch seconds -- confirm on
    the target host.
    """
    days_into_month = dt.day - 1
    first_day = (dt - timedelta(days=days_into_month)).date()
    return int(first_day.strftime("%s"))

def get_weekstart(dt):
    """Return, as an int, the "%s" timestamp of the Monday of *dt*'s week.

    The datetime is moved back by dt.weekday() days (weekday() is 0 on
    Monday), reduced to a date (dropping the time of day) and formatted
    with strftime("%s"), the same way get_monthstart() does it.
    NOTE(review): "%s" is handled by the platform's strftime, not by Python
    itself; presumably it yields local-midnight epoch seconds -- confirm on
    the target host.
    """
    # timedelta takes the plural keyword "days"; the singular form raises
    # TypeError on every call.
    monday = (dt - timedelta(days=dt.weekday())).date()
    return int(monday.strftime("%s"))

def is_corp(suid):
    """Return True when *suid* is exactly 16 characters long and starts with "112000"."""
    if len(suid) != 16:
        return False
    return suid.startswith("112000")

def get_weekdates(startdate):
    """Yield seven consecutive datetimes, one per day, beginning with *startdate*."""
    for offset in range(7):
        yield startdate + timedelta(days=offset)

def get_monthdates(startdate, today):
    """Yield consecutive days from *startdate* within its month and before *today*.

    Iteration stops at the first day that either falls into another month
    or is not strictly earlier than *today*.
    """
    first_month = startdate.month
    current = startdate
    while True:
        if current.month != first_month or not (current < today):
            return
        yield current
        current = current + timedelta(days=1)

# Count thresholds: for each one build_stat() reports how many keys reached it.
BORDERS = (1, 3, 10, 100)
def build_stat(f, stat, prefix, borders=BORDERS):
    """Write per-threshold tallies of *stat* to the file-like object *f*.

    stat    -- mapping of (footype, timestamp, suid) -> count
    prefix  -- leading part of every emitted metric name
    borders -- thresholds to tally against (defaults to BORDERS)

    For every (footype, timestamp) pair and every border, the number of keys
    whose count is >= border is tallied. Each non-zero tally is written as
    one line:

        <prefix>.<footype>.<border> set_<tally> <timestamp>

    Lines are written in sorted (footype, timestamp, border) order so the
    output is reproducible between runs.
    """
    info = defaultdict(int)
    # The tally does not depend on visiting order, so the (possibly very
    # large) input is walked as-is instead of being copied and sorted first.
    for key in stat:
        footype, timestamp, _suid = key
        count = stat[key]
        for border in borders:
            if count >= border:
                info[(footype, timestamp, border)] += 1
    # Only the small aggregate is sorted.
    for footype, timestamp, border in sorted(info):
        tally = info[(footype, timestamp, border)]
        f.write("%s.%s.%d set_%d %s\n" % (prefix, footype, border, tally, timestamp))

today = datetime.today()
today_week = get_weekstart(today)
today_month = get_monthstart(today)

print datetime.today().strftime("[%Y-%m-%d %H:%M:%S]"), "Started"

if sys.argv[1] == "day":
    daterange = [get_daystart(today - timedelta(days=1))]

elif sys.argv[1] == "week":
    startdate = today - timedelta(days=7)
    startdate = datetime.combine(startdate - timedelta(days=startdate.weekday()), time())
    daterange = get_weekdates(startdate)

elif sys.argv[1] == "month":
    startdate = today - timedelta(days=7)
    startdate = datetime.combine(startdate.date().replace(day=1), time())
    daterange = get_monthdates(startdate, today)

else:
    print "Usage: masscomplainants.py week|month"
    exit(1)

# Occurrence counters, all starting empty with a default of 0. The log loop
# keys them by (footype, period timestamp, suid); the "corp" variants are
# only incremented for suids accepted by is_corp().
stat_day, stat_week, stat_month = (defaultdict(int) for _ in range(3))
stat_corp_day, stat_corp_week, stat_corp_month = (defaultdict(int) for _ in range(3))

# Running error count, shown in the loop's progress output.
errors = 0

# Read one log file per requested day and count records per
# (footype, period, suid) in the day/week/month accumulators.
for dt in daterange:
    day = int(dt.strftime("%s"))
    week = get_weekstart(dt)
    month = get_monthstart(dt)
    # A day that lies n whole days back is read from "<COMPLLOG>.<n>";
    # n == 0 means the unsuffixed current log.
    n = (today - dt).days
    logname = COMPLLOG + (".{}".format(n) if n > 0 else "")
    try:
        # The with-block closes the log even if reading fails part-way.
        with open(logname) as log:
            for i, line in enumerate(log):
                if i % 100000 == 0:
                    # Progress: line number and errors so far.
                    print >> sys.stderr, i, errors
                # Drop the line terminator first; otherwise it stays glued
                # to the last field and the "<skip>" test below can never
                # match a newline-terminated line.
                parts = line.rstrip("\n").split("\t")
                try:
                    flags = parts[5]
                    if "_XX" in flags or parts[-1] == "<skip>":
                        continue
                    foo, suid = parts[2], parts[7]
                except IndexError:
                    # Too few tab-separated fields: count it and move on.
                    errors += 1
                    continue

                footype = get_footype(foo, flags)
                corp = is_corp(suid)

                stat_day[(footype, day, suid)] += 1
                if corp:
                    stat_corp_day[(footype, day, suid)] += 1
                # Weekly and monthly tallies only take days whose week or
                # month started before the current one.
                if week < today_week:
                    stat_week[(footype, week, suid)] += 1
                    if corp:
                        stat_corp_week[(footype, week, suid)] += 1
                if month < today_month:
                    stat_month[(footype, month, suid)] += 1
                    if corp:
                        stat_corp_month[(footype, month, suid)] += 1
    except EnvironmentError as e:
        # A missing or unreadable log must not abort the remaining days,
        # but it should be visible rather than silently swallowed.
        errors += 1
        print >> sys.stderr, "Cannot read %s: %s" % (logname, e)

f = NamedTemporaryFile(delete = True)

if sys.argv[1] == "day":
    build_stat(f, stat_day, "so.masscompl.day")
    build_stat(f, stat_corp_day, "so.masscompl.corp.day")
elif sys.argv[1] == "week":
    build_stat(f, stat_week, "so.masscompl.week")
    build_stat(f, stat_corp_week, "so.masscompl.corp.week")
elif sys.argv[1] == "month":
    build_stat(f, stat_month, "so.masscompl.month")
    build_stat(f, stat_corp_month, "so.masscompl.corp.month")

f.flush()
os.system("/opt/plotnik/plotnik_collector.py -f %s" % f.name)

print datetime.today().strftime("[%Y-%m-%d %H:%M:%S]"), "Done"
