#!/usr/bin/env python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import time, pymongo, zmq
from multiprocessing.pool import Pool

# ------------------------- Cleanup parameters ---------------------------
# When True, stale records are only reported to the log and nothing is deleted.
ONLY_PRINT = False
# Epoch (Unix seconds) from which day numbers are counted:
# 946674000 == 1999-12-31 21:00:00 UTC (presumably local midnight at UTC+3).
KDAY_TIME_BASE = 946674000
# Retention periods, expressed in days.
YEAR = 365
HALFYEAR = YEAR // 2
# Collection name -> age in days after which a record is considered stale.
CLEANUP_PERIOD = {
    "fromto":   HALFYEAR,
    "compl":    YEAR,
    "fromstat": YEAR
}
# ------------------------- DB hosts -------------------------------------
# One host name per suffix letter; all of them go into a single MongoDB URI.
SERVERS = ["longshingler1%s.so.yandex.net" % letter for letter in "ghm"]
# ------------------------- LogXY parameters -----------------------------
# Endpoint and send high-water mark of the ZeroMQ PUSH socket used for logging.
ZMQ_LOGXY_ADDRESS = "ipc:///tmp/logxy.sock"
ZMQ_LOGXY_QUEUE_SIZE = 100000
# Log file path that is prepended to every message pushed to the log socket.
LOG = "/var/log/so-logs/longshingler_cleanup.log"

# ------------------------- Auxiliary functions --------------------------
def current_day():
    """Return the number of whole days elapsed since KDAY_TIME_BASE.

    The result is an integer day counter on the same scale as the ``lt``
    field that the cleanup compares it against.
    """
    # Explicit floor division: plain "/" only yields an integer under
    # Python 2 semantics and silently becomes a float with true division.
    return (int(time.time()) - KDAY_TIME_BASE) // 86400

class writeLog:
    """Callable logger that pushes formatted lines to a ZeroMQ PUSH socket.

    Every instance owns its own ZeroMQ context and a PUSH socket connected to
    ZMQ_LOGXY_ADDRESS. Calling the instance sends one line made of the LOG
    path, a tab, a bracketed local timestamp and the message (presumably the
    receiving side uses the path prefix to pick the target file - confirm).
    """

    def __init__(self):
        ctx = zmq.Context()
        push = ctx.socket(zmq.PUSH)
        # Bound the outgoing queue so an unavailable receiver cannot make
        # this process buffer messages without limit.
        push.setsockopt(zmq.SNDHWM, ZMQ_LOGXY_QUEUE_SIZE)
        push.connect(ZMQ_LOGXY_ADDRESS)
        self.context = ctx
        self.client = push

    def __call__(self, msg):
        """Send ``msg`` without blocking; failures are printed, never raised."""
        try:
            stamp = time.strftime("%Y-%m-%d %H:%M:%S")
            line = "%s\t[%s] %s\n" % (LOG, stamp, msg)
            self.client.send(line, zmq.NOBLOCK)
        except Exception as e:
            # Logging is best effort: report the problem on stdout and go on.
            print("ZMQ error: %s" % str(e))

def cleanupCollection(collection_name):
    """Delete stale records from one collection of the LONGSHINGLER database.

    Every document of ``collection_name`` is scanned. A document is stale when
    its ``lt`` field (a day number on the ``current_day()`` scale) is at least
    ``CLEANUP_PERIOD[collection_name]`` days behind the current day; documents
    with a missing or zero ``lt`` are left alone. Stale documents are deleted
    one by one, or only logged when ``ONLY_PRINT`` is set.

    The function opens its own DB connection and log socket, so it can be run
    in a separate worker process per collection.

    Args:
        collection_name: collection to clean; expected to be a key of
            CLEANUP_PERIOD (an unknown name is logged as an error).

    Returns:
        Tuple ``(collection_name, deleted, errors)`` where ``deleted`` is the
        number of removed documents and ``errors`` the number of delete
        attempts that failed or removed nothing.
    """
    client = pymongo.MongoClient("mongodb://longshingler:longshingler@%s/LONGSHINGLER" % ",".join(SERVERS))
    db = client["LONGSHINGLER"]
    day, i, n, err, log = current_day(), 0, 0, 0, writeLog()
    try:
        # The total is informational only: if it cannot be obtained, report
        # the failure and still attempt the scan.
        try:
            total = db.command("collstats", collection_name)['count']
        except Exception as e:
            log("Collection: %s, DB Error: %s" % (collection_name, str(e)))
        else:
            log("Collection: %s, Total: %d" % (collection_name, total))
        try:
            period = CLEANUP_PERIOD[collection_name]
            collection = db[collection_name]
            # Only "_id" and "lt" are needed, so do not fetch whole documents.
            for obj in collection.find({}, {"lt": 1}):
                i += 1
                if i % 1000 == 0:
                    # The scan may run for a long time; refresh the day
                    # counter periodically.
                    day = current_day()
                    if i % 100000 == 0:
                        log("%s: scanned %d records" % (collection_name, i))
                lasttime = obj.get("lt", None)
                if not lasttime or day - lasttime < period:
                    continue
                if ONLY_PRINT:
                    log("%s %d %s" % (collection_name, i, obj["_id"]))
                    continue
                try:
                    res = collection.delete_one({"_id": obj["_id"]})
                    if isinstance(res, pymongo.results.DeleteResult) and res.deleted_count > 0:
                        n += res.deleted_count
                        # Report progress only when the counter has really
                        # advanced to a multiple of 1000.
                        if n % 1000 == 0:
                            log("%s: %d %d %d %s" % (collection_name, i, n, err, str(obj['_id'])))
                    else:
                        err += 1
                except Exception as e:
                    # A failed delete counts as an error but must not stop
                    # the scan.
                    err += 1
                    log("%s: %d\nDB Error: %s" % (collection_name, i, str(e)))
            log("Collection %s: [ Done ]" % collection_name)
        except Exception as e:
            log("Collection: %s, DB Error: %s" % (collection_name, str(e)))
    finally:
        client.close()
    return (collection_name, n, err)

if __name__ == '__main__':
    # Clean every configured collection in its own worker process and log a
    # per-collection summary once all of them have finished.
    day, log = current_day(), writeLog()
    # Single source of truth for the announcement, the pool size and the jobs.
    collections = list(CLEANUP_PERIOD.keys())
    log("Current day: %d, Collections to be cleaned: %s" % (day, ",".join(collections)))
    pool = Pool(len(collections))
    try:
        # Run the cleanup exactly once and keep its results for the summary.
        results = pool.map(cleanupCollection, collections)
    finally:
        pool.close()
        pool.join()
    for (collection_name, n, err) in results:
        log("In collection %s deleted %d records (%d errors)" % (collection_name, n, err))
