#!/usr/bin/python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, re
from datetime import date
import yt.wrapper as ytw

# Date suffix used in every table path below: the first command-line
# argument when one is given, otherwise today's date in ISO format.
if len(sys.argv) > 1:
    d = sys.argv[1]
else:
    d = date.today().isoformat()

# Folder under which the intermediate and result tables are created.
YT_TMP_FOLDER = "//home/so_fml/klimiky"
# Input table for the map stage, selected by the date suffix.
YT_USERS_ACTSLOG = "//logs/mail-user-journal-log/1d/%s" % d
# First and second output tables of the map stage.
YT_MIDS_READ_LOG = "%s/mids-users-read-log-%s" % (YT_TMP_FOLDER, d)
YT_MIDS_RECEIVE_LOG = "%s/mids-users-receive-log-%s" % (YT_TMP_FOLDER, d)
# Output table of the reduce stage.
YT_USERS_READS_LOG = "%s/users-reads-log-%s" % (YT_TMP_FOLDER, d)

def usersActsLogMapper(record):
    """Turn one journal record into at most one "read" or "receive" row.

    Records without a "state" field produce nothing.

    * If the state contains the substring "read;", the first run of digits
      found in the state is emitted as "mid" with action "read", tagged
      with "@table_index" 0.
    * Otherwise, if "operation" equals "receive", the digits following
      "mid=" / "mids=" in the state are emitted as "mid" with action
      "receive", together with the record's "connectionId" as "qid",
      tagged with "@table_index" 1. Nothing is emitted when the pattern
      does not match or "connectionId" is absent.

    NOTE(review): "@table_index" presumably selects the output table of
    the map operation - confirm against the YT wrapper documentation.
    """
    if "state" not in record:
        return

    state = record['state']

    if state.find("read;") >= 0:
        # The read branch wins outright: a state containing "read;" is never
        # examined as a receive event, even when no digits are found.
        found = re.search(r'\d+', state)
        if found:
            yield {
                "@table_index": 0,
                "mid":          found.group(0),
                "action":       "read"
            }
        return

    if "operation" not in record or record["operation"] != "receive":
        return

    found = re.search(r'\bmids?=(\d+)', state)
    if not found or "connectionId" not in record:
        return

    yield {
        "@table_index": 1,
        "mid":          found.group(1),
        "action":       "receive",
        "qid":          record["connectionId"]
    }

def usersReadLogReducer(key, records):
    """Pair the mid seen in "read" rows with the qid seen in "receive" rows.

    Args:
        key: reduce key of the group; not used by this function.
        records: iterable of rows, each carrying "@table_index". Rows with
            index 0 contribute "mid", rows with index 1 contribute "qid";
            rows with any other index are ignored.

    Yields:
        A single {"mid": ..., "qid": ...} row when both a non-empty mid and
        a non-empty qid were found, otherwise nothing. When several rows
        supply a value, the last non-empty one wins.

    NOTE(review): indexes 0 and 1 presumably correspond to the read and
    receive tables in the order they are passed to run_reduce - confirm.
    """
    mid, qid = "", ""
    for rec in records:
        table_index = rec["@table_index"]
        if table_index == 0:
            # Keep the previous value when this row has no usable mid.
            mid = rec.get('mid') or mid
        elif table_index == 1:
            # The value read here is "qid", so it must be guarded as well:
            # a row without it used to raise KeyError, and an empty one used
            # to wipe out a qid captured from an earlier row.
            if rec.get('mid') and rec.get('qid'):
                qid = rec['qid']
    if mid and qid:
        yield {"mid": mid, "qid": qid}

if __name__ == "__main__":
    # Stage 1: map the journal table into the read and receive tables.
    print("Start mapping...")
    sys.stdout.flush()
    ytw.run_map(
        usersActsLogMapper,
        YT_USERS_ACTSLOG,
        [YT_MIDS_READ_LOG, YT_MIDS_RECEIVE_LOG],
        spec={"job_io": {"control_attributes": {"enable_row_index": True}}}
    )

    # Stage 2: sort both intermediate tables by the column the reduce
    # stage groups on.
    print("Mapping done. Start sorting...")
    sys.stdout.flush()
    ytw.run_sort(YT_MIDS_READ_LOG, sort_by="mid")
    ytw.run_sort(YT_MIDS_RECEIVE_LOG, sort_by="mid")

    # Stage 3: reduce the two tables by "mid" into the result table.
    print("Sorting done. Start reduce...")
    sys.stdout.flush()
    ytw.run_reduce(
        usersReadLogReducer,
        [YT_MIDS_READ_LOG, YT_MIDS_RECEIVE_LOG],
        YT_USERS_READS_LOG,
        reduce_by="mid",
        spec={"job_io": {"control_attributes": {"enable_row_index": True}}}
    )

    # Stage 4: sort the result table by "qid".
    print("Reducing done. Start sorting...")
    sys.stdout.flush()
    ytw.run_sort(YT_USERS_READS_LOG, sort_by="qid")

    print("[ Done ]")
