#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os
import yt.wrapper as ytw
from datetime import datetime, date, timedelta

# YT table path pattern; the "%s" placeholder is filled with an ISO date.
LOG_PATH = '//statbox/heavy-dict/passport_userdata/%s'

# Maps each template file to the output file rendered from it:
# the "#UIDS#" marker in the template is replaced with the loaded UIDs.
UIDS_FILES = {
    "/opt/precast-sopassportlog-parser/yuid.roll.template":
        "/opt/rules/yuid.roll",
    "/opt/precast-sopassportlog-parser/msearch_sid669uid.roll.template":
        "/opt/rules/msearch/sid669uid.roll",
    "/opt/precast-sopassportlog-parser/outgoing_yuid.roll.template":
        "/opt/rules/outgoing/yuid.roll",
}

# YT proxy URL and the pool the map operation is run in.
YT_PROXY = "hahn.yt.yandex.net"
YT_POOL = "processing-mail-so"

# Directory that holds ".yt/token"; falls back to /root when HOME is unset.
HOME_DIR = os.environ.get("HOME", "/root")


@ytw.with_context
def logMapper(record, context):
    """Map function: emit {"uid": <int>} for records that carry sid 669.

    A record is skipped when its 'uid' or 'sid' field is missing or falsy,
    when the uid is not made of digits only, or when 669 is not among its
    sids.

    NOTE(review): assumes 'uid' is a string and 'sid' is an iterable of
    integers -- confirm against the input table schema.

    :param record: input row (dict-like).
    :param context: job context supplied by ytw.with_context; unused here.
    """
    if 'uid' not in record or not record['uid']:
        return
    if 'sid' not in record or not record['sid']:
        return

    user_id = record["uid"]
    sid_list = list(record["sid"])
    if not user_id.isdigit():
        return
    if 669 in sid_list:
        yield {"uid": int(user_id)}


def loadCorpUsers():
    """Run logMapper over yesterday's log table and return the emitted UIDs.

    Configures the YT client (proxy, token read from "<HOME_DIR>/.yt/token",
    parallel reads, pickling module filter), runs a map operation from
    LOG_PATH (formatted with yesterday's ISO date) into a temporary table,
    reads that table back and removes it.

    :return: list of UIDs as strings, one per row of the temporary table
             (empty if the mapper emitted nothing).
    :raises: whatever the YT client or the token file read raises; the
             temporary table is removed in that case as well.
    """
    DATE = (date.today() - timedelta(days=1)).isoformat()
    TMP_LOG_PATH = '//home/so_fml/tmp/staff_tmp_%s' % DATE

    ytw.config['proxy']['url'] = YT_PROXY
    # Read the token inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("%s/.yt/token" % HOME_DIR) as token_file:
        ytw.config['token'] = token_file.read().strip()
    ytw.config["read_parallel"]["enable"] = True
    # Filter out every module whose name contains 'hashlib' through the
    # client's pickling module_filter setting.
    ytw.config['pickling']['module_filter'] = lambda module: 'hashlib' not in getattr(module, '__name__', '')

    try:
        ytw.run_map(logMapper, LOG_PATH % DATE, TMP_LOG_PATH, job_count=1000, pool=YT_POOL)
        return [
            str(row["uid"])
            for row in ytw.read_table(TMP_LOG_PATH, format=ytw.JsonFormat(), raw=False)
        ]
    finally:
        # Always drop the temporary table, even when the map operation or the
        # read fails; force=True makes removal of a missing table a no-op.
        ytw.remove(TMP_LOG_PATH, force=True)


def main():
    print "Parsing started at", datetime.today().strftime("%Y-%m-%d %H:%M:%S")
    UIDS = loadCorpUsers()
    if len(UIDS) > 0:
        for template, output in UIDS_FILES.iteritems():
            data = open(template).read().replace("#UIDS#", '\n'.join(UIDS))
            print >> open(output, "wt"), data
    else:
        print "UIDs loading process failed: returned empty list of UIDs"
    print "Parsing done at", datetime.today().strftime("%Y-%m-%d %H:%M:%S")


# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
