#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, re, argparse
import yt.wrapper as ytw
import nirvana.mr_job_context as nv
from datetime import date
from urllib import urlopen

SAVE_MODEL_STATUS_URL = "https://web.so.yandex-team.ru/ml/save_model_status/?workflow_id=%s&workflow_instance_id=%s&status=pool_gathering&route=%s"
CMPL_TYPE_RE = "foo"
CMPL_USER_COMPL_COUNT = 10

@ytw.with_context
class CmplLogMapper:
    """Map-stage callable that filters complaint-log records and trims them.

    A record is emitted only when all of the following hold:
      * its "type" field matches the pattern given to the constructor;
      * its "so_res" field, read as a decimal integer, equals 1 or 4
        (a missing or non-numeric value is treated as 0);
      * the first "-" in its "queueid" field sits at index 2 or later.
    """

    def __init__(self, cmpl_type_re):
        # Object exposing .search(); the script passes a compiled regexp.
        self.cmpl_type_re = cmpl_type_re

    def __call__(self, record, context):
        """Yield at most one reduced row for `record`; `context` is unused."""
        verdict_raw = record.get("so_res", "")
        if verdict_raw.isdigit():
            verdict = int(verdict_raw)
        else:
            verdict = 0

        # Guard clauses are kept in the same order as the checks they replace.
        if not self.cmpl_type_re.search(record.get("type", "")):
            return
        if verdict not in (1, 4):
            return
        if record.get("queueid", "").find("-") <= 1:
            return

        # Only the first whitespace-separated token of each timestamp is kept
        # (presumably the date part -- verify against the log format).
        act_day = record["actdate"].split()[0]
        msg_day = record["msgdate"].split()[0]
        yield {
            "actdate":  act_day,
            "msgdate":  msg_day,
            "queueid":  record["queueid"],
            "karma":    record.get("karma", ""),
            "type":     record.get("type", ""),
            "rcpt_uid": record.get("uid", "")
        }

@ytw.with_context
class CmplLogReducer:
    """Reduce-stage callable that drops over-active (rcpt_uid, actdate) groups.

    For a group whose size is strictly below the configured threshold it
    writes to two output tables:
      * table 1: every record whose "msgdate" is not later than the group's
        "actdate", extended with "c_count" (the size of the whole group);
      * table 0: one {"rcpt_uid", "actdate"} row for the group.
    Groups at or above the threshold produce no output. A falsy threshold
    (0 or None) also produces no output at all.
    """

    def __init__(self, cmpl_user_compl_cnt):
        # Exclusive upper bound on the number of records per reduce key.
        self.cmpl_user_compl_cnt = cmpl_user_compl_cnt

    def __call__(self, key, records, context):
        """Yield table switches and rows for one reduce key; `context` is unused."""
        # The group is materialised because its size is needed before any
        # row can be emitted.
        group = list(records)
        total = len(group)
        limit = self.cmpl_user_compl_cnt
        if not limit or total >= limit:
            return

        for row in group:
            if row['msgdate'] > key['actdate']:
                continue
            row['c_count'] = total
            yield ytw.create_table_switch(1)
            yield row

        # The per-key row is written even when no record passed the date check.
        yield ytw.create_table_switch(0)
        yield {'rcpt_uid': key['rcpt_uid'], 'actdate': key['actdate']}

@ytw.with_context
def uidsReducer(key, records, context):
    """Collapse all rows of one rcpt_uid into a single row with the latest actdate.

    Yields exactly one {"rcpt_uid", "actdate_max"} row, where "actdate_max" is
    the greatest "actdate" among `records` (values are compared as-is), or ''
    when `records` is empty. `context` is unused.
    """
    latest = ''
    for row in records:
        candidate = row['actdate']
        # An empty running value is always replaced; otherwise keep the greater one.
        if not latest or candidate > latest:
            latest = candidate
    yield {'rcpt_uid': key['rcpt_uid'], 'actdate_max': latest}

def doRequest(url, prompt):
    """Fetch `url` and return the response body, or "" on any failure.

    This is a best-effort helper: it never raises. A non-200 status or any
    exception is reported on stderr, prefixed with `prompt`, and "" is
    returned.

    Parameters:
        url    -- address to open with urlopen().
        prompt -- short label placed at the start of every diagnostic line.

    Returns:
        The body as returned by read() when the status code is 200,
        otherwise the empty string.
    """
    try:
        f = urlopen(url)
        try:
            if f.getcode() == 200:
                return f.read()
            # NOTE(review): the value printed after "body:" is f.info(), i.e.
            # the response metadata object, not the payload.
            sys.stderr.write('{0} response HTTP code: {1}, body: {2}'.format(prompt, f.getcode(), f.info()) + '\n')
        finally:
            # Release the connection on every path; the handle used to leak.
            f.close()
    except Exception as e:
        sys.stderr.write('%s HTTP request failed: %s' % (prompt, str(e)) + '\n')
    return ""

if __name__ == "__main__":
    # Script entry point. Steps, in order:
    #   1. parse the command line and resolve output paths;
    #   2. report the "pool_gathering" status for the current Nirvana workflow;
    #   3. run a map-reduce over every table under CMPLLOG_YT_PATH;
    #   4. collapse the temporary per-(uid, date) table to one row per uid;
    #   5. sort both resulting tables.
    CMPLLOG_YT_PATH = "//home/logfeller/logs/mail-so-compl-log/1d"
    CMPLLOG_MAP_PATH = "//home/so_fml/nirvana/cmpllog_map_"
    CMPLLOG_UIDS_PATH = "//home/so_fml/nirvana/cmpllog_uids_"
    CMPLLOG_DATE = date.today().isoformat()
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--cmpltype',       type = str, help = "complaints type's regexp for filtering records from complaints' log")
    parser.add_argument('-d', '--ref_date',       type = str, help = "reference date for gathering complaints log", default = date.today().isoformat())
    parser.add_argument('-o', '--output',         type = str, help = "path to mapped complaints log in YT")
    parser.add_argument('-u', '--output_uids',    type = str, help = "path to YT table with uids of complainants")
    parser.add_argument('-n', '--user_compl_cnt', type = int, help = "max count of user complaints for one date")
    parser.add_argument('-r', '--route',          type = str, help = "The type of mail for which the model is calculated")
    # Unknown options are tolerated and ignored.
    args = parser.parse_known_args()[0]
    if args.cmpltype:
        CMPL_TYPE_RE = args.cmpltype
    if args.ref_date:
        CMPLLOG_DATE = args.ref_date
    # Without explicit output paths the reference date is appended to the default prefixes.
    CMPLLOG_UIDS_PATH = args.output_uids or (CMPLLOG_UIDS_PATH + CMPLLOG_DATE)
    CMPLLOG_MAP_PATH = args.output or (CMPLLOG_MAP_PATH + CMPLLOG_DATE)
    # The temporary table lives next to the uids output and is named after the reference date.
    CMPLLOG_UIDS_TMP_PATH = CMPLLOG_UIDS_PATH[:CMPLLOG_UIDS_PATH.rfind('/') + 1] + CMPLLOG_DATE
    # Override the module-level default only when -n was actually given.
    # Previously an omitted -n forced the threshold to 0, and the reducer
    # emits nothing for a falsy threshold, so the run produced empty tables.
    # An explicit "-n 0" still yields empty output.
    if args.user_compl_cnt is not None:
        CMPL_USER_COMPL_COUNT = args.user_compl_cnt
    ROUTE = args.route or 'in'
    ctx = nv.context()
    meta = ctx.get_meta()
    # Best effort: doRequest reports failures on stderr and the result is ignored.
    doRequest(SAVE_MODEL_STATUS_URL % (meta.get_workflow_uid(), meta.get_workflow_instance_uid(), ROUTE), 'Saving model status')
    cmpl_type_re = re.compile(CMPL_TYPE_RE)
    # NOTE(review): every table under CMPLLOG_YT_PATH is read; the reference
    # date only affects output names -- confirm this is intended.
    source_tables = [CMPLLOG_YT_PATH + "/" + name for name in sorted(ytw.cypress_commands.list(CMPLLOG_YT_PATH))]
    # Output order matters: the reducer switches to table 0 for uid rows and
    # to table 1 for complaint records.
    ytw.run_map_reduce(CmplLogMapper(cmpl_type_re), CmplLogReducer(CMPL_USER_COMPL_COUNT), source_tables,
                       [CMPLLOG_UIDS_TMP_PATH, CMPLLOG_MAP_PATH], reduce_by = ["rcpt_uid", "actdate"], spec = {"job_io": {"control_attributes": {"enable_table_index": True}}})
    ytw.run_sort(CMPLLOG_UIDS_TMP_PATH, sort_by = ["rcpt_uid"])
    ytw.run_reduce(uidsReducer, CMPLLOG_UIDS_TMP_PATH, CMPLLOG_UIDS_PATH, reduce_by = "rcpt_uid")
    ytw.remove(CMPLLOG_UIDS_TMP_PATH, force = True)
    ytw.run_sort(CMPLLOG_UIDS_PATH, sort_by = ["rcpt_uid"])
    ytw.run_sort(CMPLLOG_MAP_PATH, sort_by = "queueid")
