#!/usr/bin/env python

from nile.api.v1 import (
    aggregators as na,
    extractors as ne,
    filters as nf,
    cli
)

from qb2.api.v1 import (
    extractors as qe,
    filters as qf,
    QB2
)

from qb2.api.v1.typing import (
    Optional, Integer,
    String, List,
    Dict, Float
)


def get_key(json_dict, key):
    """Return ``json_dict[key]``, or ``None`` when the lookup is not possible.

    Forgiving accessor for parsed log payloads: a missing key, an
    out-of-range index, or a container that is ``None`` or otherwise not
    subscriptable all yield ``None`` instead of raising.

    :param json_dict: container to index (normally a dict; may be ``None``).
    :param key: key (or index) to look up.
    :return: the stored value, or ``None`` if it cannot be retrieved.
    """
    try:
        return json_dict[key]
    except (LookupError, TypeError):
        # LookupError covers KeyError/IndexError; TypeError covers None,
        # non-subscriptable containers and unhashable keys.  Anything else
        # (KeyboardInterrupt, SystemExit, ...) must propagate.
        return None


def get_mid_from_remote_answer(txt):
    """Parse the integer message id out of a ``remote-answer`` string.

    The text is split on ``"; "``; the second segment is then split on a
    single space and its second token is converted to ``int``.  For example
    ``"250 ok; mid 123"`` yields ``123``.

    :param txt: raw ``remote-answer`` value (may be ``None``).
    :return: the parsed integer, or ``None`` when the text does not have the
        expected shape or the token is not a valid integer.
    """
    try:
        return int(txt.split("; ")[1].split(" ")[1])
    except (AttributeError, IndexError, TypeError, ValueError):
        # AttributeError/TypeError: txt is None or not a text string;
        # IndexError: fewer segments/tokens than expected;
        # ValueError: the token is not an integer literal.
        return None


def str_to_int(txt):
    """Convert *txt* to ``int``, returning ``None`` when that is impossible.

    :param txt: value to convert, typically a string from a log record
        (may be ``None``).
    :return: ``int(txt)``, or ``None`` if the value cannot be converted.
    """
    try:
        return int(txt)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: not an integer
        # literal; OverflowError: infinite float.  Other exceptions
        # (e.g. KeyboardInterrupt) are not conversion failures and propagate.
        return None


def str_to_list_comma(txt):
    """Split a comma-separated string into a list of its parts.

    :param txt: text to split (may be ``None``).
    :return: ``txt.split(",")``, or an empty list when *txt* is not a
        splittable text value.
    """
    try:
        return txt.split(",")
    except (AttributeError, TypeError):
        # AttributeError: value has no ``split`` (None, numbers, ...);
        # TypeError: ``split`` exists but rejects a text separator.
        return []


def str_to_list_semicolon(txt):
    """Split a semicolon-separated string into a list of its parts.

    :param txt: text to split (may be ``None``).
    :return: ``txt.split(";")``, or an empty list when *txt* is not a
        splittable text value.
    """
    try:
        return txt.split(";")
    except (AttributeError, TypeError):
        # AttributeError: value has no ``split`` (None, numbers, ...);
        # TypeError: ``split`` exists but rejects a text separator.
        return []


def parse_lsa(txt):
    """Parse a ``"name:value; name:value"`` string into a dict of floats.

    Items are separated by ``"; "``.  Each item is split on ``":"``; the
    first piece becomes the key and the second piece is converted to
    ``float`` (any further pieces are ignored).  Parsing is all-or-nothing:
    one malformed item makes the whole result ``None``.

    :param txt: raw text to parse (may be ``None``).
    :return: dict mapping names to floats, or ``None`` when *txt* is not a
        string or any item lacks a ``":"`` / has a non-numeric value.
    """
    try:
        output = {}
        for item in txt.split("; "):
            parts = item.split(":")
            output[parts[0]] = float(parts[1])
    except (AttributeError, IndexError, TypeError, ValueError):
        # AttributeError/TypeError: txt is None or not a text string;
        # IndexError: item without ":"; ValueError: value is not a number.
        return None
    return output


def prep_inbound_emails_uj(job):
    """Build the stream of received-mail records from the user-events log.

    Reads ``//home/mail-logs/core/mail-user-events/1d/@dates`` as a
    ``generic-yson-log`` and keeps only records whose ``operation`` is
    ``receive``, whose ``uid``, ``mid``, ``stid``, ``fid`` and ``ftype`` are
    all defined, and whose ``stid`` is not an empty string.

    :param job: job object the source table is attached to.
    :return: the qb2-processed stream.
    """
    source = job.table("//home/mail-logs/core/mail-user-events/1d/@dates")

    # Everything except ``uid`` is pulled out of the ``stateJson`` payload;
    # get_key / str_to_int return None for values they cannot read, and the
    # ``defined`` filter below then drops such records.
    fields = [
        qe.integer_log_field("uid"),
        qe.log_field("stateJson").hide(),
        qe.custom(
            "sender_email",
            lambda stateJson: get_key(stateJson, "emailFrom"),
        ).add_hints(type=Optional[String]),
        qe.custom(
            "mid",
            lambda stateJson: str_to_int(get_key(stateJson, "mid")),
        ).add_hints(type=Optional[Integer]),
        qe.custom(
            "stid",
            lambda stateJson: get_key(stateJson, "stid"),
        ).add_hints(type=Optional[String]),
        qe.custom(
            "subject",
            lambda stateJson: get_key(stateJson, "subject"),
        ).add_hints(type=Optional[String]),
        qe.custom(
            "ftype",
            lambda stateJson: str_to_int(get_key(stateJson, "ftype")),
        ).add_hints(type=Optional[Integer]),
        qe.custom(
            "fid",
            lambda stateJson: str_to_int(get_key(stateJson, "fid")),
        ).add_hints(type=Optional[Integer]),
        # Needed only by the filter below.
        qe.log_field("operation").hide(),
    ]

    filters = [
        qf.equals("operation", "receive"),
        qf.defined("uid", "mid", "stid", "fid", "ftype"),
        qf.not_(qf.equals("stid", "")),
    ]

    return source.qb2(log="generic-yson-log", fields=fields, filters=filters)


def prep_inbound_emails_nw(job):
    """Build the stream of accepted inbound messages from the nwsmtp log.

    Reads ``//home/logfeller/logs/mail-nwsmtp-log/1d/@dates`` as a
    ``generic-yson-log``.  A record is kept when ``direction`` is ``in``,
    ``sent-status`` is ``accept``, ``uid``/``mid``/``msg_id``/``queue_id``
    are defined, ``so_status`` and ``avir2_status`` are not ``unknown``, and
    ``msg_id``/``queue_id`` are not empty strings.

    :param job: job object the source table is attached to.
    :return: the qb2-processed stream.
    """
    source = job.table("//home/logfeller/logs/mail-nwsmtp-log/1d/@dates")

    fields = [
        # Message id is parsed out of the free-form remote answer text.
        qe.log_field("remote-answer").hide(),
        qe.custom("mid", get_mid_from_remote_answer, "remote-answer")
        .add_hints(type=Optional[Integer]),
        qe.log_field("rcpt_uid").hide(),
        qe.custom("uid", str_to_int, "rcpt_uid")
        .add_hints(type=Optional[Integer]),
        qe.log_field("so-labels").hide(),
        qe.custom("types", str_to_list_comma, "so-labels")
        .add_hints(type=Optional[List[String]]),
        qe.log_field("so-status").rename("so_status")
        .add_hints(type=Optional[String]),
        # Disabled field, kept for reference:
        # qe.log_field("avir-status").rename("avir_status").add_hints(type=Optional[String]),
        qe.log_field("client").rename("sender_client")
        .add_hints(type=Optional[String]),
        # ``avir2-status`` is read from the ``_other`` container field.
        qe.log_field("_other").rename("other").hide(),
        qe.custom(
            "avir2_status",
            lambda other: get_key(other, "avir2-status"),
        ).add_hints(type=Optional[String]),
        qe.log_field("direction").hide(),
        qe.log_field("message-id").rename("msg_id")
        .add_hints(type=Optional[String]),
        qe.log_field("smtp-session").rename("queue_id")
        .add_hints(type=Optional[String]),
        qe.log_field("sent-status").hide(),
    ]

    filters = [
        qf.equals("direction", "in"),
        qf.defined("uid", "mid", "msg_id", "queue_id"),
        qf.equals("sent-status", "accept"),
        qf.not_(qf.equals("so_status", "unknown")),
        qf.not_(qf.equals("avir2_status", "unknown")),
        qf.not_(qf.equals("msg_id", "")),
        qf.not_(qf.equals("queue_id", "")),
    ]

    return source.qb2(log="generic-yson-log", fields=fields, filters=filters)


def prep_inbound_emails_so(job):
    """Build the per-recipient stream of records from the SO ML log.

    Reads ``//statbox/mail-so-ml-log/@dates`` as a ``generic-yson-log``.
    ``rcpt_uid`` is split on ``";"`` and unfolded, and each piece is
    converted to an integer ``uid``.  Records are kept when ``uid``,
    ``msg_id`` and ``queue_id`` are defined and ``msg_id``/``queue_id`` are
    not empty strings.

    :param job: job object the source table is attached to.
    :return: the qb2-processed stream.
    """
    source = job.table("//statbox/mail-so-ml-log/@dates")

    fields = [
        qe.log_field("from").rename("sender_domain")
        .add_hints(type=Optional[String]),
        qe.log_field("lngg").rename("language")
        .add_hints(type=Optional[String]),
        qe.integer_log_field("x-original-size").rename("size")
        .add_hints(type=Optional[Integer]),
        qe.log_field("spam").rename("spam_status")
        .add_hints(type=Optional[String]),
        qe.log_field("x-yandex-queueid").rename("queue_id")
        .add_hints(type=Optional[String]),
        qe.log_field("msid").rename("msg_id")
        .add_hints(type=Optional[String]),
        qe.log_field("logcl").hide(),
        # Disabled field, kept for reference:
        # qe.custom("types", str_to_list_comma, "logcl").add_hints(type=Optional[List[String]]),
        # Recipient uids: split the raw value, unfold it, convert each piece.
        qe.log_field("rcpt_uid").hide(),
        qe.custom("uids", str_to_list_semicolon, "rcpt_uid").hide(),
        qe.unfold("raw_uid", "uids").hide(),
        qe.custom("uid", str_to_int, "raw_uid")
        .add_hints(type=Optional[Integer]),
        # Disabled fields, kept for reference:
        # qe.log_field("lsa").rename("raw_lsa").hide(),
        # qe.custom("lsa", parse_lsa, "raw_lsa").add_hints(type=Optional[Dict[String, Float]]),
        # qe.log_field("subj").rename("subject").add_hints(type=Optional[String]),
        # qe.log_field("r_sp").rename("raw_r_sp").hide(),
        # qe.custom("r_sp", str_to_list_semicolon, "raw_r_sp").add_hints(type=Optional[List[String]]),
        # qe.log_field("r_cancel").rename("raw_r_cancel").hide(),
        # qe.custom("r_cancel", str_to_list_semicolon, "raw_r_cancel").add_hints(type=Optional[List[String]]),
    ]

    filters = [
        qf.defined("uid", "msg_id", "queue_id"),
        qf.not_(qf.equals("msg_id", "")),
        qf.not_(qf.equals("queue_id", "")),
    ]

    return source.qb2(log="generic-yson-log", fields=fields, filters=filters)


def prep_inbound_emails(job):
    """Join the three inbound-mail streams and keep clean, non-spam messages.

    The nwsmtp stream is the left side of both joins: it is left-joined with
    the SO stream on ``(uid, msg_id, queue_id)`` and then with the
    user-events stream on ``(uid, mid)``.  The joined records are kept only
    when ``so_status`` is ``ham`` or ``delivery``, ``spam_status`` is not
    ``yes`` and ``avir2_status`` is ``clean``.

    :param job: job object the source tables are attached to.
    :return: the joined and filtered stream.
    """
    nwsmtp_stream = prep_inbound_emails_nw(job)
    so_stream = prep_inbound_emails_so(job)
    with_so = nwsmtp_stream.join(
        so_stream,
        by=("uid", "msg_id", "queue_id"),
        type="left",
    )

    user_events_stream = prep_inbound_emails_uj(job)
    with_user_events = with_so.join(
        user_events_stream,
        by=("uid", "mid"),
        type="left",
    )

    return with_user_events.filter(
        qf.one_of("so_status", ["ham", "delivery"]),
        qf.not_(qf.equals("spam_status", "yes")),
        qf.equals("avir2_status", "clean"),
    )


@cli.statinfra_job
def make_job(job, nirvana, statface_client):
    """Assemble the job: write the prepared inbound emails to the output table.

    :param job: job object to attach the pipeline to.
    :param nirvana: object exposing ``output_tables``; the first entry is
        used as the destination.
    :param statface_client: not used by this job.
    :return: the same *job*, with the pipeline attached.
    """
    destination = nirvana.output_tables[0]
    inbound_emails = prep_inbound_emails(job)
    inbound_emails.put(destination)
    return job


# Script entry point: when the file is executed directly, hand control to the
# nile CLI runner.
if __name__ == '__main__':
    cli.run()
