#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
from __future__ import print_function
import os, os.path, sys

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, CURRENT_DIR)
sys.path.insert(0, os.path.join(CURRENT_DIR, "libs"))
sys.path.insert(0, 'WORKING_DIR')
import cgi, cgitb
cgitb.enable()

import IPy, pymongo, json, base64
from subprocess import Popen, PIPE
from collections import defaultdict
from datetime import datetime
from multiprocessing.pool import Pool
from jinja2 import Environment, FileSystemLoader
from log_utils import writelog
from db_utils import getMongoDB
from users_utils import STATLOG
from common import getLogin, getHosts4Group


# Connection settings handed to getMongoDB(). "hosts" is a comma-separated
# string with one sostatip1<letter> host for each of the letters j, h and m.
MONGO = dict(
    cluster='mail_sostatip_db',
    port=27017,
    db='spstat',
    hosts=','.join(["sostatip1%s.mail.yandex.net" % dc for dc in "jhm"])
)

# Statlog servers to query. A dynamic lookup was used previously and is kept
# here for reference:
#SERVERS = getHosts4Group("so_statlog", ["statlog1%s.so.yandex.net" % letter for letter in "fghmo"])
SERVERS = STATLOG["hosts"]

# One entry per log: (port, log file path, block size). Every server in
# SERVERS is queried once for each entry; the port is only used to label the
# results as "server:port".
# NOTE(review): block size looks like an amount of log data per 60 units of
# duration (it is scaled by duration / 60 before being sent) - confirm.
LOGS = [
    (5000, "/u0/statlog/so_in/so_in.log",     1000000000),
    (5001, "/u0/statlog/so_out/so_out.log",   100000000),
    (5005, "/u0/statlog/so_corp/so_corp.log", 100000000)
]


class IpInfo:
    """Accumulator for everything collected about a single IP address.

    The class attributes below act as defaults; instances overwrite them as
    query results are merged in.
    """

    # Descriptive fields, taken from the first result that provides them.
    host = geo = ""

    # Message counters; `total` is the sum of ham, spam and malic.
    total = ham = spam = malic = 0

    # Largest shingle_total seen in any single result for this IP.
    shingle_total = 0

    # Placeholder only: every instance gets its own list in __init__.
    offsets = []

    def __init__(self):
        # A fresh list per instance, so (server, offsets) pairs collected for
        # one IP never leak into another.
        self.offsets = list()


def query(args):
    """Run ``catcher_parser`` on one statlog server over ssh and parse its output.

    Takes a single tuple so it can be passed straight to ``Pool.map``:
    ``(server, port, logfile, blocksize, regex, ip64, duration)``.

    Returns ``(timerange, stat)``:

    * ``timerange`` - the first two integers of the first output line, or
      ``[2**32, 0]`` when no valid first line was received;
    * ``stat`` - one tuple per parsed data line:
      ``(ip, host, geo, ham, spam, malic, shingle_total, "server:port", offsets)``.

    Lines that cannot be parsed are logged with ``writelog`` and skipped; the
    function itself does not raise for bad output or a failed ssh launch.
    """
    server, port, logfile, blocksize, regex, ip64, duration = args

    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    timerange = [2**32, 0]
    stat = []

    # regex and ip64 come from the HTTP request. ssh passes the command string
    # to a shell on the remote side, so every argument is quoted for that
    # shell, and ssh itself is started without a local shell.
    remote_command = "./catcher_parser %s %d %s %s %d" % (
        quote(logfile),
        int((blocksize * duration) / 60),
        quote(regex),
        quote(ip64),
        duration,
    )
    command = [
        "ssh", "-o", "ConnectTimeout=10",
        "-i", "/home/nginx/.ssh/id_rsa_so90",
        "robot-mailspam@%s" % server,
        remote_command,
    ]
    try:
        log = Popen(command, stdout=PIPE).communicate()[0]
    except OSError as e:
        # ssh could not be started at all: report it and return "no data".
        writelog("catcher query exception: %s" % str(e), True)
        return timerange, stat

    if not isinstance(log, str):
        # Python 3 returns bytes from the pipe; Python 2 already gives str.
        log = log.decode("utf-8", "replace")

    for i, line in enumerate(log.splitlines()):
        try:
            if i == 0:
                # The first line carries the time range, not an IP record, so
                # it must not fall through to the 8-field unpack below.
                bounds = [int(value) for value in line.split("\t")]
                if len(bounds) < 2:
                    raise ValueError("malformed time range line: %r" % line)
                timerange = bounds[:2]
                continue
            ip, geo, host, ham, spam, malic, shingle_total, offsets = line.split("\t")
            # NOTE(review): columns are read as (ip, geo, host) but stored as
            # (ip, host, geo), while the loop consuming pool.map() results
            # unpacks them as (ip, geo, host). Confirm the real column order
            # printed by catcher_parser before touching either side.
            stat.append((ip, host, geo, int(ham), int(spam), int(malic), int(shingle_total), "%s:%d" % (server, port), offsets))
        except Exception as e:
            writelog("catcher query exception: %s" % str(e), True)
    return timerange, stat


def _int_field(form, name, default):
    """Return request field *name* as an int; log and use *default* if it is malformed."""
    raw = form.getfirst(name, default)
    try:
        return int(raw)
    except (TypeError, ValueError) as e:
        writelog("catcher bad value for '%s': %r (%s)" % (name, raw, str(e)), True)
        return default


# Request parameters. All of these module-level names are later exported to
# the page template through locals(), so they must keep their names.
form = cgi.FieldStorage()
duration = min(300, _int_field(form, "duration", 60))    # capped at 300
ip64 = form.getfirst("ip64", "64")
regex = form.getfirst("regex", "")
mincount = _int_field(form, "mincount", 20)
mincounttd = _int_field(form, "mincounttd", 20)
maxhamcounttd = _int_field(form, "maxhamcounttd", 0)
markbanip = _int_field(form, "markbanip", 0)
todaytype = form.getfirst("todaytype", "statip")
# The request counts as the first visit unless it carries "posted" with a
# value other than "0".
firsttime = form.getfirst("posted", "0") == "0"

if firsttime:
    # On the first visit ban marking is switched on regardless of the request.
    markbanip = 1

# One argument tuple per query() call:
# (server, port, logfile, blocksize, regex, ip64, duration).
arguments = []
for server in SERVERS:
    for log in LOGS:
        arguments.append((server, log[0], log[1], log[2], regex, ip64, duration))

# Best effort: a failed connection is logged and `db` stays a plain dict.
db = {}
try:
    db = getMongoDB(MONGO)
except Exception as e:
    writelog("DB exception: %s" % str(e), True)

# [earliest start, latest end] over all query results; these initial values
# are what gets formatted below when no query returned a time range.
timerange = [2**32, 0]
ip_stat = defaultdict(IpInfo)

# Worker processes are started only when there is something to run: nothing
# is queried on the first visit, and Pool(0) would raise ValueError.
pool = None
if not firsttime and arguments:
    pool = Pool(len(arguments))
    try:
        for (q_timerange, stat) in pool.map(query, arguments):
            if len(q_timerange) >= 2:
                timerange[0] = min(timerange[0], q_timerange[0])
                timerange[1] = max(timerange[1], q_timerange[1])
            # NOTE(review): query() appends tuples as (ip, host, geo, ...) but
            # they are unpacked here as (ip, geo, host, ...). Whether this is
            # a bug or compensates for the parser's column order cannot be
            # told from this file - confirm before changing.
            for (ip, geo, host, ham, spam, malic, shingle_total, server, offsets) in stat:
                info = ip_stat[ ip ]
                info.total += ham + spam + malic
                info.ham += ham
                info.spam += spam
                info.malic += malic
                info.shingle_total = max(info.shingle_total, shingle_total)
                # host and geo are taken from the first result that has them
                if not info.host:
                    info.host = host
                if not info.geo:
                    info.geo = geo
                info.offsets.append((server, offsets))
    finally:
        # Release the worker processes whether or not the queries succeeded.
        pool.close()
        pool.join()

# Replace the two timestamps with formatted local-time strings.
timerange = [datetime.fromtimestamp(timestamp).strftime( "%Y-%m-%d %H:%M:%S" ) for timestamp in timerange]

# Totals over the rows that pass all filters: counters from the queried logs
# and the "today" counters stored in the database.
total_ham, total_spam, total_malic = 0, 0, 0
total_today_ham, total_today_spam, total_today_malic = 0, 0, 0
stats = []

# `db` is a plain {} when the MongoDB connection failed earlier, in which
# case the lookups below raise. A row whose database lookup fails is dropped
# anyway (see the except at the bottom of the loop), so without the
# collections no rows are produced at all instead of aborting the script.
try:
    membase = db[ "membase" ]
    banobj = db[ "banobj" ]
except Exception as e:
    writelog("catcher exception: %s" % str(e), True)
    membase = banobj = None

candidates = ip_stat.items() if membase is not None else []

for (ip, info) in candidates:
    try:
        if info.total < mincount:
            continue

        # Document key: the IP as a 32-digit upper-case hexadecimal number.
        key = "%032X" % IPy.IP(ip).int()

        obj = membase.find_one({"_id": key})
        ip_ban = banobj.find_one({"_id": key})

        # Stored counters for this IP; missing documents or fields count as
        # 0, and negative values are clamped to 0.
        ham_today, spam_today, malic_today, days_rej, days_norej = 0, 0, 0, 0, 0
        if obj:
            ham_today = max(0, int(obj.get("ctHam", 0)))
            spam_today = max(0, int(obj.get("ctSpam", 0)))
            malic_today = max(0, int(obj.get("ctMalicSpam", 0)))
            days_rej = max(0, int(obj.get("hcDaysRej", 0)))
            days_norej = max(0, int(obj.get("hcDaysNoRej", 0)))

        # The mincounttd threshold applies to the stored counters or to the
        # shingle total, depending on todaytype; any other todaytype value
        # applies no threshold.
        if todaytype == "statip":
            if ham_today + spam_today + malic_today < mincounttd:
                continue
        elif todaytype == "shingle":
            if info.shingle_total < mincounttd:
                continue

        # maxhamcounttd == 0 disables the upper limit on today's ham.
        if maxhamcounttd > 0 and ham_today > maxhamcounttd:
            continue

        total_ham += info.ham
        total_spam += info.spam
        total_malic += info.malic
        total_today_ham += ham_today
        total_today_spam += spam_today
        total_today_malic += malic_today

        # "ban" is truthy only when a ban document exists and markbanip is
        # set; "showlink" is the URL-safe base64 of the JSON-encoded
        # (server, offsets) pairs collected for this IP.
        stats.append({ "ip": ip, "host": info.host, "geo": info.geo, "count": info.total, "spam": info.spam, "spam_today": spam_today,
                "ham": info.ham, "ham_today": ham_today, "malic": info.malic, "malic_today": malic_today, "shingle_total": info.shingle_total,
                "ban": ip_ban is not None and markbanip, "cuser": ip_ban.get( "cUser", "RULE" ) if ip_ban else "",
                "days_rej": days_rej, "days_norej": days_norej, "showlink": base64.urlsafe_b64encode(json.dumps(info.offsets)) })
    except Exception as e:
        # Best effort per row: log the problem and continue with the next IP.
        writelog("catcher exception: %s" % str(e), True)

print("Content-type:text/html\r\n\r\n")

# Ban durations offered by the page. They come from the "setup" document and
# fall back to 24 / 720 when the document, a field, or the database itself is
# unavailable, so a database problem no longer suppresses the whole page.
setup = None
try:
    setup = db[ "setup" ].find_one({"_id" : "00000000000000000000FFFF7F000001"})
except Exception as e:
    writelog("catcher rendering web-page exception: %s" % str(e), True)
bantime = setup.get("bantime", 24) if setup else 24
longbantime = setup.get("longbantime", 720) if setup else 720

try:
    env = Environment(loader=FileSystemLoader("WORKING_DIR/web/internal"))
    template = env.get_template("catcher.html.template")
    user = getLogin(os.environ["REMOTE_ADDR"])
    # The template receives every module-level name via locals(), so renaming
    # any top-level variable in this script can break the page.
    # NOTE(review): the Environment is created without autoescape while
    # request values (regex, ip64, ...) are exposed to the template - confirm
    # that the template escapes them.
    print(template.render(locals()).encode("utf-8"))
except Exception as e:
    writelog("catcher rendering web-page exception: %s" % str(e), True)
