#!/usr/bin/python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import re, time, socket, binascii
from datetime import datetime
from common import loggerFLP

def getVal(val):
    """Return ``val`` cut at its first '(' (if any), with whitespace stripped.

    When ``val`` contains no '(' the whole string is stripped and returned.
    """
    # partition() yields the entire string as the head when '(' is absent.
    head, _sep, _rest = val.partition('(')
    return head.strip()

class DeliveryParser:
    """Line-oriented parser that folds delivery-log records into a stats db.

    Lines are fed one at a time to parse(). The first four characters of a
    line select which field of the current Record is set; a blank line ends
    the record, which is then aggregated into the ``db`` object passed to the
    constructor and a fresh Record is started.
    """

    class Record:
        """Scratch state for one record; class attributes are the defaults."""
        ip = ""              # from "ip  " lines (lower-cased)
        ip_i = ""            # from "ip_i" lines; replaces ip when non-empty
        login = ""           # from "logn" lines (lower-cased)
        name = ""            # from "inam" lines
        sirname = ""         # from "fnam" lines
        regtype = "SIMPLE"   # from "rtyp" lines that are not CHANGE_PASSWORD*
        service = ""         # from "from" lines (lower-cased)
        chpass_type = ""     # from "rtyp" lines starting with CHANGE_PASSWORD
        lang = ''            # from "lang" lines
        email = 0            # 1 once an "emal" line was seen
        phone = 0            # 1 once a "phon" line was seen
        good = False         # True once a "mess" line was seen
        timestamp = -1       # from "rtim" lines; <= 0 means "not set"
        badgeo = False       # a rules line contained " BAD_GEO_ZONE,"
        newip = False        # a rules line contained " __STAT_NEW_IP,"
        ham_percent = 0      # computed from "ipbs" lines
        spam = False         # "spam" line value was "yes"
        malic = False        # last "r_nl" line contained "MALICIOUS"
        is_turkey = False    # "geos" value starts with "TR"
        geo = ""             # geo text extracted from "geos" lines
        AS = ""              # AS value extracted from "geos" lines
        log = ""             # concatenation of every line of the record
        is_whitelist = False # a rules line contained " IS_WHITE_LIST "
        locl = ""            # unsigned CRC32 of the "locl" value once parsed

    def __init__(self, db, period):
        """Create a parser.

        db     -- aggregation target; must provide ``graph`` and ``logins``
                  mappings and the ``add*`` id-allocating methods used in
                  _recordFinished().
        period -- width of a graph bucket, in the same unit as the record
                  timestamps.
        """
        # Raw strings keep the regex escapes intact (the pattern text itself
        # is unchanged).
        self._re_rtim = re.compile(r"\((\d+)\)")
        self._re_geos = re.compile(r"\s*([A-Z]+.*?\w+)\s*(?:\(.*?\)\s*)?AS='(\w*)'")
        self._db = db
        self._period = period
        self._record = DeliveryParser.Record()

    def logSwitch(self, filename):
        """Hook called with a file name; this parser does nothing here."""
        pass

    def _isGoodIp(self, ip):
        """Return True when ``ip`` is a non-empty string."""
        return len(ip) > 0

    def _recordFinished(self):
        """Aggregate the current record into the db.

        Records without a "mess" line, or without a usable IP, are dropped.
        """
        r = self._record
        if not r.good:
            return
        # ip_i, when present, takes precedence over ip.
        if len(r.ip_i) > 0:
            r.ip = r.ip_i
        if not self._isGoodIp(r.ip):
            loggerFLP.error("IP is bad: %s" % r.ip)
            return
        if r.timestamp <= 0:
            r.timestamp = time.time()
        dt = datetime.fromtimestamp(r.timestamp)
        # Local midnight of the record's day: key for the per-day counters.
        now = int(time.mktime(dt.date().timetuple()))
        # Start of the graph bucket the record falls into.
        timeperiod = int(r.timestamp / self._period) * self._period
        c = self._db.graph[timeperiod]
        l = self._db.logins[r.login]
        l.total[now] += 1
        c.total += 1
        if len(r.ip) > 0:
            c.ips.add(r.ip)
            l.ip = r.ip
        if l.firstreg == -1:
            l.firstreg = r.timestamp
        else:
            l.firstreg = min(l.firstreg, r.timestamp)
        l.ts = l.lastreg = r.timestamp
        l.lang = r.lang
        if r.geo:
            l.geo_id = self._db.addGeo(r.geo)
        if r.AS:
            l.as_id = self._db.addAS(r.AS)
        if r.regtype == "__ACT_CH_PASS":
            l.chpass[now] += 1
        # The original condition was ``r.regtype != '-' > 0``: a chained
        # comparison that evaluates ``'-' > 0`` (true by accident on
        # Python 2, TypeError on Python 3). The intended test is below.
        if r.regtype and r.regtype != '-':
            l.regtype_id = self._db.addRegType(r.regtype)
            c.regtypes[l.regtype_id] += 1
        if len(r.chpass_type) > 0:
            l.chpass_type_id = self._db.addChpassType(r.chpass_type)
            c.chpass_types[l.chpass_type_id] += 1
        if r.badgeo:
            c.badgeo += 1
        if r.newip:
            c.newips += 1
        # Karma: 0 for non-spam, 85 for spam, 100 for malicious spam.
        l.karma = 0 if not r.spam else (100 if r.malic else 85)
        if r.spam:
            if r.ham_percent >= 50:
                c.spam_from_good50 += 1
                c.delivery_spam_from_good50 += r.log + "\n"
            if r.ham_percent >= 75:
                c.spam_from_good75 += 1
        if r.is_turkey:
            c.total_turkey += 1
            if r.spam and not r.malic:
                c.spam_turkey += 1
            if r.malic:
                c.malic_turkey += 1
        if r.is_whitelist:
            c.whitelist += 1
        if len(r.service) > 0:
            c.services[self._db.addService(r.service)] += 1
        c.emails += r.email
        c.phones += r.phone
        c.locl = r.locl

    def _parseRules(self, line):
        """Latch the rule flags found in ``line``; set flags are never cleared."""
        rec = self._record
        if not rec.badgeo:
            rec.badgeo = (line.find(" BAD_GEO_ZONE,") != -1)
        if not rec.newip:
            rec.newip = (line.find(" __STAT_NEW_IP,") != -1)
        if not rec.is_whitelist:
            rec.is_whitelist = (line.find(" IS_WHITE_LIST ") != -1)

    def _parseTimestamp(self, line):
        """Set the timestamp from the first ``(digits)`` group in ``line``.

        Falls back to the current time when no such group is present.
        """
        try:
            self._record.timestamp = int(self._re_rtim.findall(line)[0])
        except (IndexError, ValueError):
            self._record.timestamp = time.time()

    def _parseHamPercent(self, line):
        """Compute ham_percent from the comma-separated counters in ``line``.

        The last comma-separated field is ignored. With the remaining integer
        fields ``f``, the result is ``f[1] * 100 // (f[1] + f[2] + f[3])``.
        Malformed lines (non-integers, too few fields, zero total) leave
        ham_percent untouched.
        """
        try:
            # A real list is required: on Python 3 map() returns an iterator
            # that cannot be indexed, which the old bare except silently hid.
            rec = [int(field) for field in line[5:].split(",")[:-1]]
            # Floor division keeps the Python 2 integer result everywhere.
            self._record.ham_percent = (rec[1] * 100) // (rec[1] + rec[2] + rec[3])
        except (ValueError, IndexError, ZeroDivisionError):
            pass

    def _parseIp(self, line):
        """Return the value part of an ip line, stripped and lower-cased."""
        return line[5:].strip().lower()

    def _parseGeo(self, line):
        """Extract geo text from a "geos" line and record the AS as a side effect.

        Returns the geo string; returns '' when the line does not match the
        expected pattern and its value contains ';'.
        """
        s = line[6:]
        m = self._re_geos.match(s)
        if m:
            self._record.AS = m.group(2)
            return m.group(1)
        # Fallback: pick up a bare "AS<digits>" token anywhere in the value.
        m = re.search(r'\bAS\d+', s)
        if m:
            self._record.AS = m.group(0)
        return '' if s.find(';') > -1 else s.strip()

    def parse(self, line, filename):
        """Consume one log line.

        A blank line finishes the current record (aggregating it into the db)
        and starts a new one. Any other line is dispatched on its first four
        characters and appended to the record's raw log text. ``filename`` is
        accepted for interface compatibility and not used here.
        """
        if len(line.strip()) == 0:
            self._recordFinished()
            self._record = DeliveryParser.Record()
            return
        r = self._record
        prefix = line[0:4]
        if prefix == "mess":
            r.good = True
        elif prefix == "ip  ":
            r.ip = self._parseIp(line)
        elif prefix == "ip_i":
            r.ip_i = self._parseIp(line)
        elif prefix == "inam":
            r.name = getVal(line[6:])
        elif prefix == "fnam":
            r.sirname = getVal(line[6:])
        elif prefix == "lang":
            # NOTE(review): the original slice was line[11:2], which is always
            # empty, so lang was never recorded. Assumed intent: the two
            # characters at offset 11 -- confirm against the log format.
            r.lang = line[11:13].strip()
        elif prefix == "ruls" or prefix == "r_nl" or prefix == "r_sp":
            self._parseRules(line)
            if prefix == "r_nl":
                r.malic = line.find("MALICIOUS") >= 0
        elif prefix == "from":
            r.service = line[5:].strip().lower()
        elif prefix == "rtim":
            self._parseTimestamp(line)
        elif prefix == "emal":
            r.email = 1
        elif prefix == "phon":
            r.phone = 1
        elif prefix == "logn":
            r.login = line[5:].strip().lower()
        elif prefix == "ipbs":
            self._parseHamPercent(line)
        elif prefix == "spam":
            r.spam = (line[5:].strip() == "yes")
        elif prefix == "geos":
            r.is_turkey = (line[6:8] == "TR")
            r.geo = self._parseGeo(line)
        elif prefix == "rtyp":
            regtype = line[5:].strip().upper()
            m = re.match(r'CHANGE_PASSWORD(?:_([A-Z_]+))?', regtype)
            if m:
                r.chpass_type = m.group(1) if m.group(1) else 'CHANGE_PASSWORD'
            else:
                i = regtype.find('(')
                r.regtype = regtype[:i].strip() if i > -1 else regtype
        elif prefix == "locl":
            raw = line[5:].strip()
            # crc32 needs bytes on Python 3; Python 2 str already is bytes.
            if not isinstance(raw, bytes):
                raw = raw.encode("utf-8")
            r.locl = binascii.crc32(raw) & 0xFFFFFFFF
        r.log += line
