#!/usr/bin/python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, traceback, sys, time
from datetime import datetime
from collections import defaultdict
from dictToStrings import dictToArr
from clusterizer import Clusterizer
from alarmerproc import sendAlarmer
from itertools import chain
from common import *
from db_resistant import PGProxy

class GraphCounter:
    """Counters collected for a single graph point.

    Scalar counters are declared as class-level defaults, so an instance only
    gets its own copy once a counter is assigned or incremented on it.  The
    mutable containers are created per instance in ``__init__``.
    """

    # Overall message counters.
    total = spam = malic = 0
    # Contact-related counters.
    emails = phones = 0
    # IP / geo related counters.
    badgeo = newips = 0
    # Spam seen from IPs regarded as good (two separate counters).
    spam_from_good50 = spam_from_good75 = 0
    # Text handed to the alarmer together with the spam_from_good50 alarm.
    delivery_spam_from_good50 = ""
    # Same counters, kept separately for Turkey.
    total_turkey = spam_turkey = malic_turkey = 0
    whitelist = 0
    locl = 0

    def __init__(self):
        # Distinct IPs seen for this point; only the set size is persisted.
        self.ips = set()
        # Per-id counters; a missing key reads as 0.
        self.chpass_types = defaultdict(int)
        self.services = defaultdict(int)
        self.regtypes = defaultdict(int)

class LoginInfo:
    """Data accumulated for one login between two database flushes.

    Holds counters keyed by timestamp plus the latest registration attributes
    of the login.  ``logSwitch`` rotates the daily counters in the ``ips``
    table.
    """

    def __init__(self):
        # Two-level counters: a missing key at either level reads as 0.
        self.regtypes = defaultdict(lambda: defaultdict(int))
        self.services = defaultdict(lambda: defaultdict(int))
        # Flat counters; Database.saveLoginsCounters sums their values and
        # also indexes them by timestamp.
        self.total = defaultdict(int)
        self.spam = defaultdict(int)
        self.malic = defaultdict(int)
        self.chpass = defaultdict(int)
        # Ids default to 0, strings to '', first/last registration to -1.
        self.regtype_id, self.chpass_type_id, self.ts, self.ip, self.firstreg, self.lastreg = 0, 0, 0, '', -1, -1
        self.karma, self.name, self.sirname, self.geo, self.geo_id, self.as_id, self.lang = 0, '', '', '', 0, 0, ''

    @staticmethod
    def logSwitch(cursor):
        """Rotate the daily counters of the ``ips`` table.

        Runs two UPDATE statements on ``cursor``: the first zeroes the
        ``yesterday_*`` columns on rows where ``yesterday_total`` or
        ``yesterday_spam`` is positive; the second, for rows with any positive
        current counter, copies ``total``/``spam``/``malic`` into
        ``yesterday_*`` and zeroes ``total``, ``spam``, ``malic`` and
        ``chpass``.  Nothing is committed here; that is left to the caller.
        """
        loggerFLP.trace("LoginInfo logSwitch start")
        cursor.execute("UPDATE ips SET yesterday_total = 0, yesterday_spam = 0, yesterday_malic = 0 WHERE yesterday_total > 0 or yesterday_spam > 0")
        cursor.execute("""UPDATE ips SET yesterday_total = total, yesterday_spam = spam, yesterday_malic = malic,
                        total = 0, spam = 0, malic = 0, chpass = 0 WHERE total > 0 or spam > 0 or malic > 0 or chpass > 0""")
        loggerFLP.trace("LoginInfo logSwitch done")

class Clusterizers:
    """Clusterizers of one timestamp, keyed by keyword, with spam-signal persistence.

    ``CL`` maps a keyword to a ``Clusterizer``.  Each clusterizer is expected
    to expose ``docs`` (cluster index -> collection of documents, where
    ``doc[1]`` is the document text) and ``getDocsCount()``.  ``modified`` is
    read and reset by ``Database.save`` to decide whether this object needs
    to be stored.
    """

    modified = False
    # Class-level default kept for backward compatibility; every instance
    # replaces it with its own mapping in __init__.
    CL = defaultdict(Clusterizer)

    # Substring that marks a document as ham (not spam) inside its text.
    _HAM_MARKER = "\nspam: no"
    # Minimal indicator required to emit a signal; keywords not listed here
    # have no additional threshold.
    _INDICATOR_THRESHOLDS = {"mail": 50, "social": 20}

    def __init__(self):
        self.CL = defaultdict(Clusterizer)

    def _checkClusterizer(self, cl):
        """Decide whether the clusters of ``cl`` look like a spam wave.

        Returns a tuple ``(spamflag, counts, percent, indicator)`` where
        ``counts`` maps a cluster length to the number of clusters of that
        length, ``indicator`` is the number of documents in clusters of two or
        more documents and ``percent`` is ``indicator`` relative to
        ``cl.getDocsCount()``.

        A clusterizer without clusters or documents yields
        ``(False, counts, 0, indicator)`` instead of raising ``ValueError`` /
        ``ZeroDivisionError``.
        """
        counts = defaultdict(int)
        indicator = 0
        for cluster_docs in cl.docs.values():
            cluster_len = len(cluster_docs)
            counts[cluster_len] += 1
            if cluster_len >= 2:
                indicator += cluster_len
        docs_count = cl.getDocsCount()
        if not counts or not docs_count:
            # Nothing to analyse: never a spam signal.
            return False, counts, 0, indicator
        max_cluster_len = max(counts)
        percent = (indicator * 100) / docs_count
        spamflag = (percent > 10 or indicator > 30) and max_cluster_len > 2
        return spamflag, counts, percent, indicator

    def _addClustersInfo(self, cl, info):
        """Fill ``info`` with the multi-document clusters that contain ham.

        Always sets ``info["ClustersNumber"]`` and ``info["Clusters"]``;
        ``info["MaxClusterLen"]`` is set only when at least one cluster has
        more than one document.  Clusters are visited from the longest to the
        shortest, and inside a cluster ham documents are listed first.  Only
        clusters with at least one ham document are appended.

        Returns True when at least one such cluster was appended, else False.
        """
        ham_marker = self._HAM_MARKER
        info["ClustersNumber"] = len(cl.docs)
        info["Clusters"] = []
        lengths = defaultdict(list)
        for (index, cluster_docs) in cl.docs.iteritems():
            size = len(cluster_docs)
            if size > 1:
                lengths[size].append(index)
        if not lengths:
            # Previously returned None here; callers only test truthiness.
            return False
        info["MaxClusterLen"] = max(lengths)
        has_ham_cluster = False
        for length in sorted(lengths, reverse=True):
            for index in lengths[length]:
                docs = sorted(cl.docs[index], key=lambda doc: 0 if ham_marker in doc[1] else 1)
                if not any(ham_marker in doc[1] for doc in docs):
                    continue
                # Keep only the non-blank lines of every document.
                cluster_lines = [[line for line in doc[1].split("\n") if line.strip()] for doc in docs]
                info["Clusters"].append({"Docs": cluster_lines})
                has_ham_cluster = True
        return has_ham_cluster

    def _addClustersDistributionInfo(self, cl, counts, info, total):
        """Add the distribution of cluster lengths to ``info``.

        ``counts`` maps a cluster length to the number of such clusters and
        ``total`` is the divisor for ``PercentToTotalDocs``; a zero ``total``
        yields 0 percent instead of raising ``ZeroDivisionError``.
        """
        info["DifferentClusterLengths"] = len(counts)
        info["TotalDocs"] = cl.getDocsCount()
        info["ClusterTypes"] = []
        for (length, count) in sorted(counts.iteritems()):
            # NOTE(review): the percent uses the size of a single cluster
            # (not length * count) and the caller passes the document total of
            # all keywords - confirm this is the intended metric.
            percent = (length * 100) / total if total else 0
            info["ClusterTypes"].append({"DocsInCluster": length, "ClustersCount": count, "PercentToTotalDocs": percent})

    def save(self, cursor, timestamp):
        """Store spam signals and the explanatory log for ``timestamp``.

        For every keyword whose clusterizer is flagged as spam, has a cluster
        containing ham and passes the keyword threshold, a row is upserted
        into ``clusterizer_signal``.  If at least one signal was written, the
        textual report of all keywords is upserted into ``clusterizer_log``.
        Nothing is committed here.
        """
        loggerFLP.trace("Clusterizers save start")
        dt = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
        total_info = {}
        total_info[dt] = {}
        info = total_info[dt]
        total = sum(cl.getDocsCount() for cl in self.CL.values())
        somespam = False
        for (keyword, cl) in self.CL.iteritems():
            spamflag, counts, percent, indicator = self._checkClusterizer(cl)
            keyword_info = info[keyword] = {}
            keyword_info["Spam"] = spamflag
            keyword_info["Indicator"] = indicator
            keyword_info["Percent"] = percent
            has_ham_cluster = self._addClustersInfo(cl, keyword_info)
            self._addClustersDistributionInfo(cl, counts, keyword_info, total)
            if not (spamflag and has_ham_cluster):
                continue
            threshold = self._INDICATOR_THRESHOLDS.get(keyword)
            if threshold is not None and indicator < threshold:
                continue
            somespam = True
            cursor.execute("""INSERT INTO clusterizer_signal (ts, keyword, indicator, percent, docscount) VALUES(%s, %s, %s, %s, %s) ON CONFLICT (ts, keyword)
                               DO UPDATE SET indicator = EXCLUDED.indicator, percent = EXCLUDED.percent, docscount = EXCLUDED.docscount""", (timestamp, keyword, indicator, percent, cl.getDocsCount()))
        arr = []
        dictToArr(total_info, [], arr)
        logtext = "\n".join(arr)
        if somespam:
            cursor.execute("INSERT INTO clusterizer_log (ts, log) VALUES(%s, %s) ON CONFLICT (ts) DO UPDATE SET log = EXCLUDED.log", (timestamp, logtext))
        loggerFLP.trace("Clusterizers save done")

class Database:
    """In-memory accumulator of statistics that is periodically flushed to PostgreSQL.

    ``graph``, ``logins`` and ``clusterizers`` collect data between flushes;
    ``geozones``, ``regtypes``, ``services``, ``chpass_types`` and ``ass``
    cache the ids of dictionary-table rows (the attribute names equal the
    table names used by ``addCounter``).  ``cursor`` is the cursor currently
    in use, or None.
    """

    def __init__(self):
        getCredentials()
        self._db = PGProxy(host = PG['host'], port = PG['port'], user = PG['user'], passwd = PG['passwd'], db = PG['db'], charset = PG['charset'])
        self.graph = defaultdict(GraphCounter)
        self.logins = defaultdict(LoginInfo)
        self.clusterizers = defaultdict(Clusterizers)
        self.geozones = defaultdict(int)
        self.regtypes = defaultdict(int)
        self.services = defaultdict(int)
        self.chpass_types = defaultdict(int)
        self.ass = defaultdict(int)
        self.cursor = None

    def save(self, now):
        """Flush graph counters, login counters and modified clusterizers.

        ``now`` is a datetime; it is converted with ``time.mktime`` before
        being used as the timestamp of the login counters.  Unmodified
        clusterizers are dropped from memory, modified ones are saved and
        marked clean.  The transaction is committed at the end and the cursor
        is closed even when a step raises (the exception still propagates).
        """
        loggerFLP.trace("DB: save start")
        now, t0 = time.mktime(now.timetuple()), time.time()
        self.cursor = self._db.cursor()
        try:
            loggerFLP.trace("DB: save graph items: %d" % len(self.graph.keys()))
            t1 = time.time()
            self.saveGraphCounters()
            loggerFLP.trace("DB: save step 1: %f" % (time.time() - t1))

            total = len(self.logins)
            loggerFLP.trace("DB: save logins items: %d" % total)
            t1 = time.time()
            self.saveLoginsCounters(now)
            loggerFLP.trace("DB: save step 2: %f" % (time.time() - t1))

            loggerFLP.trace("DB: save clusterizers items: %d" % len(self.clusterizers.keys()))
            t1 = time.time()
            # Iterate over a snapshot of the keys: entries are popped inside the loop.
            for timestamp in list(self.clusterizers.keys()):
                clusterizer = self.clusterizers[timestamp]
                if clusterizer.modified:
                    clusterizer.save(self.cursor, timestamp)
                    clusterizer.modified = False
                else:
                    self.clusterizers.pop(timestamp)
            loggerFLP.trace("DB: save step 3: %f" % (time.time() - t1))
            self._db.commit()
        finally:
            self.cursor.close()
        loggerFLP.trace("DB: save done: %f" % (time.time() - t0))

    def fetchID(self, collections_name, field_names, *values):
        """Return the id of a row, or 0 when it cannot be determined.

        The pending result of the statement executed last on ``self.cursor``
        is consumed first (callers run ``INSERT ... RETURNING id`` right
        before calling this).  When that yields no tuple row, the id is looked
        up with ``SELECT id FROM <collections_name> WHERE <field> = %s AND ...``
        built from the comma-separated ``field_names`` and ``values``.
        """
        row = self.cursor.fetchone()
        if not (row and isinstance(row, tuple)):
            conditions = ' AND '.join('{0} = %s'.format(name.strip()) for name in field_names.split(','))
            self.cursor.execute("SELECT id FROM {0} WHERE {1}".format(collections_name, conditions), values)
            row = self.cursor.fetchone()
        if row and isinstance(row, tuple):
            return int(row[0])
        return 0

    def saveLoginsCounters(self, now):
        """Upsert the ``ips``, ``logins`` and ``chpass_stat`` rows of every login.

        ``now`` is the numeric timestamp whose counters are stored as the
        current ones; counters at ``now - 86400`` go to the ``yesterday_*``
        columns.  ``self.logins`` is cleared only after all logins were
        written; any exception is logged and swallowed, leaving the logins in
        memory.
        """
        try:
            yesterday = now - 86400
            for (login, info) in self.logins.iteritems():
                # Empty values are stored as '::' (ip) or NULL (ids, lang).
                ip = info.ip if info.ip else '::'
                as_id = info.as_id if info.as_id else None
                geo_id = info.geo_id if info.geo_id else None
                lang = info.lang if info.lang else None
                self.cursor.execute("""INSERT INTO ips (ip, all_total, all_spam, all_malic, all_chpass, chpass, firstreg, lastreg, total, spam, malic, yesterday_total, yesterday_spam, yesterday_malic,
                    active, lastupdate, geo_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 1, %s, NULLIF(%s, 0))
                    ON CONFLICT (ip) DO UPDATE SET all_total = ips.all_total + EXCLUDED.all_total, all_spam = ips.all_spam + EXCLUDED.all_spam, all_malic = ips.all_malic + EXCLUDED.all_malic,
                    all_chpass = ips.all_chpass + EXCLUDED.all_chpass, chpass = ips.chpass + EXCLUDED.chpass, firstreg = LEAST(ips.firstreg, EXCLUDED.firstreg),
                    lastreg = GREATEST(ips.lastreg, EXCLUDED.lastreg), total = ips.total + EXCLUDED.total, spam = ips.spam + EXCLUDED.spam, malic = ips.malic + EXCLUDED.malic,
                    yesterday_total = ips.yesterday_total + EXCLUDED.yesterday_total, yesterday_spam = ips.yesterday_spam + EXCLUDED.yesterday_spam,
                    yesterday_malic = ips.yesterday_malic + EXCLUDED.yesterday_malic, active = ips.active + (CASE WHEN ips.lastupdate = EXCLUDED.lastupdate THEN 0 ELSE 1 END),
                    lastupdate = GREATEST(ips.lastupdate, EXCLUDED.lastupdate) WHERE ips.ip = %s RETURNING id""",
                        (ip, sum(info.total.values()), sum(info.spam.values()), sum(info.malic.values()), sum(info.chpass.values()),
                        info.chpass[now], info.firstreg, info.lastreg, info.total[now], info.spam[now], info.malic[now],
                        info.total[yesterday], info.spam[yesterday], info.malic[yesterday],
                        now, geo_id, ip))
                ip_id = self.fetchID('ips', 'ip', ip)
                self.cursor.execute("""INSERT INTO logins (login, ts_reg, ip_id, regtype_id, karma, name, sirname, as_id, geo_id, lang, last_karma, last_ts, last_ip_id, last_as_id, last_geo_id)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (login) DO UPDATE SET last_karma = EXCLUDED.last_karma, last_ts = EXCLUDED.last_ts, last_ip_id = EXCLUDED.last_ip_id, last_geo_id = EXCLUDED.last_geo_id,
                        ts_reg = LEAST(logins.ts_reg, EXCLUDED.ts_reg) WHERE logins.login = %s""",
                        (login, info.ts, ip_id, info.regtype_id, info.karma, info.name, info.sirname,
                        as_id, geo_id, lang, info.karma, info.ts, ip_id, as_id,
                        geo_id, login))
                if info.chpass_type_id:
                    # NOTE(review): the INSERT above has no RETURNING clause, so
                    # fetchID first calls fetchone() on a statement without a
                    # result set - confirm PGProxy cursors tolerate that.
                    login_id = self.fetchID('logins', 'login', login)
                    self.cursor.execute("""INSERT INTO chpass_stat (login_id, ts, ip_id, chpass_type_id, karma, as_id, geo_id) VALUES (%s, %s, %s, %s, %s, %s, %s)
                                        ON CONFLICT (login_id, ts, chpass_type_id) DO NOTHING""",
                                        (login_id, info.ts, ip_id, info.chpass_type_id, info.karma, as_id, geo_id))
            self.logins.clear()
            loggerFLP.trace("Logins cleared: %d" % len(self.logins.keys()))
        except Exception as e:
            loggerFLP.error("saveLogins exception: %s" % str(e), True)

    def saveGraphCounters(self):
        """Upsert all graph points and raise the spam-from-good alarm if needed.

        Writes ``graph``, ``graph_regtypes``, ``graph_services`` and
        ``graph_chpass_types``, then re-reads ``spam_from_good50`` of the
        written timestamps and calls ``sendAlarmer`` for every point at or
        above ``SPAM_FROM_GOOD_ALARM_BOUNDARY``.  Exceptions are logged and
        swallowed; ``self.graph`` is cleared in either case.
        """
        if len(self.graph.keys()) < 1:
            return
        try:
            graph_rows = [(ts, c.total, c.spam, c.malic, len(c.ips), c.newips, c.emails, c.phones, c.badgeo, c.spam_from_good50, c.spam_from_good75, c.total_turkey, c.spam_turkey,
                           c.malic_turkey, c.whitelist, c.locl, ts)
                          for (ts, c) in self.graph.iteritems()]
            self.cursor.executemany("""INSERT INTO graph (ts, total, spam, malic, ips, newips, emails, phones, badgeo, spam_from_good50, spam_from_good75, total_turkey, spam_turkey, malic_turkey, whitelist, locl)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (ts) DO UPDATE SET total = graph.total + EXCLUDED.total, spam = graph.spam + EXCLUDED.spam, malic = graph.malic + EXCLUDED.malic, ips = graph.ips + EXCLUDED.ips,
                        newips = graph.newips + EXCLUDED.newips, emails = graph.emails + EXCLUDED.emails, phones = graph.phones + EXCLUDED.phones, badgeo = graph.badgeo + EXCLUDED.badgeo,
                        spam_from_good50 = graph.spam_from_good50 + EXCLUDED.spam_from_good50, spam_from_good75 = graph.spam_from_good75 + EXCLUDED.spam_from_good75,
                        total_turkey = graph.total_turkey + EXCLUDED.total_turkey, spam_turkey = graph.spam_turkey + EXCLUDED.spam_turkey, malic_turkey = graph.malic_turkey + EXCLUDED.malic_turkey,
                        whitelist = graph.whitelist + EXCLUDED.whitelist, locl = GREATEST(graph.locl, EXCLUDED.locl) WHERE graph.ts = %s""",
                    graph_rows)
            # Each detail row carries (ts, id, cnt) for the INSERT plus (ts, id)
            # for the WHERE clause of the conflict update.
            regtype_rows = [(ts, rtc, v, ts, rtc) for (ts, g) in self.graph.iteritems() for (rtc, v) in g.regtypes.iteritems()]
            self.cursor.executemany("""INSERT INTO graph_regtypes (ts, regtype_id, cnt) VALUES(%s, %s, %s) ON CONFLICT (ts, regtype_id)
                                    DO UPDATE SET cnt = graph_regtypes.cnt + EXCLUDED.cnt WHERE graph_regtypes.ts = %s AND graph_regtypes.regtype_id = %s""",
                                    regtype_rows)
            service_rows = [(ts, sc, v, ts, sc) for (ts, g) in self.graph.iteritems() for (sc, v) in g.services.iteritems()]
            self.cursor.executemany("""INSERT INTO graph_services (ts, service_id, cnt) VALUES(%s, %s, %s) ON CONFLICT (ts, service_id)
                                    DO UPDATE SET cnt = graph_services.cnt + EXCLUDED.cnt WHERE graph_services.ts = %s AND graph_services.service_id = %s""",
                                    service_rows)
            chpass_rows = [(ts, chptc, v, ts, chptc) for (ts, g) in self.graph.iteritems() for (chptc, v) in g.chpass_types.iteritems()]
            self.cursor.executemany("""INSERT INTO graph_chpass_types (ts, chpass_type_id, cnt) VALUES(%s, %s, %s) ON CONFLICT (ts, chpass_type_id)
                                        DO UPDATE SET cnt = graph_chpass_types.cnt + EXCLUDED.cnt WHERE graph_chpass_types.ts = %s AND graph_chpass_types.chpass_type_id = %s""",
                                    chpass_rows)
            # Only the placeholders are interpolated into the statement; the
            # timestamps themselves are passed as query parameters.
            timestamps = tuple(self.graph.keys())
            placeholders = ','.join(['%s'] * len(timestamps))
            self.cursor.execute("SELECT ts, spam_from_good50 FROM graph WHERE ts IN (%s)" % placeholders, timestamps)
            for row in self.cursor.fetchall():
                current_spam_from_good_50 = row[1] if row and isinstance(row, tuple) else 0
                if current_spam_from_good_50 >= SPAM_FROM_GOOD_ALARM_BOUNDARY:
                    loggerFLP.trace("Sending SPAM_FROM_GOOD alarmer")
                    sendAlarmer("Frodo: Spam from good IP-s >= %d" % SPAM_FROM_GOOD_ALARM_BOUNDARY, self.graph[row[0]].delivery_spam_from_good50)
        except Exception as e:
            loggerFLP.error("saveGraphCounters exception: %s" % str(e), True)
        self.graph.clear()

    def addCounter(self, collections_name, field_names, value, *values):
        """Return the id of a dictionary row, inserting the row if it is missing.

        ``collections_name`` is both the table name and the name of the cache
        attribute on ``self``; ``value`` is the cache key and ``values`` are
        the column values matching the comma-separated ``field_names``.

        When no open cursor exists, one is opened for this call, the insert is
        committed and the cursor is closed again - also when the insert
        fails.  Errors are logged and 0 is returned; the method does not
        raise.
        """
        counters, i = getattr(self, collections_name), 0
        if value in counters:
            return counters[value]
        opened_here = False
        try:
            if not self.cursor or self.cursor.closed():
                self.cursor = self._db.cursor()
                opened_here = True
            vals_template = ', '.join(['%s'] * len(field_names.split(',')))
            self.cursor.execute("INSERT INTO {0} ({1}) VALUES ({2}) ON CONFLICT DO NOTHING RETURNING id".format(collections_name, field_names, vals_template), values)
            i = counters[value] = self.fetchID(collections_name, field_names, *values)
            if opened_here:
                self._db.commit()
        except Exception as e:
            loggerFLP.error("addCounter '%s' in %s exception: %s" % (field_names, collections_name, str(e)), True)
        finally:
            if opened_here:
                # Do not leave behind a cursor that was opened only for this call.
                try:
                    self.cursor.close()
                except Exception as e:
                    loggerFLP.error("addCounter '%s' in %s cursor close exception: %s" % (field_names, collections_name, str(e)), True)
        return i

    def addRegType(self, regtype):
        """Return the id of ``regtype`` in table ``regtypes``."""
        return self.addCounter('regtypes', 'regtype', regtype, regtype)

    def addGeo(self, geo):
        """Return the id of ``geo`` in table ``geozones``.

        The first two characters of ``geo`` are stored as ``country`` and
        everything from the fourth character on as ``city``.
        """
        return self.addCounter('geozones', 'country, city', geo, geo[:2], geo[3:])

    def addService(self, service):
        """Return the id of ``service`` in table ``services``."""
        return self.addCounter('services', 'service', service, service)

    def addChpassType(self, chpass_type):
        """Return the id of ``chpass_type`` in table ``chpass_types``."""
        return self.addCounter('chpass_types', 'chpass_type', chpass_type, chpass_type)

    def addAS(self, AS):
        """Return the id of ``AS`` (column ``_as``) in table ``ass``."""
        return self.addCounter('ass', '_as', AS, AS)

    def logSwitch(self, now):
        """Flush everything, then rotate the daily counters of table ``ips``.

        Any exception is logged and swallowed.
        """
        loggerFLP.trace("DB: logSwitch start")
        try:
            self.save(now)
            self.cursor = self._db.cursor()
            LoginInfo.logSwitch(self.cursor)
            self.logins.clear()
            self._db.commit()
            self.cursor.close()
        except Exception as e:
            loggerFLP.error("DB: logSwitch exception: %s" % str(e), True)
        loggerFLP.trace("DB: logSwitch done")
