#!/usr/bin/python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, time, re, psycopg2
import psycopg2.errorcodes
from datetime import datetime, timedelta
from collections import defaultdict
from common import *

# Target number of time buckets per graph: DataExtractor.parsePeriod divides
# the requested range by this value to pick its bucket width.
GRAPH_POINTS = 500

class DataExtractor:
    def __init__(self):
        """Store the database handle returned by getPGdb(PG) for all later queries.

        NOTE(review): getPGdb and PG are not defined in this file; presumably
        they come from ``from common import *`` -- confirm.
        """
        self._db = getPGdb(PG)

    def get_aggregator(self, field):
        if field == "locl":
            return "MAX(%s)" % field
        else:
            return "ROUND(AVG(%s), 2)" % field

    def parsePeriod(self, starttime, endtime, follow, parser, fields, sec_table_field = ''):
        """Bucket graph data and clusterizer signals by time and feed them to parser.

        The bucket width is chosen so that roughly GRAPH_POINTS buckets cover
        the requested range, and is never narrower than PARSE_PERIOD.
        ``parser.parse(timestamp, params, signals)`` is then called once per
        bucket in ascending order, starting at the bucket containing
        ``starttime`` and stopping before ``endtime``.  Buckets without data
        receive an empty ``defaultdict(int)`` as ``params``.

        starttime       -- lower bound (inclusive) for the ``ts`` column.
        endtime         -- upper bound for ``ts``; ignored when ``follow`` is true.
        follow          -- when true, the upper bound is read from MAX(ts) of
                           the source table, then narrowed to the newest
                           bucket that actually has data.
        parser          -- object providing ``parse(timestamp, params, signals)``.
        fields          -- without ``sec_table_field``: columns of table
                           ``graph`` to aggregate (see get_aggregator).  With
                           it: values of that column in the joined table, one
                           averaged ``cnt`` series per value; the name 'dash'
                           stands for the value '-'.
        sec_table_field -- optional; switches the source to table
                           ``graph_<name>s`` joined to ``<name>s``.
        """
        # Both source tables are aliased as "g" so that every query below,
        # including the MAX(g.ts) lookup, can refer to g.ts.
        source = "graph_%ss g" % sec_table_field if sec_table_field else "graph g"
        records = {}
        signals = defaultdict(lambda: defaultdict(list))
        cursor = self._db.cursor()
        try:
            if follow:
                subq = ""
                cursor.execute("SELECT MAX(g.ts) FROM %s" % source)
                endtime = cursor.fetchone()[0]
                if endtime is None:
                    endtime = 0
            else:
                subq = "AND ts <= %d" % endtime
            numpoints = float(endtime - starttime) / PARSE_PERIOD
            coeff = numpoints / GRAPH_POINTS
            # Named "bucket" rather than "timedelta" so the datetime.timedelta
            # import is not shadowed inside this method.
            bucket = max(int(PARSE_PERIOD * coeff), PARSE_PERIOD)
            if sec_table_field:
                joined = '{0} LEFT JOIN {1}s o ON g.{1}_id = o.id'.format(source, sec_table_field)
                for field in fields:
                    value = '-' if field == 'dash' else field
                    # "%%s" survives the formatting as a driver placeholder, so
                    # the value is passed as a bound parameter, not spliced in.
                    query = "SELECT ROUND(ts / %d) * %d AS t, AVG(g.cnt) FROM %s WHERE o.%s = %%s AND ts >= %d %s GROUP BY t;" % (bucket, bucket, joined, sec_table_field, starttime, subq)
                    cursor.execute(query, (value,))
                    rows = cursor.fetchmany()
                    loggerFS.trace("DataExtractor parsePeriod (Field = '%s'):\n\tQuery: %s\nFirstChunkLength: %s" % (field, query, len(rows)))
                    while rows:
                        for record in rows:
                            loggerFS.trace("DataExtractor parsePeriod (Field = '%s'): Record: %s" % (field, record))
                            timestamp = int(record[0])
                            if timestamp not in records:
                                # New bucket: every series starts at 0 ...
                                records[timestamp] = dict.fromkeys(fields, 0)
                            # ... and the current series always gets its value,
                            # including the first time the bucket is seen.
                            records[timestamp][field] = float(record[1])
                        rows = cursor.fetchmany()
                    loggerFS.trace("DataExtractor parsePeriod (Field = '%s'):\n\tQuery: %s\n\tRecords: %s" % (field, query, records))
            else:
                fields_avg = ", ".join([self.get_aggregator(field) for field in fields])
                query = "SELECT ROUND(g.ts / %d) * %d t, %s FROM %s WHERE g.ts >= %d %s GROUP BY t" % (bucket, bucket, fields_avg, source, starttime, subq)
                cursor.execute(query)
                for record in cursor.fetchall():
                    # Column 0 is the bucket; columns 1.. follow the order of fields.
                    records[int(record[0])] = dict(zip(fields, record[1:]))
            if follow and records:
                endtime = max(records)
            query = """SELECT ROUND(ts / %d) * %d t, ts, keyword, indicator, percent, docscount FROM clusterizer_signal WHERE ts >= %d %s ORDER BY t, keyword""" % \
                       (bucket, bucket, starttime, subq)
            cursor.execute(query)
            for record in cursor.fetchall():
                timestamp = int(record[0])
                real_timestamp = int(record[1])
                signals[timestamp][record[2]] = [real_timestamp, "Indicator: %s, Percent: %s, TotalDocs: %s" % (record[3], record[4], record[5])]
        finally:
            # One cursor serves every query and is released even on failure.
            cursor.close()
        # int(endtime): MAX(ts) may come back as a non-int numeric type, which
        # range() does not accept.
        for timestamp in range(int(starttime / bucket) * bucket, int(endtime), bucket):
            if timestamp in records:
                params = records[timestamp]
            else:
                params = defaultdict(int)
            parser.parse(timestamp, params, signals[timestamp])

    def extractIpInfo(self, field, number, filter):
        """Return the top ``number`` rows of table ``ips`` ordered by ``field`` descending.

        field  -- column (or select alias) used for ORDER BY; rows where it is
                  negative are excluded.
        number -- maximum number of rows (LIMIT).
        filter -- list of SQL condition strings joined with AND.  Items are
                  used verbatim except:
                  * items starting with "firstreg"/"lastreg": the text after
                    the name and one operator character may be
                    "YYYY-MM-DD HH:MM:SS", "YYYY-MM-DD" or an integer number
                    of days before today; it is replaced by the matching
                    local-time epoch seconds.  Unparseable values are left
                    untouched.
                  * items starting with "geo": the rest is a comma-separated
                    list of "country city" names, turned into a sub-select on
                    table ``geozones``.
                  The list passed in is not modified.

        Returns the rows from ``cursor.fetchall()``.
        """
        today = datetime.today().date()

        def to_epoch(text):
            # Try the accepted spellings in order; None means "not recognised".
            candidates = (
                lambda s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S"),
                lambda s: datetime.strptime(s, "%Y-%m-%d"),
                lambda s: today - timedelta(days = int(s)),
            )
            for parse in candidates:
                try:
                    return int(time.mktime(parse(text).timetuple()))
                except (ValueError, OverflowError):
                    continue
            return None

        conditions = []
        for condition in filter:
            if condition.startswith(("firstreg", "lastreg")):
                # Skip "reg" plus the single operator character that follows it.
                pos = condition.find("reg") + 4
                timestamp = to_epoch(condition[pos:])
                if timestamp is not None:
                    condition = condition[:pos] + str(timestamp)
            elif condition.startswith("geo"):
                # Double embedded apostrophes so a name such as "Cote d'Ivoire X"
                # stays inside its SQL string literal.
                geozones = [name.replace("'", "''")
                            for name in re.findall(r"[^,\s][^,]*[^,\s]", condition[3:], re.UNICODE)]
                if len(geozones) == 1:
                    condition = "geo = (SELECT id FROM geozones WHERE (country || ' ' || city) = '" + geozones[0] + "')"
                else:
                    condition = "geo IN (SELECT id FROM geozones WHERE (country || ' ' || city) IN (" + ",".join(["'%s'" % elem for elem in geozones]) + "))"
            conditions.append(condition)
        conditions.append("%s >= 0" % field)
        where = "WHERE %s" % " AND ".join(conditions)
        # The join uses ips.geo: the FROM clause names table "ips" and defines
        # no alias "ip".
        query = """SELECT ip, firstreg, lastreg, total, yesterday_total, all_total, spam, yesterday_spam, all_spam, malic, yesterday_malic, all_malic,
                      (total - spam - malic) ham, (yesterday_total - yesterday_spam - yesterday_malic) yesterday_ham, (all_total - all_spam - all_malic) all_ham,
                      (spam + malic) spamlic, (yesterday_spam + yesterday_malic) yesterday_spamlic, (all_spam + all_malic) all_spamlic, active, all_chpass,
                      (geozones.country || ' ' || geozones.city) geozone
                      FROM ips LEFT JOIN geozones ON ips.geo = geozones.id %s ORDER BY %s DESC LIMIT %d""" % (where, field, number)
        cursor = self._db.cursor()
        try:
            cursor.execute(query)
            ret = cursor.fetchall()
        finally:
            cursor.close()
        loggerFS.trace("Query: %s, Return: %s" % (query, ret))
        return ret

    def getGeoZones(self):
        cursor = self._db.cursor()
        cursor.execute("SELECT DISTINCT (country || ' ' || city) geozone FROM geozones")
        ret = cursor.fetchall()
        cursor.close()
        return ret

    def getGeoGroups(self):
        cursor = self._db.cursor()
        cursor.execute("SELECT name, value FROM geogroup")
        ret = {}
        for [name, value] in cursor.fetchall():
            ret[name] = value
        cursor.close()
        return ret

    def saveGeoGroup(self, name, value):
        cursor = self._db.cursor()
        try:
            cursor.execute("INSERT INTO geogroup (name, value) VALUES('%s', '%s')" % (name, value))
        except psycopg2.DatabaseError, e:
            if e.pgcode == '23505': # unique_violation
                return "Group with that name already exist"
        except Exception, e:
            #if e[ 0 ] == 1062:
            #    return "Group with that name already exist"
            return str(e)
        return ""

    def deleteGeoGroup(self, name):
        cursor = self._db.cursor()
        cursor.execute("DELETE FROM geogroup WHERE name = '%s'" % name)
        return ""

    def getGeoStat(self):
        cursor = self._db.cursor()
        cursor.execute("""SELECT (gz.country || ' ' || gz.city) geo, day_total, day_ham, hour_total, hour_ham FROM geostat gs LEFT JOIN geozones gz ON gs.geo_id = gz.id
                            WHERE gs.geo_id NOT IN (SELECT geo_id FROM geostatwhite)""")
        return cursor.fetchall()

    def getGeoStatWhite(self):
        cursor = self._db.cursor()
        cursor.execute("SELECT (gz.country || ' ' || gz.city) geo FROM geostatwhite gsw LEFT JOIN geozones gz ON gsw.geo_id = gz.id ORDER BY gz.country, gz.city")
        return cursor.fetchall()

    def addGeoStatWhite(self, name):
        cursor = self._db.cursor()
        try:
            cursor.execute("INSERT INTO geostatwhite (geo_id) VALUES('%s')" % self._db.addGeo(name))
        except psycopg2.DatabaseError, e:
            if e.pgcode == '23505': # unique_violation
                return "Record in whitelist already exist"
        except Exception, e:
            #if e[ 0 ] == 1062:
            #    return "Record in whitelist already exist"
            return str(e)
        return ""

    def removeGeoStatWhite(self, name):
        cursor = self._db.cursor()
        cursor.execute("DELETE FROM geostatwhite WHERE geo_id = '%s'" % self._db.addGeo(name))
        return ""

    def getSignalFullLog(self, timestamp):
        cursor = self._db.cursor()
        cursor.execute("SELECT log FROM clusterizer_log WHERE ts = date_part('epoch', timestamptz '%s')" % timestamp)
        rec = cursor.fetchone()
        if rec is None:
            return ""
        else:
            return to_utf8(rec[0])

    def getSignalShortLog(self, timestamp, keyword):
        log = self.getSignalFullLog(timestamp)
        lines = filter(lambda line: line.find(keyword + " Clusters 1 ") >= 0, log.split("\n"))
        return "\n".join(lines)

    def getSignalList(self, date):
        morning = time.mktime(date.timetuple())
        evening = time.mktime((date + timedelta(days = 1)).timetuple())
        cursor = self._db.cursor()
        cursor.execute("SELECT ts, keyword, indicator, percent, docscount FROM clusterizer_signal WHERE ts >= %d and ts < %d" % (morning, evening))
        return cursor.fetchall()
