#!/usr/bin/python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import re, time, string
from collections import defaultdict
from datetime import datetime
from statisticsdb import GraphCounter

class ClusterChecker:
    """Assemble log records line by line and hand them to clusterizers.

    Lines are fed one at a time through parse(). A blank line ends the
    current record; unless the record is skipped, its rule vector and raw
    text are passed to two clusterizers (one keyed by registration type,
    one keyed by service) taken from ``db.clusterizers`` for the time
    bucket the record falls into.
    """

    class Record:
        """Accumulator for the fields of a single log record."""

        def __init__( self ):
            # All fields are per-instance so that no state (in particular
            # the mutable vector) can be shared between records.
            self.ignore = False       # True once a rules line contains IGNORE_CLUSTER
            self.regtype = "SIMPLE"   # default until a rules/rtyp line overrides it
            self.service = ""         # lower-cased value of the "from" line
            self.log = ""             # concatenation of every non-blank line of the record
            self.timestamp = -1       # -1 means "no timestamp parsed"
            self.vector = defaultdict( int )  # indicator index -> 1 for each rule seen

    def __init__( self, db, period ):
        """Create a checker.

        db     -- object exposing ``clusterizers``, indexed by bucket start.
        period -- bucket size; used in the same units as the parsed
                  timestamps (presumably seconds -- confirm with callers).
        """
        # Registration types that may show up inside a rules line.
        self._re_regtype = re.compile( "__ACT_ADMREG|__ACT_CH_PASS|__ACT_LIGHTREG|__ACT_REG_ONE" )
        # Registration types accepted from an "rtyp" line.
        self._rtyp_regtypes = set( [ "ADMIMPORTREG", "ADMSOCIALREG", "UNKNOWN", "POSTREGISTRATION" ] )
        # The timestamp is the first run of digits enclosed in parentheses.
        self._re_rtim = re.compile( r"\((\d+)\)" )
        # Rule name -> vector index; shared by all records seen by this checker.
        self._indicator = {}
        self._db = db
        self._period = period
        self._record = ClusterChecker.Record()

    def logSwitch( self, filename ):
        """Do nothing; the filename argument is ignored."""
        pass

    def _recordFinished( self ):
        """Pass the current record to the clusterizers unless it is skipped.

        A record is skipped when it is flagged as ignored, has no parsed
        timestamp, has an empty service, or has regtype "__ACT_CH_PASS".
        """
        r = self._record
        if r.ignore or r.timestamp == -1 or not r.service or r.regtype == "__ACT_CH_PASS":
            return

        # Start of the period-sized bucket that contains the timestamp.
        timeperiod = int( r.timestamp / self._period ) * self._period

        # NOTE(review): db.clusterizers and cl.CL are indexed without a
        # membership check; presumably both create entries on demand --
        # confirm against the db implementation.
        cl = self._db.clusterizers[ timeperiod ]
        cl.CL[ r.regtype ].checkDoc( r.vector, r.log )
        cl.CL[ r.service ].checkDoc( r.vector, r.log )
        cl.modified = True

    def _parseRules( self, line ):
        """Update the current record from a rules line.

        A line containing "IGNORE_CLUSTER" only flags the record as ignored.
        Otherwise the text after the 5-character prefix is split on commas,
        and the first whitespace-separated token of every non-empty item is
        taken as a rule name and marked with 1 in the record's vector. A
        known registration type found anywhere in the line becomes the
        record's regtype.
        """
        if "IGNORE_CLUSTER" in line:
            self._record.ignore = True
            return

        for item in line[ 5: ].split( "," ):
            tokens = item.split()
            if not tokens:
                # Empty or whitespace-only item (e.g. a trailing comma).
                continue
            # Indices are handed out contiguously from 0, so the current
            # size of the mapping is the next free index.
            index = self._indicator.setdefault( tokens[ 0 ], len( self._indicator ) )
            self._record.vector[ index ] = 1

        found = self._re_regtype.search( line )
        if found is not None:
            self._record.regtype = found.group( 0 )

    def _parseTimestamp( self, line ):
        """Store the first parenthesised number of line as the timestamp.

        The record's timestamp is left untouched when the line holds no
        such number or the number cannot be converted.
        """
        found = self._re_rtim.search( line )
        if found is None:
            return
        try:
            self._record.timestamp = int( found.group( 1 ) )
        except ValueError:
            # Best effort: an unconvertible number leaves the record
            # without a timestamp, which makes _recordFinished skip it.
            pass

    def parse( self, line, filename ):
        """Consume one log line.

        A blank line finishes the current record and starts a new one.
        Any other line is dispatched on its first four characters ("ruls",
        "r_nl", "from", "rtim", "rtyp") and then appended to the record's
        raw log text. The filename argument is not used.
        """
        if not line.strip():
            self._recordFinished()
            self._record = ClusterChecker.Record()
            return

        r = self._record
        prefix = line[ 0:4 ]

        if prefix in ( "ruls", "r_nl" ):
            self._parseRules( line )
        elif prefix == "from":
            r.service = line[ 5: ].strip().lower()
        elif prefix == "rtim":
            self._parseTimestamp( line )
        elif prefix == "rtyp":
            regtype = line[ 5: ].strip().upper()
            if regtype in self._rtyp_regtypes:
                r.regtype = regtype
        r.log += line
