import sys
import csv
import glob
import gzip
import re

from dateutil.parser import parse

from config.settings import *
from settings import *

import util.db


#

class LogTimeRange:
    """Track the earliest and latest timestamp among the ingested rows.

    Life cycle: begin() resets the range, ingest() is called once per row,
    and done() writes the collected range to the ``log_range`` table.
    """

    # Earliest / latest parsed timestamp so far; None until a row is ingested.
    minimum = None
    maximum = None

    def begin(self, connection):
        """Reset the tracked range.  ``connection`` is accepted but unused."""
        self.maximum = None
        self.minimum = None

    def ingest(self, connection, line):
        """Widen the range so that it includes the timestamp in ``line[0]``.

        ``line[0]`` is converted with dateutil's ``parse``; whatever that
        call raises propagates to the caller.  ``connection`` is unused.
        """
        line_time = parse(line[0])
        if self.maximum is None or line_time > self.maximum:
            self.maximum = line_time
        if self.minimum is None or line_time < self.minimum:
            self.minimum = line_time

    def done(self, connection):
        """Store the collected range in ``log_range`` and commit.

        The UPDATE has no WHERE clause, so it applies to every row of the
        table.  When nothing was ingested both parameters are passed as None.
        """
        statement = """
            UPDATE log_range SET minimum = %(min)s::TIMESTAMPTZ,
                                 maximum = %(max)s::TIMESTAMPTZ;
        """
        bounds = {'min': self.minimum, 'max': self.maximum}

        cur = connection.cursor()
        try:
            cur.execute(statement, bounds)
            connection.commit()
        finally:
            # Release the cursor even when the UPDATE or the commit fails.
            cur.close()
        
    
#

# Script setup: build the ingestors, connect to the database and open the
# two CSV outputs (accepted rows -> stdout, rejected rows -> reject log).

# Every object in this tuple gets begin(), ingest() per accepted row, and
# done() from the driver code in this script.
ingestors = ( LogTimeRange(), )

connection = util.db.connect()

for ingestor in ingestors:
    ingestor.begin(connection)

# Raise the csv module's maximum field size so very large fields do not
# make the reader fail.
csv.field_size_limit(1000000000)

# CSV writer on standard output.
writer = csv.writer(sys.stdout)

# The first command-line argument is the path of the reject log; the file
# is opened for writing, so an existing file is truncated.
reject_log_file = open(sys.argv[1], 'w')
reject_log = csv.writer(reject_log_file)

def reject(row, filename, line_num, reason):
    """Record a rejected row in the reject log.

    Writes a header line ``<filename> <line_num> <reason>`` to the reject
    log file, followed by ``row`` itself in CSV form.
    """
    # A plain write() replaces the Python-2-only ``print >>file`` statement.
    # The output is unchanged (the three values separated by single spaces,
    # then a newline), and the line no longer fails under Python 3, where
    # the old form compiles but raises TypeError when executed.
    reject_log_file.write('%s %s %s\n' % (filename, line_num, reason))
    reject_log.writerow(row)

# Number of accepted rows across all files; drives the progress messages.
total_lines = 0

# Compiled once up front; matched against the start of each row's first column.
dts_pattern = re.compile(r'\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d')

# Gzipped logs take precedence: plain .csv files are only looked for when
# no .csv.gz file exists in LOGS_PATH.
log_files = glob.glob(LOGS_PATH + '/*.csv.gz')
gzipped = bool(log_files)

if not gzipped:
    log_files = glob.glob(LOGS_PATH + '/*.csv')
    if not log_files:
        sys.stderr.write("No log files found.\n")
        sys.exit(1)

for filename in sorted(log_files):
    source = gzip.open(filename, 'r') if gzipped else open(filename, 'r')

    sys.stderr.write("Processing " + filename + "\n")
    reader = csv.reader(source)
    line_count = 0
    rejects = 0
    row = ''
    try:
        # line_count is the number of rows read so far from this file.
        for line_count, row in enumerate(reader, 1):
            width = len(row)
            if width == 22:
                # One column short: pad with an empty trailing field.
                row.append('')
            elif width != 23:
                reject(row, filename, reader.line_num, 'Column count is ' + str(width))
                rejects += 1
                continue

            if dts_pattern.match(row[0]) is None:
                reject(row, filename, reader.line_num, 'First column is not a DTS')
                rejects += 1
                continue

            # Accepted: echo the row, then hand it to every ingestor.
            writer.writerow(row)
            for ingestor in ingestors:
                ingestor.ingest(connection, row)

            total_lines += 1
            if total_lines % 100000 == 0:
                sys.stderr.write(str(total_lines) + " processed\n")
    except Exception as e:
        # Any failure abandons the rest of this file; the remaining files
        # are still processed.
        sys.stderr.write("Error in file "+filename+" at line "+str(line_count)+", exception:")
        sys.stderr.write(str(e)+"\n")
        sys.stderr.write("Previous line: " + str(row) + "\n")
        continue
    finally:
        source.close()

    # Only reached when the whole file was read without an exception.
    sys.stderr.write(filename + " done, " + str(line_count) + " lines, " + str(rejects) + " rejects\n")

# Let every ingestor finish its work, then release the shared resources.
for ingestor in ingestors:
    ingestor.done(connection)

connection.close()

reject_log_file.close()