import sys
from datetime import timedelta
from decimal import Decimal
import glob

import pytz

import psycopg2

from config.settings import *
from settings import *
import util.db
from util.parser import parse_file_header, parse_sar_line
from config.sections import SECTIONS, TABLES, determine_section

def _map_columns(columns, section):
    """Map the columns of a header line onto the fields of *section*.

    *section* is indexed by column name; each entry is a sequence whose first
    item is the database field name, whose second item is the column type
    and whose optional third item, when true, marks the field as part of the
    row key.

    Returns a ``(section_fields, section_types, key_fields)`` tuple.  The
    first two lists run parallel to *columns* and hold ``None`` for every
    column that *section* does not know about, so unknown columns can be
    skipped when data lines are processed.  ``key_fields`` always starts
    with ``'measurement_time'``.
    """
    section_fields = []
    section_types = []
    key_fields = ['measurement_time']

    for column_name in columns:
        if column_name not in section:
            # A column we don't recognize; leave a "hole" in the structures
            # so we skip over it.
            section_fields.append(None)
            section_types.append(None)
            continue

        spec = section[column_name]
        section_fields.append(spec[0])
        section_types.append(spec[1])
        if len(spec) > 2 and spec[2]:
            key_fields.append(spec[0])

    return section_fields, section_types, key_fields


def _insert_statement(table, field_list):
    """Build the INSERT statement (pyformat placeholders) for a new section."""
    # Joining one combined list avoids a dangling comma when field_list is
    # empty.
    names = ['measurement_time'] + field_list
    return "INSERT INTO %s(%s) VALUES ( %s )" % (
        table,
        ', '.join(names),
        ', '.join(['%(' + name + ')s' for name in names]),
    )


def _update_statement(table, field_list, key_fields):
    """Build the UPDATE statement (pyformat placeholders) for a subsection.

    Every field that is not a key field is assigned; the key fields form the
    WHERE clause.
    """
    value_fields = [name for name in field_list if name not in key_fields]
    assignments = ", ".join([name + '=%(' + name + ')s' for name in value_fields])
    conditions = " AND ".join([name + '=%(' + name + ')s' for name in key_fields])
    return ("UPDATE %s SET " % (table,)) + assignments + " WHERE " + conditions


def _load_sar_file(cur, filename, current_tz):
    """Parse one sar text file and write its data lines through *cur*.

    All parsing state is local to this call, so nothing carries over from a
    previously processed file: the first recognized section of every file is
    always loaded with INSERTs.
    """
    section_name = None
    subsection = None
        # Sorted column list of the subsection being loaded; None until the
        # first recognized header of this file has been seen.

    header_expected = True
        # If true, we're expecting a header rather than a data line.

    skipping = False
    last_timestamp = None

    sql_string = None
    section_fields = []
    section_types = []

    with open(filename, 'r') as sar_file:
        sys.stderr.write("Processing " + filename + "\n")

        start_date = parse_file_header(sar_file.readline())
        current_date = start_date

        sys.stderr.write("Date from file header is " + str(current_date) + "\n")

        for line in sar_file:
            line = line.strip()

            if line == '':
                # A blank line ends the current block; the next line that
                # carries a timestamp is treated as a header.
                header_expected = True
                skipping = False
                continue

            if skipping:
                continue

            ( timestamp, columns ) = parse_sar_line(line, current_date, current_tz)

            if timestamp is None:
                # This breaks Solaris-style sar files; fix at some point.
                continue

            if header_expected:
                header_expected = False

                # We treat the set of columns we received as a header, rather
                # than as data.

                if sorted(columns) == subsection:
                    # If we have the same set of columns, just continue.
                    sys.stdout.write("Continuing section %s\n" % (section_name,))
                    continue

                last_timestamp = None

                ( new_section_name, new_section ) = determine_section(columns)

                if new_section_name is None:
                    sys.stdout.write("Skipping section with columns: %s ...\n"
                                     % (','.join(columns[:2]),))
                    skipping = True
                    continue

                # We're now starting either a new section or a new subsection.
                # In either case, we first calculate the list of fields to
                # insert.

                current_date = start_date
                    # Rewind back to the first date

                ( section_fields, section_types, key_fields ) = _map_columns(columns, new_section)
                field_list = [ name for name in section_fields if name is not None ]

                section_table = TABLES[new_section_name]

                # If it's a new section, we use INSERTs to add the data; if
                # it's a new subsection, we use UPDATEs.

                if section_name != new_section_name:
                    sql_string = _insert_statement(section_table, field_list)
                    sys.stdout.write("Starting section %s\n" % (new_section_name,))
                else:
                    sql_string = _update_statement(section_table, field_list, key_fields)
                    sys.stdout.write("Starting subsection of %s\n" % (new_section_name,))

                section_name = new_section_name
                subsection = sorted(columns)
                continue

            # Data line.

            if (last_timestamp is not None) and (last_timestamp > timestamp):
                # If we've suddenly gone back in time, this means we've
                # crossed a day boundary; we increment to the next day.  The
                # timestamp was parsed against the old date, so it is
                # shifted as well.
                current_date += timedelta(days=1)
                timestamp += timedelta(days=1)

            values = { 'measurement_time': timestamp }

            for column_type, field_name, column_value in zip(section_types, section_fields, columns):
                if field_name is None:
                    continue

                if column_type == 'n':
                    values[field_name] = Decimal(column_value)
                else:
                    values[field_name] = column_value

            try:
                cur.execute(sql_string, values)
            except psycopg2.IntegrityError:
                # Deliberate best effort: a duplicate row is discarded
                # rather than aborting the load.
                pass

            last_timestamp = timestamp


def load_sar():
    """Reload the sar tables from the text files under SAR_PATH.

    Every table named by iterating TABLES is truncated, then each
    ``*.sar.txt`` file in SAR_PATH is processed in sorted filename order.
    Within a file, blocks of lines are separated by blank lines and each
    block starts with a header line; the first subsection of a section is
    loaded with INSERTs and later subsections of the same section with
    UPDATEs.  Rows that raise ``psycopg2.IntegrityError`` are skipped.  The
    cursor and connection are closed even if loading fails.
    """
    current_tz = pytz.timezone(TIMEZONE)

    connection = util.db.connect(transactions=False)
        # Right now, we insert everything without transactions, so if a duplicate
        # row comes in we can discard it without losing an entire transaction's
        # worth of data.

    try:
        cur = connection.cursor()
        try:
            # NOTE(review): TABLES is indexed by section name when loading, so
            # if it is a mapping this loop iterates its keys -- confirm they
            # are the actual table names.
            for table in TABLES:
                cur.execute("TRUNCATE TABLE %s" % ( table, ) )

            for filename in sorted(glob.glob(SAR_PATH + '/*.sar.txt')):
                _load_sar_file(cur, filename, current_tz)
        finally:
            cur.close()
    finally:
        connection.close()
