#!/usr/bin/python
# -*- coding: utf-8 -*-

import ConfigParser
import errno
import fcntl
import logging
import os
import re
import socket
import sys
import time

# from https://github.com/F483/neobunch
class Config(dict):
    """A dict whose keys can also be read, written and deleted as attributes.

    Real attributes of the object (including dict methods such as ``keys``)
    always take precedence over keys of the same name; only names that are
    not real attributes are routed to the underlying mapping.
    """

    def __contains__(self, k):
        """Return True if ``k`` is a key or an attribute name of this object."""
        try:
            return dict.__contains__(self, k) or hasattr(self, k)
        except Exception:
            # Unhashable keys or non-string attribute names are simply
            # "not contained". Only ordinary errors are swallowed, so
            # KeyboardInterrupt/SystemExit still propagate.
            return False

    def __getattr__(self, k):
        """Return the real attribute ``k``, falling back to the key ``k``.

        Raises:
            AttributeError: if ``k`` is neither an attribute nor a key.
        """
        try:
            return object.__getattribute__(self, k)
        except AttributeError:
            try:
                return self[k]
            except KeyError:
                raise AttributeError(k)

    def __setattr__(self, k, v):
        """Set ``k``: as a real attribute if one exists, otherwise as a key.

        Raises:
            AttributeError: if storing the key fails with an ordinary error.
        """
        try:
            object.__getattribute__(self, k)
        except AttributeError:
            try:
                self[k] = v
            except Exception:
                # Report ordinary failures the way attribute access would,
                # without masking interpreter-exit exceptions.
                raise AttributeError(k)
        else:
            object.__setattr__(self, k, v)

    def __delattr__(self, k):
        """Delete ``k``: the real attribute if one exists, otherwise the key.

        Raises:
            AttributeError: if ``k`` is neither an attribute nor a key.
        """
        try:
            object.__getattribute__(self, k)
        except AttributeError:
            try:
                del self[k]
            except KeyError:
                raise AttributeError(k)
        else:
            object.__delattr__(self, k)


# Default value for every known option; only a flat dict is supported.
# get_config() uses the Python type of each default to decide how the
# configured value is parsed: int -> int(), bool -> getboolean(),
# list -> comma-separated items, dict -> comma-separated key:value pairs,
# anything else -> the raw string. Options not listed here are ignored with
# a warning. A default of 'REQUIRED' (or a list starting with 'REQUIRED')
# must be overridden for every non-[main] section, otherwise get_config()
# returns None.
defaults = {
    'logfile':                        '/var/log/logpusher.log', # [main] only; kept for compatibility, the log is redirected to a file by the upstart script
    'cache_dir':                      '/var/cache/logsender', # [main] only, dir for metadata (logfile position) files
    'bot_host':                       'bot.yandex-team.ru:80',# [main] only
    'clickhouse_host':                ['REQUIRED'],
    'regexp':                         '',
    'file':                           'REQUIRED',
    'date_format':                    '%d/%b/%Y:%H:%M:%S',
    'table':                          'logs',
    'ch_columns':                     ['hostname', 'dc', 'day', 'date', 'src_ip', 'vhost', 'url', 'code', 'static', 'time'], # default columns with special parsing rules
    'user_columns':                   [],
    'ch_send_timeout':                600,
    'skip_urls':                      [], # /alive, etc
    'vhost':                          None,
    'store_urls':                     True,
    'store_full_request_time':        False,
    'skip_dangerous_file_first_time': False,       # seek to the last file line if filesize > dangerous_file_size (only on the first run)
    'mon_threshold':                  117766800,   # if the difference between the cached log position and the real log size is bigger than mon_threshold, the section is considered stale
    'dangerous_file_size':            10485760000, # 10gb, ~10 hours of work
    'lines_bundle_min_size':          10000,       # force sending data to CH once more than 10000 raw lines are parsed, but no more than one bundle per 10 sec
    'lines_bundle_timeout':           10,
    'request_timeout':                20,
    'connect_timeout':                1,
    'ch_user':                        'default',
    'ch_passwd':                      None,
    'json':                           None,
    'date_column':                   '',
    'constant_columns':              {}
}


def get_metadata_filename(logfile, logType, cache_dir):
    """Return the path of the metadata file for a log file and log type.

    The path is ``<cache_dir>/<logfile with '/' replaced by '_'>_<logType>``.
    """
    # Flatten the log path so it can serve as a single file name.
    flattened = logfile.replace('/', '_')
    parts = (cache_dir, flattened, logType)
    return "%s/%s_%s" % parts


def set_lock_file(fd):
    """Try to take an exclusive, non-blocking flock on ``fd``.

    Up to three attempts are made, one second apart, while the lock is held
    by someone else. Any other locking error aborts immediately.

    Args:
        fd: an open file object or a file descriptor accepted by fcntl.flock.

    Returns:
        True if the lock was acquired, False otherwise.
    """
    # Label used in log messages: the file's name when available, otherwise
    # whatever was passed in (e.g. a raw descriptor number).
    target = getattr(fd, 'name', fd)
    attempts = 3
    for attempt in range(attempts):
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            return True
        except IOError as e:
            if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
                # Not a contention error: retrying will not help.
                logging.debug("Error lock file %s: %s" % (target, e))
                break
            # Lock is held elsewhere; wait before the next attempt, but do
            # not sleep after the final one.
            if attempt < attempts - 1:
                time.sleep(1)
            logging.debug("Can't lock file %s. Trying again" % target)
    logging.debug("Max tryes lock file %s. Exit" % target)
    return False

def getLogPos(logFile, logType, cache_dir):
    """Read the saved read position of a log file from its metadata file.

    The first line of the metadata file is split on ':' and interpreted as
    ``inode:position[:flag]``; a third field greater than zero marks the
    state as a first run.

    Args:
        logFile: path of the log file being tracked.
        logType: log type, part of the metadata file name.
        cache_dir: directory holding the metadata files.

    Returns:
        Tuple ``(inode, position, first_run)``. If the metadata file is
        missing, cannot be locked, or cannot be parsed, ``(0, 0, True)`` is
        returned.
    """
    datafile = get_metadata_filename(logFile, logType, cache_dir)
    try:
        with open(datafile, 'r') as fd:
            if not set_lock_file(fd):
                raise IOError("can't lock file %s" % datafile)
            try:
                posdata = fd.readline().split(":")
            finally:
                # Release the lock even if the read fails.
                fcntl.flock(fd, fcntl.LOCK_UN)
        # Convert inside the try so a corrupted file resets the position
        # instead of crashing the caller.
        inode = int(posdata[0])
        position = int(posdata[1])
        first_run = len(posdata) > 2 and int(posdata[2]) > 0
    except Exception as e:
        logging.debug("Can't open pos file %s: %s" % (datafile, str(e)))
        return 0, 0, True
    return inode, position, first_run


def get_config():
    """Load /etc/logpusher/main.cfg plus conf.d/*.conf into a Config tree.

    Sections from conf.d files are merged into the sections of main.cfg. A
    non-[main] section that already exists is reported and skipped. Every
    resulting section starts from ``defaults``, then receives the options of
    [main], then its own options, each converted according to the type of
    the corresponding default. Derived entries (``bot_url_template``,
    ``regexp_compiled``, ``columns``, ``regexp_convert_time``, ``fqdn``) are
    added to every section.

    Returns:
        Config mapping section name -> Config of options, or None if more
        than one conf.d file defines [main] or a non-[main] section leaves a
        REQUIRED option unset.

    Raises:
        Exception: any error raised while converting an option value is
            logged and re-raised.
    """
    conf_dir = '/etc/logpusher/'
    conf_d_dir = os.path.join(conf_dir, 'conf.d')

    iniconfig = ConfigParser.RawConfigParser()
    # Close main.cfg deterministically instead of leaking the handle.
    with open(os.path.join(conf_dir, 'main.cfg')) as main_cfg:
        iniconfig.readfp(main_cfg)
    # Parsers are merged through their internal section dicts.
    _inis = iniconfig.__dict__['_sections']

    listConfigs = [os.path.join(conf_d_dir, x) for x in os.listdir(conf_d_dir) if x.endswith('.conf')]
    # Read the conf.d files. Duplicate sections are ignored, except [main].
    countMainSection = []
    for path in listConfigs:
        parser = ConfigParser.RawConfigParser()
        parser.read(path)
        _ss = parser.__dict__['_sections']
        if 'main' in _ss:
            countMainSection.append(path)
        diffSections = (set(_ss) & set(_inis)) - set(['main'])
        if diffSections:
            logging.critical("Founded double sections %s in %s. Ignoring load this section.", list(diffSections), path)
        for name in diffSections:
            _ss.pop(name)
        _inis.update(_ss)
    if len(countMainSection) > 1:
        logging.critical("Founded doublicate [main] sections in %s", ', '.join(countMainSection))
        return None

    # Same pattern for every section, so compile it once.
    convert_time_re = re.compile(r'.*\d+\s?(?P<zone>[a-zA-Z/]+)$')

    conf = Config()
    for section in iniconfig.sections():
        conf[section] = Config(defaults)
        section_conf = conf[section]

        # [main] options apply to every section; the section's own options
        # are applied afterwards and therefore win.
        sections = ['main']
        if section != 'main':
            sections.append(section)
        for orig_section in sections:
            for opt in iniconfig.options(orig_section):
                if opt not in defaults:
                    logging.warning("Unknown option %s in section %s, ignoring" % (opt, section))
                    continue
                val = section_conf[opt]
                try:
                    raw = iniconfig.get(orig_section, opt)
                    default_type = type(defaults[opt])
                    # Exact type checks: bool must not be treated as int.
                    if default_type is int:
                        val = int(raw)
                    elif default_type is bool:
                        val = iniconfig.getboolean(orig_section, opt)
                    elif default_type is list:
                        val = [x for x in raw.split(',') if len(x) > 0]
                    elif default_type is dict:
                        val = dict((i.split(':')[0], i.split(':')[1]) for i in raw.split(',') if len(i) > 0)
                    elif isinstance(raw, str):
                        val = raw
                    else:
                        logging.warning("Unknown option %s or bad type in section %s: schema type %s, conf type %s" % (opt, orig_section, default_type, type(raw)))
                except Exception as e:
                    logging.error("Can't parse option %s in section %s: %s" % (opt, section, str(e)))
                    raise

                section_conf[opt] = val

        section_conf['bot_url_template'] = 'http://' + section_conf['bot_host'] + '/api/osinfo.php?fqdn=%s&output=loc_segment4&format=simple'
        section_conf['regexp_compiled'] = re.compile(section_conf['regexp']) if 'regexp' in section_conf else None
        # Always build a fresh list: 'ch_columns' may be the list object
        # shared with `defaults`, and 'columns' is appended to below.
        section_conf['columns'] = list(section_conf['ch_columns']) + list(section_conf['user_columns'])
        section_conf['regexp_convert_time'] = convert_time_re
        section_conf['fqdn'] = str(socket.getfqdn())
        if section_conf['store_full_request_time']:
            section_conf['columns'].append('full_request_time')

        if section == 'main':
            continue
        # Every non-[main] section must override the REQUIRED placeholders.
        for opt, val in section_conf.items():
            unset_scalar = isinstance(val, str) and val == 'REQUIRED'
            unset_list = isinstance(val, list) and len(val) > 0 and val[0] == 'REQUIRED'
            if unset_scalar or unset_list:
                logging.error("No required option %s in config section %s" % (opt, section))
                return None

    logging.debug("Started with config: " + str(conf))
    return conf
