import time
import datetime
import os
import shlex
import sys
from collections import namedtuple, Counter

# Put this file's own directory on the import path so that modules located
# next to it (such as the optional `util` module imported below) can be found.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

try:
    from util import nginx_log_normalize_path
except ImportError:
    def nginx_log_normalize_path(path):
        """Fallback used when `util` does not provide a path normalizer.

        Returns `path` untouched when the SOLOMON_SKIP_PATH_NORMALIZATION
        environment variable holds a non-empty value; otherwise every path is
        collapsed into the single label value 'unprocessed'.
        """
        skip_normalization = os.getenv('SOLOMON_SKIP_PATH_NORMALIZATION')
        return path if skip_normalization else 'unprocessed'

try:
    from util import nginx_log_normalize_record
except ImportError:
    def nginx_log_normalize_record(record):
        """Fallback record normalizer: normalize only the record's `uri` field."""
        uri = record.uri
        return nginx_log_normalize_path(uri)


# Status codes for which a request counter is always created (with a zero
# increment) for every (uri, method, host) combination seen in the log.
DEFAULT_STATUS_CODES = ['200', '400', '403', '404', '429', '499', '500']
# Access log that is read incrementally.
NGINX_LOG_PATH = '/var/log/nginx/access.log'
# File holding the position in the access log reached by the previous read.
OFFSET_STORAGE_PATH = '/tmp/nginx_offset'
# strptime pattern for the bracketed timestamp at the start of each line.
# The UTC offset is matched as the literal text "+0300", so a line logged
# with any other offset fails to parse.
DATE_FORMAT = '[%d/%b/%Y:%H:%M:%S +0300]'
# Default ordered list of log fields; the first one receives the parsed
# timestamp, the rest are filled positionally from the remainder of the line.
DEFAULT_FIELDS_STRING = (
    'time_local,http_host,remote_addr,request,status,http_referer,http_user_agent,'
    'http_cookie,request_time,upstream_cache_status,bytes_sent,upstream_response_time,'
    'uri,args,ssl_session_id,http_x_request_id'
)
# The field list can be overridden with a comma-separated SOLOMON_NGINX_FIELDS.
LOG_FIELDS = os.getenv('SOLOMON_NGINX_FIELDS', DEFAULT_FIELDS_STRING).split(',')

# Opt-in switches: requests to /ping, /unistat* and /test are skipped unless
# the corresponding environment variable is set to a non-empty value.
HANDLE_PING_REQUESTS = os.getenv('SOLOMON_HANDLE_PING_REQUESTS')
HANDLE_UNISTAT_REQUESTS = os.getenv('SOLOMON_HANDLE_UNISTAT_REQUESTS')
HANDLE_TEST_REQUESTS = os.getenv('SOLOMON_HANDLE_TEST_REQUESTS')

# One parsed access-log line; attribute names come from LOG_FIELDS.
NginxLogRecord = namedtuple('NginxLogRecord', LOG_FIELDS)


def to_seconds(date):
    """Convert a datetime to seconds since the epoch.

    The value is passed through `time.mktime`, so its fields are interpreted
    as local time of the machine running this code.
    """
    local_struct = date.timetuple()
    return time.mktime(local_struct)


class NginxLogParser:
    """Read new nginx access-log lines and report them as metrics.

    Every `pull` reads what was appended to NGINX_LOG_PATH since the position
    stored in OFFSET_STORAGE_PATH and, for each accepted record, reports a
    'timings' gauge, feeds a 'timings-histogram' histogram (milliseconds) and
    bumps a cumulative request counter that is emitted as a 'count' rate.
    """

    def __init__(self, logger, registry):
        """Keep the collaborators and start with empty metric state.

        Args:
            logger: object whose `error(message)` method is used for reporting
                lines and records that cannot be processed.
            registry: object whose `histogram_rate(labels, type, **options)`
                method returns a histogram exposing `collect(value)`.
        """
        self._logger = logger
        self._registry = registry
        # Keyed by (uri, status, method, http_host); never reset, so the
        # values keep growing across pulls.
        self._requests_counter = Counter()
        # Cache of histograms created so far, keyed by their label set.
        self._timings_histograms = {}

    @staticmethod
    def _load_offset():
        """Return the stored log position, or 0 if the file holds no integer.

        Errors from opening or reading the file are not handled here.
        """
        try:
            with open(OFFSET_STORAGE_PATH, 'r') as f:
                return int(f.read())
        except ValueError:
            return 0

    def _get_timings_histogram(self, labels):
        """Return the histogram for `labels`, creating it on first use.

        The cache key is built from the sorted label items, so two dicts with
        the same content share one histogram regardless of insertion order.
        """
        key = str(sorted(labels.items()))
        if key not in self._timings_histograms:
            self._timings_histograms[key] = self._registry.histogram_rate(
                labels,
                'exponential',
                base=1.3,
                bucket_count=48,
                scale=1,
            )

        return self._timings_histograms[key]

    @staticmethod
    def _save_offset(offset):
        """Overwrite the offset file with `offset`."""
        with open(OFFSET_STORAGE_PATH, 'w') as f:
            f.write(str(offset))

    def _parse_line(self, line):
        """Turn one log line into an NginxLogRecord.

        The text up to and including the first ']' is parsed with DATE_FORMAT;
        the rest is split with shell-like quoting rules and mapped
        positionally onto the remaining fields.

        Returns:
            The record, or None (after logging an error) when the line cannot
            be parsed or has the wrong number of fields.
        """
        try:
            date_end_position = line.find(']') + 1
            date = datetime.datetime.strptime(line[:date_end_position], DATE_FORMAT)
            # "+ 1" skips the separator that follows the closing bracket.
            record = NginxLogRecord(date, *shlex.split(line[date_end_position + 1:]))
            return record
        except Exception as e:
            self._logger.error('Error while parsing {}: {}'.format(line, e))
            return None

    def _parse_logs(self):
        """Yield records for the complete lines appended since the last call.

        On the very first run (no offset file yet) nothing is yielded; the
        current end of the log is stored so that only later lines are read.
        A stored offset beyond the end of the file is treated as a rotated
        log and reading restarts from the beginning. A trailing line that is
        not yet terminated by a newline is left in the file and picked up by a
        later call once it is complete.
        """
        nginx_lines = []
        with open(NGINX_LOG_PATH, 'r') as nginx_log:
            if os.path.isfile(OFFSET_STORAGE_PATH):
                nginx_log.seek(0, os.SEEK_END)
                end = nginx_log.tell()
                start = self._load_offset()
                # usually that means rotated log
                if start > end:
                    start = 0
                nginx_log.seek(start)
                chunk = nginx_log.read()
                if chunk and not chunk.endswith('\n'):
                    # The writer is in the middle of a line: re-read only the
                    # complete part so the stored offset stops right after the
                    # last newline instead of inside the unfinished line.
                    complete_length = chunk.rfind('\n') + 1
                    nginx_log.seek(start)
                    chunk = nginx_log.read(complete_length)
                self._save_offset(nginx_log.tell())
                nginx_lines = chunk.splitlines()
            else:
                nginx_log.seek(0, os.SEEK_END)
                end = nginx_log.tell()
                self._save_offset(end)
        for line in nginx_lines:
            record = self._parse_line(line)
            if record:
                yield record

    def pull(self, ts, consumer):
        """Process new log records and report metrics to `consumer`.

        Args:
            ts: not used; gauges carry the timestamp of the log record itself
                and rates are reported with timestamp 0.
            consumer: object with `gauge(labels, ts, value)` and
                `rate(labels, ts, value)` methods.

        Requests to /ping, /unistat* and /test are skipped unless enabled via
        the HANDLE_* switches, as are records for which the normalizer returns
        nothing. A record whose request_time is not a usable number is logged
        and skipped so that it cannot abort the rest of the batch (the log
        offset has already been advanced by the time records are processed).
        """
        for record in self._parse_logs():
            method = record.request.split(' ', 1)[0]
            if record.uri == '/ping' and not HANDLE_PING_REQUESTS:
                continue
            if record.uri.startswith('/unistat') and not HANDLE_UNISTAT_REQUESTS:
                continue
            if record.uri == '/test' and not HANDLE_TEST_REQUESTS:
                continue
            uri_paths = nginx_log_normalize_record(record)
            if not uri_paths:
                continue
            # The normalizer may return a single path or several of them.
            if isinstance(uri_paths, str):
                uri_paths = [uri_paths]

            try:
                request_time = float(record.request_time)
                request_time_ms = int(request_time * 1000)
            except (TypeError, ValueError, OverflowError) as e:
                self._logger.error(
                    'Skipping record with invalid request_time {!r}: {}'.format(record.request_time, e)
                )
                continue
            timestamp = to_seconds(record.time_local)

            for uri_path in uri_paths:
                labels = {
                    'sensor': 'timings',
                    'uri': uri_path,
                    'status': record.status,
                    'method': method,
                    'http_host': record.http_host,
                }
                consumer.gauge(labels, timestamp, request_time)

                # Separate dict: the one handed to the consumer is not touched
                # afterwards, in case the consumer keeps a reference to it.
                histogram_labels = dict(labels, sensor='timings-histogram')
                self._get_timings_histogram(histogram_labels).collect(request_time_ms)

                self._requests_counter[(uri_path, record.status, method, record.http_host)] += 1
                # Adding 0 creates the counters for the default status codes
                # without changing counts that already exist.
                for status_code in DEFAULT_STATUS_CODES:
                    self._requests_counter[(uri_path, status_code, method, record.http_host)] += 0

        for key, value in self._requests_counter.items():
            consumer.rate({
                'sensor': 'count',
                'uri': key[0],
                'status': key[1],
                'method': key[2],
                'http_host': key[3],
            }, 0, value)
