#!/usr/bin/env python2
# coding: utf8
import time
import datetime
import os
import shlex
import sys
from collections import namedtuple, Counter

# Allow modules located next to this file to be imported by bare name.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Both normalizers are optional: use the ones from the
# `nginx_log_normalize_path` module when it can be imported, otherwise fall
# back to the minimal implementations defined here.
try:
    from nginx_log_normalize_path import nginx_log_normalize_path
except ImportError:
    def nginx_log_normalize_path(path):
        """Fallback path normalizer.

        Returns ``path`` unchanged when the SOLOMON_SKIP_PATH_NORMALIZATION
        environment variable is set to a non-empty value; otherwise every
        path is collapsed to ``'/'``.
        """
        keep_path = os.getenv('SOLOMON_SKIP_PATH_NORMALIZATION')
        return path if keep_path else '/'

try:
    from nginx_log_normalize_path import nginx_log_normalize_record
except ImportError:
    def nginx_log_normalize_record(record):
        """Fallback record normalizer: normalize the record's ``uri`` field."""
        uri = record.uri
        return nginx_log_normalize_path(uri)


# Status codes for which a `count` series is always created (with a zero
# increment) for every (uri, method) pair that has been seen.
DEFAULT_STATUS_CODES = ['200', '204', '400', '403', '404', '428', '429', '499', '500', '502', '503', '504']
NGINX_LOG_PATH = '/var/log/nginx/access.log'
# File that stores the log position reached by the previous read.
OFFSET_STORAGE_PATH = '/tmp/nginx_offset'
# Format of the date/time part of the bracketed timestamp (UTC offset excluded).
DATE_FORMAT = '%d/%b/%Y:%H:%M:%S'
DEFAULT_FIELDS_STRING = (
    'time_local,http_host,remote_addr,request,status,http_referer,http_user_agent,'
    'http_cookie,request_time,upstream_cache_status,bytes_sent,upstream_response_time,'
    'uri,args,ssl_session_id,http_x_request_id'
)
# Ordered field names of one log line; override with SOLOMON_NGINX_FIELDS
# (comma-separated) when the nginx log format differs from the default.
LOG_FIELDS = os.getenv('SOLOMON_NGINX_FIELDS', DEFAULT_FIELDS_STRING).split(',')

NginxLogRecord = namedtuple('NginxLogRecord', LOG_FIELDS)

SOLOMON_IGNORE_URI = os.getenv('SOLOMON_IGNORE_URI')
# Comma-separated substrings; a request whose uri contains any of them is
# skipped. Entries are stripped and blank ones are dropped: an empty string is
# a substring of every uri, so a stray trailing comma would otherwise silence
# all metrics. (A generator is used so no loop variable leaks into the module
# namespace under Python 2.)
IGNORE_URI_LIST = list(filter(None, (
    pattern.strip() for pattern in (SOLOMON_IGNORE_URI or '').split(',')
)))


def to_seconds(date):
    """Convert a datetime to seconds since the epoch.

    The value goes through ``time.mktime``, so the datetime's fields are
    interpreted as local time and sub-second precision is discarded.

    :param date: ``datetime.datetime`` (anything providing ``timetuple()``)
    :return: float number of seconds since the epoch
    """
    local_struct = date.timetuple()
    return time.mktime(local_struct)


class NginxLogParser:
    """Tails the nginx access log and reports per-request metrics.

    Each ``pull`` reads what was appended to ``NGINX_LOG_PATH`` since the
    previous call (the position is persisted in ``OFFSET_STORAGE_PATH``),
    emits a ``timings`` gauge per request, feeds the request-time histogram
    and reports cumulative ``count`` values per (uri, status, method).
    """

    # Both text types: under Python 2 a normalizer may return ``unicode``,
    # which must be treated as one path, not as an iterable of characters.
    _STRING_TYPES = (str, type(u''))

    def __init__(self, logger, registry):
        """
        https://wiki.yandex-team.ru/solomon/agent/modules/#python2
        https://st.yandex-team.ru/RASPINFRA-408

        :param logger: object with an ``error(message)`` method; used to
            report lines and records that are skipped
        :param registry: library.python.monlib.metric_registry.pyx::MetricRegistry
        """
        self._logger = logger
        # Cumulative counts keyed by (uri, status, method). Never reset, so
        # the values handed to consumer.rate() only grow.
        self._requests_counter = Counter()
        self._timings_histogram = registry.histogram_rate(
            {'sensor': 'timings_hist'},
            'exponential',
            base=1.3,
            bucket_count=48,
            scale=1,
        )

    @staticmethod
    def _load_offset():
        """Return the stored log position, or 0 if the file content is not an integer."""
        with open(OFFSET_STORAGE_PATH, 'r') as f:
            try:
                return int(f.read())
            except ValueError:
                return 0

    @staticmethod
    def _save_offset(offset):
        """Persist ``offset`` so the next read resumes from it."""
        with open(OFFSET_STORAGE_PATH, 'w') as f:
            f.write(str(offset))

    @staticmethod
    def _split_complete_lines(data):
        """Split raw log bytes into newline-terminated lines.

        A trailing fragment without a newline (a line that is still being
        written) is left out so it can be re-read in full later.

        :param data: bytes read from the log
        :return: ``(lines, consumed)`` - the complete lines without their line
            endings and the number of bytes they occupy in ``data``
        """
        consumed = data.rfind(b'\n') + 1  # 0 when there is no newline at all
        return data[:consumed].splitlines(), consumed

    @staticmethod
    def _to_text(raw_line):
        """Return ``raw_line`` as ``str`` (a no-op on Python 2, a decode on Python 3)."""
        if isinstance(raw_line, str):
            return raw_line
        return raw_line.decode('utf-8', 'replace')

    def _parse_date(self, str_date):
        """Parse a bracketed timestamp such as ``[10/Oct/2020:13:55:36 +0300]``.

        :param str_date: timestamp including the surrounding brackets
        :return: naive ``datetime.datetime``
        :raises ValueError: if the value is not exactly "<date> <offset>" or
            the date does not match ``DATE_FORMAT``
        """
        # The UTC offset is required to be present but is deliberately not
        # applied: to_seconds() converts through time.mktime, which already
        # interprets the value as local time.
        str_dt, _tz = str_date[1:-1].split(' ')
        return datetime.datetime.strptime(str_dt, DATE_FORMAT)

    def _parse_line(self, line):
        """Turn one log line into an ``NginxLogRecord``.

        The line must start with the bracketed timestamp; the remainder is
        split with shell-like quoting rules and mapped onto the remaining
        fields of the record.

        :return: the record, or ``None`` (after logging the error) if the
            line cannot be parsed
        """
        try:
            # find() returns -1 when there is no ']', which yields an empty
            # date string and a parse error handled below.
            date_end_position = line.find(']') + 1
            date = self._parse_date(line[:date_end_position])
            record = NginxLogRecord(date, *shlex.split(line[date_end_position + 1:]))
            return record
        except Exception as e:
            self._logger.error('Error while parsing {}: {}'.format(line, e))
            return None

    def _parse_logs(self):
        """Yield records for the complete lines appended since the last call.

        On the very first run (no offset file) nothing is yielded; the current
        end of the log is stored so that only later lines are processed.
        The new offset is saved before any record is yielded.
        """
        raw_lines = []
        # Binary mode keeps every position a plain byte offset, so the saved
        # offset can be computed as "start + bytes consumed".
        with open(NGINX_LOG_PATH, 'rb') as nginx_log:
            nginx_log.seek(0, os.SEEK_END)
            end = nginx_log.tell()
            if os.path.isfile(OFFSET_STORAGE_PATH):
                start = self._load_offset()
                # An offset past the end usually means the log was rotated;
                # a negative one can only come from a corrupted offset file.
                # In both cases start over from the beginning.
                if not 0 <= start <= end:
                    start = 0
                nginx_log.seek(start)
                raw_lines, consumed = self._split_complete_lines(nginx_log.read())
                # Advance only past complete lines: a half-written last line
                # stays unread until its newline arrives.
                self._save_offset(start + consumed)
            else:
                self._save_offset(end)
        for raw_line in raw_lines:
            record = self._parse_line(self._to_text(raw_line))
            if record:
                yield record

    def pull(self, ts, consumer):
        """Process new log lines and write the resulting metrics to ``consumer``.

        Requests to ``/ping`` and ``/unistat*`` are skipped unless
        SOLOMON_HANDLE_PING_REQUESTS / SOLOMON_HANDLE_UNISTAT_REQUESTS are set,
        as are requests whose uri contains an entry of ``IGNORE_URI_LIST``.
        Records whose ``request_time`` is not a finite number are logged and
        skipped.

        :param ts: pull timestamp supplied by the caller; not used - gauges
            carry the time taken from the log line and rates are written with
            timestamp 0
        :param consumer: metric consumer providing ``gauge`` and ``rate``
        """
        handle_ping = os.getenv('SOLOMON_HANDLE_PING_REQUESTS')
        handle_unistat = os.getenv('SOLOMON_HANDLE_UNISTAT_REQUESTS')

        for record in self._parse_logs():
            uri = record.uri
            if uri == '/ping' and not handle_ping:
                continue
            if uri.startswith('/unistat') and not handle_unistat:
                continue
            if any(ignored in uri for ignored in IGNORE_URI_LIST):
                continue

            # Validate before emitting anything: the log offset is already
            # saved at this point, so an exception here would drop the rest
            # of the batch.
            try:
                request_time = float(record.request_time)
                request_time_scaled = int(request_time * 1000)
            except (ValueError, OverflowError) as e:
                self._logger.error('Skipping record with invalid request_time {!r}: {}'.format(
                    record.request_time, e))
                continue

            method = record.request.split(' ', 1)[0]
            timestamp = to_seconds(record.time_local)

            # The normalizer may return a single path or several of them.
            uri_paths = nginx_log_normalize_record(record)
            if isinstance(uri_paths, self._STRING_TYPES):
                uri_paths = [uri_paths]
            for uri_path in uri_paths:
                consumer.gauge({
                    'sensor': 'timings',
                    'uri': uri_path,
                    'status': record.status,
                    'method': method,
                }, timestamp, request_time)
                # request_time * 1000: milliseconds, assuming nginx logs seconds.
                self._timings_histogram.collect(request_time_scaled)
                self._requests_counter[(uri_path, record.status, method)] += 1
                # "+= 0" creates the key without changing an existing count,
                # so the default statuses are always reported, even as zero.
                for status_code in DEFAULT_STATUS_CODES:
                    self._requests_counter[(uri_path, status_code, method)] += 0

        for (uri_path, status, method), count in self._requests_counter.items():
            consumer.rate({
                'sensor': 'count',
                'uri': uri_path,
                'status': status,
                'method': method,
            }, 0, count)
