import argparse
import datetime
import functools
import sys
import re
import urllib.parse


class SimpleNoise:
    """A noisy log message that is grouped by its complete text."""

    def __init__(self, msg: str):
        # The raw message is kept as-is; it doubles as the grouping key.
        self.msg = msg

    def key(self) -> str:
        """Return the value under which this message is aggregated."""
        grouping_key = self.msg
        return grouping_key


class ResponseErrorNoise(SimpleNoise):
    """A "Response code is ..." warning split into its individual fields.

    Instances are grouped by the path component of the logged URL rather
    than by the whole message text.

    Raises:
        ValueError: if the message does not match ``pattern`` (or one of the
            captured fields cannot be converted).
    """

    # Captures, in order: response code, the "Took" number (the value in
    # front of the trailing "s"), the URL and the response text.
    pattern = re.compile('^WARNING:Response code is (\\d+)\\. Took: ([0-9.\\-+]+)s\\. Url: (.*)\\. Response: (.*)$')

    def __init__(self, msg: str):
        super().__init__(msg)
        match = ResponseErrorNoise.pattern.match(msg)
        if match is None:
            raise ValueError('Invalid line: {}'.format(msg))

        code, took, url, response = match.groups()
        # Only the path is kept, so the same endpoint called with different
        # query strings ends up under one key.
        self.query = urllib.parse.urlparse(url).path
        self.code = int(code)
        self.error = response
        self.ts = float(took)

    def key(self):
        """Return the URL path used to group this warning."""
        return self.query


class SimpleNoiseAggregator:
    """Counts how many noise messages were appended for one key."""

    # Class used to turn a raw message string into the object passed to append().
    message_type = SimpleNoise

    def __init__(self, key: str):
        self.count = 0
        self.key = key

    def append(self, msg: SimpleNoise):
        """Register one more occurrence; the message itself is not inspected."""
        self.count = self.count + 1

    def __int__(self):
        # Lets int(aggregator) yield the number of appended messages.
        return self.count

    def __str__(self):
        return '{} {}'.format(self.count, self.key)


class ResponseErrorNoiseAggregator(SimpleNoiseAggregator):
    """Collects response-error warnings that share one query path.

    Besides the occurrence count it remembers the distinct response codes,
    the distinct response texts and every reported timing value.
    """

    message_type = ResponseErrorNoise

    def __init__(self, query: str):
        super().__init__(query)
        self.query = query
        self.errors = set()  # distinct response texts
        self.codes = set()  # distinct response codes
        self.ts = []  # one timing value per appended message

    def append(self, msg: ResponseErrorNoise):
        """Count ``msg`` and record its code, response text and timing."""
        super().append(msg)
        self.codes.add(msg.code)
        self.errors.add(msg.error)
        self.ts.append(msg.ts)

    def _ts_stats(self):
        """Return ``(min, average, max)`` of the recorded timings."""
        timings = self.ts
        fastest = min(timings)
        mean = sum(timings) / len(timings)
        slowest = max(timings)
        return fastest, mean, slowest

    def __str__(self):
        template = (
            '{} WARNING: Response error for query {}.\n'
            '\tError codes: {},\n'
            '\tErrors: {},\n'
            '\tResponse stats: (min, avg, max) -> {}\n'
        )
        return template.format(
            self.count, self.query, self.codes, self.errors, self._ts_stats()
        )


# Known noisy warnings: maps a substring looked up inside a warning message
# to the aggregator class that groups the messages containing it.
# Stats.add_warning walks this table in order and uses the first entry whose
# substring is present and whose message_type accepts the message.
NOISE = {
    'Too many dsts in request:': SimpleNoiseAggregator,
    'Dst was not found': SimpleNoiseAggregator,
    'Response code is': ResponseErrorNoiseAggregator,
}


class Stats:
    """Accumulates error, warning and noise counts and renders a text report.

    Errors and ordinary warnings are counted per distinct message text.
    Warnings that contain one of the ``NOISE`` substrings are instead grouped
    by that substring and then by the key of the parsed message.
    """

    def __init__(self):
        self._errors = {}  # message text -> occurrence count
        self._warnings = {}  # message text -> occurrence count
        self._noise = {}  # NOISE substring -> {message key -> aggregator}

    def add_error(self, msg):
        """Count one occurrence of the error message ``msg``."""
        self.__add_msg(self._errors, msg)

    def add_warning(self, msg):
        """Count one warning, routing known noise to its aggregator.

        The first ``NOISE`` entry whose substring occurs in ``msg`` and whose
        message type can be built from ``msg`` receives the warning. If no
        entry applies, the warning is counted by its text like an error.
        """
        for prefix, aggregator in NOISE.items():
            if prefix not in msg:
                continue

            try:
                warning = aggregator.message_type(msg)
            except Exception:
                # The substring matched but the line does not have the
                # expected shape: fall through to the remaining entries.
                # Unlike a bare ``except`` this does not swallow
                # KeyboardInterrupt / SystemExit.
                continue

            bucket = self._noise.setdefault(prefix, {})
            key = warning.key()
            if key not in bucket:
                bucket[key] = aggregator(key)
            bucket[key].append(warning)
            return

        self.__add_msg(self._warnings, msg)

    def as_string(self):
        """Render the report: errors, warnings, then the noise summary.

        Each noise group shows its total count followed by at most five of
        its most frequent entries.
        """
        parts = [
            self.__container_as_string(self._errors),
            self.__container_as_string(self._warnings),
            '== Noise:\n',
        ]
        for k, noise in self._noise.items():
            total = sum(aggregator.count for aggregator in noise.values())
            parts.append('==== %s -> %d\n' % (k, total))
            parts.append(self.__noise_as_string(noise, limit=5))

        return ''.join(parts)

    @staticmethod
    def __add_msg(container, msg):
        """Increment the counter stored for ``msg`` in ``container``."""
        container[msg] = container.get(msg, 0) + 1

    @staticmethod
    def __sort_by_frequency(container):
        """Return ``(key, value)`` pairs ordered by ``int(value)``, largest first.

        Values may be plain counts or aggregators; both support ``int()``.
        Entries with equal counts keep their insertion order.
        """
        return sorted(container.items(), key=lambda item: int(item[1]), reverse=True)

    @staticmethod
    def __container_as_string(container):
        """Format a ``message -> count`` mapping, one "count message" per line."""
        lines = ['%d %s\n' % (v, k) for k, v in Stats.__sort_by_frequency(container)]
        lines.append('\n')
        return ''.join(lines)

    @staticmethod
    def __noise_as_string(container, limit=None):
        """Format the most frequent aggregators of one noise group.

        Args:
            container: mapping of message key to aggregator.
            limit: maximum number of entries to print; ``None`` prints all.

        Returns:
            One line per printed aggregator, then ``...`` only if entries
            were actually left out, then an empty line.
        """
        ranked = Stats.__sort_by_frequency(container)
        shown = ranked if limit is None else ranked[:max(limit, 0)]
        lines = ['{}\n'.format(str(aggregator)) for _, aggregator in shown]
        if len(shown) < len(ranked):
            # Mark truncation only when something was really omitted.
            lines.append('...\n')

        lines.append('\n')
        return ''.join(lines)


class Parser:
    """Splits raw log lines and feeds ERROR / WARNING messages into ``Stats``."""

    def __init__(self):
        self.stats = Stats()

    def _parse(self, line):
        """Classify one log line and record it in ``self.stats``.

        The line is split on single spaces; the fourth field is taken as the
        level token and everything after it as the message. Lines containing
        'Got SIGTERM' and lines with only one or two fields are skipped.

        Raises:
            ValueError: if the line has three or four fields, i.e. too few
                to contain both a level and a message.
        """
        if 'Got SIGTERM' in line:
            return

        fields = line.split(' ')
        if len(fields) in (1, 2):
            return  # Log collecting artifact: hostname without log message

        # Explicit check instead of ``assert``: asserts are removed under
        # ``python -O``, which would let malformed lines through.
        if len(fields) < 5:
            raise ValueError(
                'expected at least 5 space-separated fields, got %d' % len(fields)
            )
        lvl = fields[3]
        msg = ' '.join(fields[4:])

        # Level and message are joined without a separator, giving e.g.
        # 'WARNING:Response code is ...' as ResponseErrorNoise.pattern expects.
        if lvl.startswith('ERROR'):
            self.stats.add_error(lvl + msg)
        elif lvl.startswith('WARNING'):
            self.stats.add_warning(lvl + msg)

    def parse_line(self, line):
        """Parse ``line`` best-effort; failures are reported on stderr and skipped."""
        try:
            self._parse(line)
        except Exception as e:
            print('%s --- %s' % (line, e), file=sys.stderr)


def parse_log(filename):
    """Parse the log file at ``filename`` and return the collected ``Stats``.

    The file is read as bytes, line by line; each line is decoded as UTF-8
    with undecodable bytes dropped, so a damaged line cannot abort the run.
    """
    parser = Parser()

    # ``with`` guarantees the handle is closed even if reading fails.
    with open(filename, 'rb') as log_file:
        for raw_line in log_file:
            # Strip only the line terminator. A blind ``[:-1]`` would cut the
            # last real character of a final line that lacks a newline.
            parser.parse_line(raw_line.rstrip(b'\n').decode('utf-8', 'ignore'))

    return parser.stats


def run(argv):
    """Command-line entry point: parse a log file and print its statistics.

    Args:
        argv: list of command-line arguments (without the program name).
            ``--filename`` selects the log; when it is omitted, yesterday's
            log under the default production path is used.
    """
    arg_parser = argparse.ArgumentParser(description='parse error log')
    arg_parser.add_argument('--filename', type=str, help='filename with log to parse')
    options = arg_parser.parse_args(args=argv)

    filename = options.filename
    if filename is None:
        # The default file name carries the date of the previous day.
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        filename = yesterday.strftime(
            '/storage/logs/production-intranet/var/log/fastcgi2/tvm-error.log.%Y%m%d'
        )
    print(filename + '\n')

    report = parse_log(filename).as_string()
    print(report)
