#!/usr/bin/env python
#
# Provides: check_s3_headers
#
# check header for some paths


import sys
import json
import shlex
import random
import socket
import logging
import optparse
import subprocess
import datetime

# Expected response headers per package and monitored URI.
#
# Layout: config['check_package'][<package>]['check_uri'][<uri>]['headers'].
# A response header that is not listed under 'headers' is treated as a
# critical difference by CheckHeaders.  'Expires' holds a (min, max) window in
# seconds measured from "now"; value mismatches for 'Last-Modified', 'ETag'
# and 'X-Nginx-Request-Id' are ignored by the checker, so those entries only
# make the header names known.
config = {
    'check_package': {
        'web4static': {
            'check_uri': {
                '/s3/web4static/__monitoring-object.gif': {
                    'headers': {
                        # headers compared by value
                        'Content-Type': 'image/gif',
                        'Connection': 'keep-alive',
                        # former value: 'public, max-age=315360000'
                        'Cache-Control': 'public, max-age=31556952',
                        'Access-Control-Allow-Origin': '*',
                        'Strict-Transport-Security': 'max-age=43200000; includeSubDomains;',
                        'Timing-Allow-Origin': '*',
                        'Vary': 'Accept-Encoding',
                        'Accept-Ranges': 'bytes',
                        # headers listed so that their keys are expected
                        'Server': 'nginx/1.12.2',
                        'X-Nginx-Request-Id': 'fa3bf1198a21a907',
                        'ETag': 'd89746888da2d9510b64a9f031eaecd5',
                        'Expires': (31555500, 31557500),
                        'Content-Length': '42',
                        'Last-Modified': 'Fri, 15 Mar 2019 21:54:54 GMT',
                    },
                },
            },
        },
        'market-skubi': {
            'check_uri': {
                '/market-skubi/__monitoring-object.gif': {
                    'headers': {
                        # headers compared by value
                        'Content-Type': 'image/gif',
                        'Connection': 'keep-alive',
                        # former value: 'public, max-age=315360000'
                        'Cache-Control': 'public, max-age=31556952',
                        'Access-Control-Allow-Origin': '*',
                        'Strict-Transport-Security': 'max-age=43200000; includeSubDomains;',
                        'Timing-Allow-Origin': '*',
                        'Vary': 'Accept-Encoding',
                        'Accept-Ranges': 'bytes',
                        # headers listed so that their keys are expected
                        'Server': 'nginx/1.12.2',
                        'X-Nginx-Request-Id': 'af3bc2ab23aa40ab',
                        'ETag': '5b73f8d3-2a',
                        'Expires': (31555500, 31557500),
                        'Content-Length': '42',
                        'Last-Modified': 'Wed, 15 Aug 2018 09:56:35 GMT',
                    },
                },
            },
        },
    },
}
# Disabled reference cases.  These Accept-Encoding values carry explicit
# q-values and are kept for documentation only; nothing below this banner up
# to the live ``list_accept_encoding`` assignment is evaluated.
#
# list_accept_encoding = [
# # gzip
#
# # Should NOT match:
# #   qvalue of 0 means "not acceptable."
#     ['deflate, gzip;q=0, x-gzip, identity, *;q=0', None],
#     ['gzip;q=0, x-gzip, identity, *;q=0', None],
#     ['x-gzip, identity, *;q=0, gzip;q=0', None],
#     ['gzip;q=0', None],
#     ['gzip; q=0.000, identity; q=0.5, *;q=0', None],
#     ['*', None],
#     ['gzip; q=0.000001, identity; q=0.5, *;q=0', None],
#     ['gzip; q=0.000001000, identity; q=0.5, *;q=0', None],
#
# # Should match:
#     ['gzip', 'gzip'],
#     ['deflate, gzip;q=0.1, x-gzip, identity, *;q=0.1', 'gzip'],
#     ['gzip;q=0.1, x-gzip, identity, *;q=0.1', 'gzip'],
#     ['x-gzip, identity, *;q=0.1, gzip;q=0.1', 'gzip'],
#     ['gzip;q=0.1', 'gzip'],
#     ['deflate, gzip, x-gzip, identity, *;q=0', 'gzip'],
#     ['gzip, x-gzip, identity, *;q=0', 'gzip'],
#     ['x-gzip, identity, *;q=0, gzip', 'gzip'],
#     ['compress, gzip', 'gzip'],
#     ['compress;q=0.5, gzip;q=1.0', 'gzip'],
#     ['gzip;q=1.0, identity; q=0.5, *;q=0', 'gzip'],
#     ['gzip;  q=1.0, identity; q=0.5, *;q=0', 'gzip'],
#
# # If multiple content-codings are acceptable, then the acceptable content-coding with the highest non-zero qvalue is preferred.
#
# # br
#     ['deflate, br;q=0, x-br, identity, *;q=0', None],
#     ['br;q=0, x-br, identity, *;q=0', None],
#     ['x-br, identity, *;q=0, br;q=0', None],
#     ['br;q=0', None],
#     ['br; q=0.000, identity; q=0.5, *;q=0', None],
#     ['*', None],
# #    ['br; q=0.000001, identity; q=0.5, *;q=0', None],
# #    ['br; q=0.000001000, identity; q=0.5, *;q=0', None],
#
# # Should match:
#     ['br', 'br'],
#     ['deflate, br;q=0.1, x-br, identity, *;q=0.1', 'br'],
#     ['br;q=0.1, x-br, identity, *;q=0.1', 'br'],
#     ['x-br, identity, *;q=0.1, br;q=0.1', 'br'],
#     ['br;q=0.1', 'br'],
#     ['deflate, br, x-br, identity, *;q=0', 'br'],
#     ['br, x-br, identity, *;q=0', 'br'],
#     ['x-br, identity, *;q=0, br', 'br'],
#     ['compress, br', 'br'],
#     ['compress;q=0.5, br;q=1.0', 'br'],
#     ['br;q=1.0, identity; q=0.5, *;q=0', 'br'],
#     ['br;  q=1.0, identity; q=0.5, *;q=0', 'br'],
#
# # mixed
#     ['deflate, gzip;q=0, br;q=0, identity, *;q=0', None],
#     ['br;q=0, x-br, x-gzip, identity, *;q=0', None],
#
# # Should match:
#     ['br, gzip', 'br'],
#     ['deflate, gzip, br;q=0.1, x-br, identity, *;q=0.1', 'br'],
#     ['gzip, br;q=0.1, x-br, identity, *;q=0.1', 'br'],
#     ['gzip, x-br, identity, *;q=0.1, br;q=0.1', 'br'],
#     ['gzip, br;q=0.1', 'br'],
#     ['gzip, deflate, br, x-br, identity, *;q=0', 'br'],
#     ['gzip, br, x-br, identity, *;q=0', 'br'],
#     ['gzip, x-br, identity, *;q=0, br', 'br'],
#     ['gzip, compress, br', 'br'],
#     ['gzip, compress;q=0.5, br;q=1.0', 'br'],
#     ['gzip, br;q=1.0, identity; q=0.5, *;q=0', 'br'],
#     ['gzip, br;  q=1.0, identity; q=0.5, *;q=0', 'br'],

# Live test matrix: each entry is [Accept-Encoding request value, expected
# Content-Encoding response value].  None means that no Content-Encoding
# header is expected in the response.
list_accept_encoding = [
    # From logs:
    ['',                                  None],
    ['-',                                 None],
    ['br, gzip, deflate',                 'br'],
    ['deflate',                           None],
    ['deflate, gzip',                     'gzip'],
    ['deflate, gzip, sdch',               'gzip'],
    ['gzip',                              'gzip'],
    ['gzip, br',                          'br'],
    ['gzip, br, peerdist',                'br'],
    ['gzip, deflate',                     'gzip'],
    ['gzip, deflate, br',                 'br'],
    ['gzip, deflate, br, peerdist',       'br'],
    ['gzip, deflate, gzip',               'gzip'],
    ['gzip, deflate, gzip, br',           'br'],
    ['gzip, deflate, identity',           'gzip'],
    ['gzip, deflate, lzma',               'gzip'],
    ['gzip, deflate, lzma, br',           'br'],
    ['gzip, deflate, lzma, sdch',         'gzip'],
    ['gzip, deflate, lzma, sdch, br',     'br'],
    ['gzip, deflate, peerdist',           'gzip'],
    ['gzip, deflate, sdch',               'gzip'],
    ['gzip, deflate, sdch, br',           'br'],
    ['gzip, deflate, x-gzip, x-deflate',  'gzip'],
    ['gzip, lzma, sdch',                  'gzip'],
    ['gzip, peerdist',                    'gzip'],
    ['gzip, sdch',                        'gzip'],
    ['gzip, sdch, br',                    'br'],
    ['gzip,deflate',                      'gzip'],
    ['gzip,deflate,br',                   'br'],
    ['gzip,deflate,bzip2,sdch',           'gzip'],
    ['gzip,deflate,identity',             'gzip'],
    ['gzip,deflate,lzma',                 'gzip'],
    ['gzip,deflate,lzma,sdch',            'gzip'],
    ['gzip,deflate,sdch',                 'gzip'],
    ['identity',                          None],
    ['peerdist',                          None],
]

class CmdExecuteError(Exception):
    """Raised when an external command exits with a non-zero status.

    Attributes:
        code: Exit status returned by the process.
        cmd: The command that was run (as passed by the caller).
        stdout: Captured standard output, or None.
        stderr: Captured standard error, or None.
    """

    def __init__(self, code, cmd, stdout=None, stderr=None):
        # Hand the values to Exception as well, so that ``args``, ``repr()``
        # and pickling reflect them instead of seeing an empty tuple.
        super(CmdExecuteError, self).__init__(code, cmd, stdout, stderr)
        self.code = code
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        return "Command '{}' returned non-zero exit status {} (stdout={}, stderr={})".format(
            self.cmd, self.code, self.stdout, self.stderr)


class Execute(object):
    """Run an external command that is given as one shell-like string.

    The string is tokenised with ``shlex.split`` when the object is created;
    the resulting argument list is stored in ``self.cmd``.
    """

    def __init__(self, cmd):
        self.cmd = shlex.split(cmd)

    def run(self):
        """Execute the command and return its captured standard output.

        Raises:
            CmdExecuteError: if the process exits with a non-zero status; the
                exception carries the exit code, the argument list and both
                captured streams.
        """
        process = subprocess.Popen(
            self.cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        captured_out, captured_err = process.communicate()
        if process.returncode == 0:
            return captured_out
        raise CmdExecuteError(process.returncode, self.cmd, captured_out, captured_err)


class CheckHeaders(object):
    """Validate the response headers served by the local host for monitored URIs.

    Instantiating the class runs the whole check: every configured URI is
    requested with ``curl -sI`` against ``socket.gethostname()`` (sending
    ``Host: yastatic.net``), failures are collected in the ``list_failed_*``
    attributes and one passive-check line is printed at the end.

    The class relies on module-level names defined outside of it:
    ``options`` (parsed command-line flags, ``options.show`` enables debug
    output on stderr), ``list_accept_encoding``, ``Execute`` and
    ``print_formated``.
    """

    # Header-name prefixes dropped from responses of the Accept-Encoding probes.
    ENCODING_SKIPPED_HEADERS = ('Date', 'X-Robots-Tag', 'ETag')
    # Header-name prefixes dropped before comparing with the expected headers.
    PATTERN_SKIPPED_HEADERS = ('Date', 'X-Robots-Tag', 'Etag', 'Keep-Alive',
                               'NEL', 'Report-To')
    # Headers that must be known but whose value mismatch is not a failure.
    VOLATILE_HEADERS = ('Last-Modified', 'ETag', 'X-Nginx-Request-Id')
    # Date format of the Expires header.
    EXPIRES_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'

    def __init__(self, config):
        """Store *config*, run all checks and print the result line.

        Args:
            config: Nested dict where
                ``config['check_package'][<package>]['check_uri']`` maps each
                URI to ``{'headers': {<name>: <expected value>}}``.
        """
        self.config = config
        self.list_failed_headers = []
        self.list_failed_accept_encodings = []
        self.list_failed_checks = []
        self.check_all_uri()
        self.print_result()

    def check_all_uri(self):
        """Check the URIs of the 'market-skubi' and 'web4static' packages."""
        packages = self.config['check_package']
        self.check_cached_services([
            packages['market-skubi']['check_uri'],
            packages['web4static']['check_uri'],
        ])

    def check_cached_services(self, data):
        """Run the Accept-Encoding and the header comparison checks.

        Args:
            data: Iterable of dicts mapping a URI to ``{'headers': {...}}``.

        Every URI is compared with its own expected headers.  Failures are
        appended to ``self.list_failed_accept_encodings`` and
        ``self.list_failed_headers``.  A non-200 response or an unexpected
        response header prints a critical line and terminates the process.
        """
        hostname = socket.gethostname()
        for uri_map in data:
            for uri in uri_map:
                self._check_accept_encoding(hostname, uri)
            for uri in uri_map:
                self._check_expected_headers(hostname, uri, uri_map[uri]['headers'])

    def print_result(self):
        """Print the final passive-check line: 'crit' with details, or 'ok'."""
        if len(self.list_failed_headers) > 0 and options.show:
            sys.stderr.write("{0}\n".format('\n'.join(self.list_failed_headers)))

        self._summarize_failures()

        if len(self.list_failed_checks) > 0:
            print_formated('crit', ', '.join(self.list_failed_checks))
        else:
            print_formated('ok', 'ok')

    def _summarize_failures(self):
        """Append one summary entry per failing category; return the summary list."""
        if self.list_failed_headers:
            self.list_failed_checks.append(
                'FAILED HEADERS KEYS: {0}'.format(str(self.list_failed_headers)))
        if self.list_failed_accept_encodings:
            # The original placeholder was '{0]', which made str.format raise
            # ValueError as soon as an Accept-Encoding failure was recorded.
            self.list_failed_checks.append(
                'FAILED ACCEPT-ENCODING: {0}'.format(str(self.list_failed_accept_encodings)))
        return self.list_failed_checks

    def _show(self, text):
        """Write *text* to stderr when the --show flag is set."""
        if options.show:
            sys.stderr.write(text)

    def _check_accept_encoding(self, hostname, uri):
        """Request *uri* once per Accept-Encoding case and verify Content-Encoding."""
        for accept_encoding, content_encoding in list_accept_encoding:
            # For /market-skubi URIs gzip is expected wherever the matrix says br.
            if uri.startswith('/market-skubi') and content_encoding == 'br':
                content_encoding = 'gzip'

            # The random query string makes every request URL unique.
            cmd = 'curl -sI -H "Accept-Encoding: {}" -H "Host: yastatic.net" http://{}{}?{}'.format(
                accept_encoding, hostname, uri, random.randrange(1000000000000))
            self._show("{0}\n".format(cmd))
            headers = self._fetch_headers(cmd, self.ENCODING_SKIPPED_HEADERS)

            if 'Content-Encoding' not in headers:
                if content_encoding is not None:
                    # NOTE(review): this dump goes to stdout unconditionally,
                    # ahead of the passive-check line; kept as it was.
                    print(json.dumps(headers, indent=4))
                    self.list_failed_accept_encodings.append(
                        'failed Accept-Encoding uri={} for Accept-Encoding={} content_encoding={}'.format(
                            uri, accept_encoding, content_encoding))
                    self._show(
                        'failed Accept-Encoding for uri={} for Accept-Encoding={} content_encoding={}\n'.format(
                            uri, accept_encoding, content_encoding))
            elif content_encoding != headers['Content-Encoding']:
                message = 'failed Accept-Encoding for accept_encoding={} content_encoding: {} != {}'.format(
                    accept_encoding, content_encoding, headers['Content-Encoding'])
                self.list_failed_accept_encodings.append(message)
                self._show(message + '\n')

    def _check_expected_headers(self, hostname, uri, pattern):
        """Request *uri* and compare the response headers with *pattern*."""
        cmd = 'curl -sI -H "Host: yastatic.net" http://{}{}?{}'.format(
            hostname, uri, random.randrange(1000000000000))
        self._show("{0}\n".format(cmd))
        headers = self._fetch_headers(cmd, self.PATTERN_SKIPPED_HEADERS)

        # Any header that the pattern does not know is a hard failure.
        unexpected = [key for key in headers if key not in pattern]
        if unexpected:
            print_formated('crit', 'different headers {} in the answers for request {}'.format(
                ','.join(unexpected), cmd))
            sys.exit()

        if options.show:
            sys.stderr.write("{0}\n".format(json.dumps({uri: headers}, indent=4)))
            sys.stderr.write("{0}\n".format(json.dumps(pattern, indent=4)))

        for message in self._compare_headers(uri, headers, pattern):
            self.list_failed_headers.append(message)
            self._show(message + '\n')

    def _fetch_headers(self, cmd, skipped_prefixes):
        """Run *cmd* and return its parsed response headers.

        Prints a critical line and exits the process when a status line in
        the output carries a code other than 200.
        """
        raw = Execute(cmd).run()
        if not isinstance(raw, str):
            # Python 3 returns bytes from the subprocess; latin-1 cannot fail.
            raw = raw.decode('latin-1')
        status_codes, headers = self._parse_headers(raw, skipped_prefixes)
        for code in status_codes:
            if code != 200:
                print_formated('crit', 'request cmd={} failed with status code={}'.format(cmd, code))
                sys.exit()
        return headers

    @staticmethod
    def _parse_headers(raw, skipped_prefixes):
        """Split raw ``curl -I`` output into status codes and a header dict.

        Args:
            raw: Response head as text.
            skipped_prefixes: Header names starting with any of these
                prefixes are left out of the result.

        Returns:
            Tuple ``(status_codes, headers)``: the integer codes of all
            status lines, and a dict of header name to stripped value.

        Raises:
            ValueError: for a non-empty line that is neither a status line
                nor a ``name: value`` pair, or for a non-numeric status code.
        """
        skipped = tuple(skipped_prefixes)
        status_codes = []
        headers = {}
        for line in raw.split('\n'):
            line = line.strip()
            if not line:
                continue
            if line.startswith('HTTP'):
                status_codes.append(int(line.split(None, 2)[1]))
                continue
            key, separator, value = line.partition(':')
            if not separator:
                raise ValueError('malformed header line: {!r}'.format(line))
            if key.startswith(skipped):
                continue
            headers[key] = value.strip()
        return status_codes, headers

    @staticmethod
    def _compare_headers(uri, headers, pattern, now=None):
        """Return failure messages for headers whose value differs from *pattern*.

        Args:
            uri: URI the headers belong to (used in the messages only).
            headers: Dict of received header name to value.
            pattern: Dict of expected values; must contain every key of
                *headers*.  The 'Expires' entry is a ``(low, high)`` pair of
                seconds that bounds ``Expires - now``.
            now: Naive UTC datetime used as reference; defaults to the
                current UTC time.
        """
        failures = []
        for key, value in headers.items():
            expected = pattern[key]
            if value == expected:
                continue
            if key == 'Expires':
                if now is None:
                    now = datetime.datetime.utcnow()
                expires_at = datetime.datetime.strptime(value, CheckHeaders.EXPIRES_FORMAT)
                remaining = expires_at - now
                low = datetime.timedelta(seconds=expected[0])
                high = datetime.timedelta(seconds=expected[1])
                if remaining < low or remaining > high:
                    failures.append('failed Expires header={} for uri={} key {} != {}'.format(
                        value, uri, remaining.total_seconds(), expected))
            elif key not in CheckHeaders.VOLATILE_HEADERS:
                failures.append('failed header={} for uri={} key {} != {}'.format(
                    key, uri, value, expected))
        return failures


def print_formated(status, text):
    """Print one passive-check result line for the check_s3_headers service.

    The line has the form ``PASSIVE-CHECK:check_s3_headers;<code>;<text>``
    where <code> is 0 for status 'ok', 1 for 'warn' and 2 for anything else.
    """
    if status == 'ok':
        severity = 0
    elif status == 'warn':
        severity = 1
    else:
        # Every unrecognised status is reported as critical.
        severity = 2
    print('PASSIVE-CHECK:check_s3_headers;{};{}'.format(severity, text))

def main():
    """Run the header checks against the module-level ``config``.

    Any exception raised while checking is turned into a critical
    passive-check line, followed by the exception text on stdout.
    """
    try:
        CheckHeaders(config)
    except Exception as error:
        print_formated('crit', 'cannot check headers')
        print(error)

if __name__ == '__main__':
    # Command-line flags.  ``options`` has to be bound at module level before
    # main() runs, because CheckHeaders reads it as a global.
    parser = optparse.OptionParser(usage='%prog [options]', version='1.0')
    parser.add_option('-v', '--verbose', action='store_true',
                      help='enable output to console')
    parser.add_option('-s', '--show', action='store_true',
                      help='show failed items')
    options, args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s.%(msecs)03d] [%(name)s] [%(levelname)s] [%(process)d] [%(threadName)s] %(message)s',
    )

    # The module logger is disabled unless --verbose was given.
    logger = logging.getLogger(__name__)
    if not options.verbose:
        logger.disabled = True

    main()
