#!/usr/bin/env python

import logging
import yaml
import os
import argparse
import sys

# The stdlib ``logging`` module itself is used as the logger object; its
# module-level helpers (e.g. ``log.exception``) are called directly.
log = logging
# Per-team configuration lives in the 'teams' directory located next to this
# script (the script path is resolved with realpath first).
team_config_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'teams')

# System metric aggregation recording rules
#
# Maps a short metric name to a PromQL expression template. For every entry a
# recording rule named "aggregate_<key>" is generated, and the single "%s"
# placeholder in the template is replaced with a comma-separated list of label
# matchers (pop, and optionally service / environment).
# Counter-style metrics are wrapped in rate(...[1m]) before summing; the other
# metrics are summed as-is. All expressions drop the per-host labels
# (hostname, instance); the cpu expression additionally drops the cpu label.
system_metric_aggregations = {
        "cpu": "sum(rate(node_cpu{%s}[1m])) without (cpu, hostname, instance)",
        "disk_writes_completed": "sum(rate(node_disk_writes_completed{%s}[1m])) without (hostname, instance)",
        "disk_reads_completed": "sum(rate(node_disk_reads_completed{%s}[1m])) without (hostname, instance)",
        "filesystem_free": "sum(node_filesystem_free{%s}) without (hostname, instance)",
        "filesystem_size": "sum(node_filesystem_size{%s}) without (hostname, instance)",
        "network_receive_bytes": "sum(rate(node_network_receive_bytes{%s}[1m])) without (hostname, instance)",
        "network_transmit_bytes": "sum(rate(node_network_transmit_bytes{%s}[1m])) without (hostname, instance)",
        "network_receive_packets": "sum(rate(node_network_receive_packets{%s}[1m])) without (hostname, instance)",
        "network_transmit_packets": "sum(rate(node_network_transmit_packets{%s}[1m])) without (hostname, instance)",
        "network_receive_errs": "sum(rate(node_network_receive_errs{%s}[1m])) without (hostname, instance)",
        "network_transmit_errs": "sum(rate(node_network_transmit_errs{%s}[1m])) without (hostname, instance)",
        "memory_Active": "sum(node_memory_Active{%s}) without (hostname, instance)",
        "memory_Inactive": "sum(node_memory_Inactive{%s}) without (hostname, instance)",
        "memory_Buffers": "sum(node_memory_Buffers{%s}) without (hostname, instance)",
        "memory_Cached": "sum(node_memory_Cached{%s}) without (hostname, instance)",
        "memory_MemTotal": "sum(node_memory_MemTotal{%s}) without (hostname, instance)",
        "memory_MemAvailable": "sum(node_memory_MemAvailable{%s}) without (hostname, instance)",
        "memory_MemFree": "sum(node_memory_MemFree{%s}) without (hostname, instance)",
        "memory_Dirty": "sum(node_memory_Dirty{%s}) without (hostname, instance)",
        "up": "sum(up{%s}) without (hostname, instance)",
        "load1": "sum(node_load1{%s}) without (hostname, instance)",
        "load5": "sum(node_load5{%s}) without (hostname, instance)",
        "load15": "sum(node_load15{%s}) without (hostname, instance)",
}

class ScrapeConfig:
    """One Prometheus scrape job whose targets are discovered through Consul.

    Parameters:
        pop: Consul datacenter to query; also becomes part of the job name.
        service: Consul service name to discover.
        port: numeric port appended to each discovered address.
        environment: Consul tag to keep and expose as the ``environment``
            label, or a falsy value for no environment filtering.
        parse_hostname: when true, derive ``hostname`` and ``hostgroup``
            labels from the Consul node name.
        system_metrics: when true, the job name gets a ``-system-metrics``
            suffix.
        bare_metal: marks the target as bare metal. Required for the
            ``nodeinfo`` service; combined with ``system_metrics`` it
            suppresses the ``service`` label.
    """

    def __init__(self, pop, service, port, environment, parse_hostname, system_metrics, bare_metal=False):
        # 'nodeinfo' is only accepted for bare-metal scrapes; panic() terminates
        # the process otherwise.
        if service == 'nodeinfo' and not bare_metal:
            panic("refusing to generate AWS scrape config against consul service 'nodeinfo'")
        self.pop = pop
        self.service = service
        self.port = port
        self.environment = environment
        self.parse_hostname = parse_hostname
        self.system_metrics = system_metrics
        self.bare_metal = bare_metal

    def generate_config(self):
        """Return the scrape job as a dict ready to be dumped to YAML."""
        return {
            'job_name': self.job_name(),
            'consul_sd_configs': self.consul_sd_configs(),
            'relabel_configs': self.relabel_configs()
        }

    def job_name(self):
        """Return ``<service>[-system-metrics]_<pop>[_<environment>]``."""
        if self.system_metrics:
            job_service_name = self.service + "-system-metrics"
        else:
            job_service_name = self.service
        if self.environment:
            return "%s_%s_%s" % (job_service_name, self.pop, self.environment)
        return "%s_%s" % (job_service_name, self.pop)

    def consul_sd_configs(self):
        """Return the Consul service-discovery section for this job."""
        return [{
            'server': 'localhost:8500',
            'datacenter': self.pop,
            'services': [self.service],
            # The environment relabel rules below match on '#<tag>#', so the
            # tag separator must stay in sync with those regexes.
            'tag_separator': '#'
        }]

    def relabel_configs(self):
        """Return the ordered list of relabel rules for this job."""
        rc = [
            self.pop_relabel_config(),
            self.address_relabel_config(),
        ]
        # Bare-metal system-metric jobs do not get a 'service' label.
        if not (self.system_metrics and self.bare_metal):
            rc.append(self.service_relabel_config())
        if self.parse_hostname:
            rc.append(self.hostname_relabel_config())
            rc.append(self.hostgroup_relabel_config())
        if self.environment:
            rc.append(self.environment_relabel_config())
            rc.append(self.environment_drop_relabel_config())
        return rc

    def service_relabel_config(self):
        """Copy the Consul service name into the ``service`` label."""
        return {
            'source_labels': ['__meta_consul_service'],
            'regex': '^(.*)$',
            'target_label': 'service',
            'replacement': '$1'
        }

    def pop_relabel_config(self):
        """Copy the Consul datacenter into the ``pop`` label."""
        return {
            'source_labels': ['__meta_consul_dc'],
            'regex': '^(.*)$',
            'target_label': 'pop',
            'replacement': '$1'
        }

    def environment_relabel_config(self):
        """Set the ``environment`` label from the matching Consul tag."""
        # NOTE(review): the environment name is interpolated into the regex
        # unescaped, and Prometheus anchors relabel regexes -- confirm this
        # still matches targets that carry more than one tag.
        return {
            'source_labels': ['__meta_consul_tags'],
            'regex': "#(%s)#" % self.environment,
            'replacement': '$1',
            'target_label': 'environment'
        }

    def environment_drop_relabel_config(self):
        """Keep only targets whose Consul tags match the environment."""
        return {
            'source_labels': ['__meta_consul_tags'],
            'regex': '#(%s)#' % self.environment,
            'action': 'keep'
        }

    def address_relabel_config(self):
        """Rewrite ``__address__`` to ``<consul address>:<port>``."""
        return {
            'source_labels': ['__meta_consul_address'],
            'regex': '^(.*)$',
            'target_label': '__address__',
            'replacement': "$1:%d" % self.port
        }

    def hostname_relabel_config(self):
        """Set ``hostname`` to the Consul node name up to the first period."""
        return {
            'source_labels': ['__meta_consul_node'],
            'regex': '^([^.]*).*$', # match everything before the first period (or everything if no period)
            'target_label': 'hostname',
            'replacement': '$1'
        }

    def hostgroup_relabel_config(self):
        """Set ``hostgroup`` to the node name minus its trailing ``-<hex>`` id.

        An optional ``.<environment>`` suffix after the id is tolerated.
        """
        return {
            'source_labels': ['__meta_consul_node'],
            # Raw string: '\.' is an invalid escape sequence in a normal string
            # literal and triggers a warning on current Python versions.
            'regex': r'^(.*)-[0-9a-f]*(\.(dev|development|staging|prod|production|darklaunch|canary))?$',
            'target_label': 'hostgroup',
            'replacement': '$1'
        }


class FederateScrapeConfig:
    """Scrape job that pulls selected series from other Prometheus servers.

    ``name`` becomes the job name, ``matchers`` is the list passed as the
    ``match[]`` query parameter, and ``endpoints`` is the list of static
    targets to scrape.
    """

    def __init__(self, name, matchers, endpoints):
        self.name, self.matchers, self.endpoints = name, matchers, endpoints

    def generate_config(self):
        """Return the federation scrape job as a plain dict."""
        query_params = {"match[]": self.matchers}
        target_group = {"targets": self.endpoints}
        job = {"job_name": self.name}
        job["scrape_interval"] = "10s"
        # Labels coming from the federated server win over local ones.
        job["honor_labels"] = True
        job["metrics_path"] = "/federate"
        job["params"] = query_params
        job["static_configs"] = [target_group]
        return job


# Recording rule and alerts config
# This is essentially a singleton that
# gets built up in a single object and written
# out into one big file
class SystemMetricsRuleGroup:
    """Recording-rule group aggregating system metrics for one label set.

    Parameters:
        pop: value for the ``pop`` label matcher; always applied.
        service: optional value for the ``service`` label matcher. It should
            really only be None when working with bare metal.
        environment: optional value for the ``environment`` label matcher.

    The group name is ``[<service>-]<pop>[-<environment>]-system-metrics`` and
    one ``aggregate_<metric>`` rule is created per entry of
    ``system_metric_aggregations``.
    """

    def __init__(self, pop, service=None, environment=None):
        self.rules = []
        # add_group() appends to this list; it must exist up front, otherwise
        # the first add_group() call fails with AttributeError.
        self.groups = []
        # Gather up labels to filter on for system metric aggregated metrics
        # while also building up the rule group name.
        label_str_fmt = "%s=\"%s\""
        labels = [label_str_fmt % ("pop", pop)]
        self.name = pop
        if service:
            labels.append(label_str_fmt % ("service", service))
            self.name = service + "-" + self.name
        if environment:
            labels.append(label_str_fmt % ("environment", environment))
            self.name = self.name + "-" + environment
        self.name = self.name + "-" + "system-metrics"
        label_str = ", ".join(labels)
        # Iterate over the pre-defined system metric aggregations and fill the
        # label matchers into each query template.
        for metric_name, query in system_metric_aggregations.items():
            self.rules.append({
                "record": "aggregate_" + metric_name,
                "expr": query % label_str,
                "labels": {
                    "lts": "cloudwatch"
                }
            })

    def generate_config(self):
        """Return the rule group as a dict with ``name`` and ``rules`` keys."""
        return {
            'name': self.name,
            'rules': self.rules
        }

    def add_group(self, group):
        """Append ``group`` to ``self.groups``.

        Groups collected here are not part of ``generate_config()`` output.
        """
        self.groups.append(group)


def generate_system_metric_rule_group(pop, service=None, environment=None):
    """Return a full recording rule group for aggregated system metrics.

    The group covers the label set described by ``pop`` and the optional
    ``service`` / ``environment``; the result is a dict with ``name`` and
    ``rules`` keys.
    """
    # The previous body returned the undefined names `group_name` and `rules`
    # and so raised NameError on every call. SystemMetricsRuleGroup already
    # builds exactly this structure, so delegate to it.
    return SystemMetricsRuleGroup(pop, service, environment).generate_config()


def generate_consul_aws_application_scrapes(configs):
    """Build ScrapeConfig objects for AWS application metric scrapes.

    Each entry of ``configs`` yields one ScrapeConfig per listed environment,
    or a single one without environment filtering when ``environments`` is
    missing or empty.
    """
    scrape_configs = []
    for entry in configs:
        # [None] stands for "one scrape config, no environment filter".
        environments = entry.get('environments') or [None]
        scrape_configs.extend(
            ScrapeConfig(
                entry['pop'],
                entry['service'],
                entry['port'],
                environment,
                entry.get('parse_hostname', False),
                False,
            )
            for environment in environments
        )
    return scrape_configs

def generate_consul_aws_system_scrapes(configs):
    """Build ScrapeConfig objects for AWS system metric scrapes.

    Works like the application variant, except that the port falls back to
    9100 when not given and the resulting configs are flagged as system
    metrics.
    """
    return [
        ScrapeConfig(
            entry['pop'],
            entry['service'],
            entry.get('port', 9100),  # defaults if not given
            environment,
            entry.get('parse_hostname', False),
            True,
        )
        for entry in configs
        # No environments listed -> a single config without environment filter.
        for environment in (entry.get('environments') or [None])
    ]


def generate_consul_bare_metal_application_scrapes(configs):
    """Build one bare-metal application ScrapeConfig per entry of ``configs``.

    Bare-metal scrapes never filter on environment and always parse the
    hostname.
    """
    return [
        ScrapeConfig(
            entry['pop'],
            entry['service'],
            entry['port'],
            None,
            True,
            False,
            bare_metal=True,
        )
        for entry in configs
    ]

def generate_consul_bare_metal_system_scrapes(configs):
    """Build one bare-metal system-metrics ScrapeConfig per entry of ``configs``.

    The Consul service is always 'nodeinfo'; the port falls back to 9100 when
    the entry does not specify one.
    """
    scrape_configs = []
    for entry in configs:
        node_port = entry.get('port', 9100)
        scrape_configs.append(
            ScrapeConfig(entry['pop'], 'nodeinfo', node_port, None, True, True, bare_metal=True)
        )
    return scrape_configs

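# Command-line options (all four are required):
#   --config-base PATH, path to the base prometheus config file
#   --team TEAM, the team name to generate config for; uses the configs in that team's config directory to generate the output
#   --output PATH, path to write the generated prometheus config to (must not already exist)
#   --rules-output PATH, path to write the generated recording rules to (must not already exist)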
def _load_yaml_file(path):
    """Parse the YAML file at ``path`` and return the resulting object."""
    # safe_load only builds plain data types, which is all a configuration
    # file needs, and unlike yaml.load() it does not require an explicit
    # Loader argument on current PyYAML releases.
    with open(path, 'r') as handle:
        return yaml.safe_load(handle)


def main():
    """Generate the Prometheus config and recording rules for one team.

    Command-line options (all required):
        --config-base PATH   existing base Prometheus config to extend
        --team NAME          team whose directory under ``team_config_dir``
                             provides scrapes.yaml (required) and rules.yaml /
                             remote.yaml (optional)
        --output PATH        file to write the merged Prometheus config to;
                             must not exist yet
        --rules-output PATH  file to write the recording rules to; must not
                             exist yet

    Any validation or loading failure is reported through panic(), which
    terminates the process.
    """
    print("Generating prometheus config...")
    # Argument parsing
    parser = argparse.ArgumentParser(description='Generate a complex prometheus configuration from simple input')
    parser.add_argument('--config-base', action='store', dest='config_base', help='path to a base prometheus configuration to append scrape tasks and rules to')
    parser.add_argument('--team', action='store', dest='team', help='team to generate configuration for')
    parser.add_argument('--output', action='store', dest='output', help='path to store the resulting scrape configuration')
    parser.add_argument('--rules-output', action='store', dest='rules_output', help='path to store the resulting recording rules configuration')
    args = parser.parse_args()
    if not args.config_base or not os.path.isfile(args.config_base):
        panic("'--config-base <path>' must be provided")
    if not args.team or not os.path.isdir(os.path.join(team_config_dir, args.team)):
        panic("'--team <name>' must point to a valid team")
    if not args.output or os.path.isfile(args.output):
        panic("'--output <path>' not specified or already exists")
    if not args.rules_output or os.path.isfile(args.rules_output):
        panic("'--rules-output <path>' not specified or already exists")

    team_dir = os.path.join(team_config_dir, args.team)
    scrapes_path = os.path.join(team_dir, 'scrapes.yaml')
    rule_groups_path = os.path.join(team_dir, 'rules.yaml')
    remote_path = os.path.join(team_dir, 'remote.yaml')
    # Make sure that the scrapes.yaml file is defined
    if not os.path.isfile(scrapes_path):
        panic("No 'scrapes.yaml file found for team '%s'" % args.team)

    try:
        # An empty YAML file parses to None; treat that as an empty config.
        base_config = _load_yaml_file(args.config_base) or {}
        scrapes = _load_yaml_file(scrapes_path) or {}
        # The rules file is optional: start from an empty rules config when
        # it does not exist.
        rules = {'groups': []}
        if os.path.isfile(rule_groups_path):
            rules = _load_yaml_file(rule_groups_path) or rules
        # The remote read/write file is optional as well.
        remote_config = {}
        if os.path.isfile(remote_path):
            remote_config = _load_yaml_file(remote_path) or {}
        remote_read_config = remote_config.get('remote_read', [])
        remote_write_config = remote_config.get('remote_write', [])
    except Exception:
        # Top-level boundary: panic() logs the active exception and exits.
        panic("unexpected error loading configuration files")

    # Go through each config stanza and try to make a valid configuration
    # Consul
    consul_scrapes = scrapes.get('consul', {})
    if not isinstance(consul_scrapes, dict):
        panic("key 'consul' is not a dictionary in scrapes.yaml")
    # Extract the sub categories out of the consul scrape config section
    # ... make sure they are all lists
    consul_sections = {}
    for section in ('aws_application_metrics',
                    'aws_system_metrics',
                    'bare_metal_application_metrics',
                    'bare_metal_system_metrics'):
        entries = consul_scrapes.get(section, [])
        if not isinstance(entries, list):
            panic("%s->consul->%s is not a list" % (args.team, section))
        consul_sections[section] = entries
    consul_aws_system_scrapes = consul_sections['aws_system_metrics']
    consul_bare_metal_system_scrapes = consul_sections['bare_metal_system_metrics']
    federation_scrapes = scrapes.get('federation', [])
    if not isinstance(federation_scrapes, list):
        panic("%s->federation is not a list of federation configs" % args.team)

    # Now we can go ahead and start generating ScrapeConfig objects
    scrape_configs = []
    scrape_configs.extend(generate_consul_aws_application_scrapes(consul_sections['aws_application_metrics']))
    scrape_configs.extend(generate_consul_aws_system_scrapes(consul_aws_system_scrapes))
    scrape_configs.extend(generate_consul_bare_metal_application_scrapes(consul_sections['bare_metal_application_metrics']))
    scrape_configs.extend(generate_consul_bare_metal_system_scrapes(consul_bare_metal_system_scrapes))
    # Create scrape configs for federation tasks
    for federation in federation_scrapes:
        scrape_configs.append(FederateScrapeConfig(federation['name'], federation['matchers'], federation['endpoints']))

    # Generate system metric aggregations for clusters
    rule_groups = []
    for target in consul_bare_metal_system_scrapes:
        rule_groups.append(SystemMetricsRuleGroup(target['pop']))
    for target in consul_aws_system_scrapes:
        environments = target.get('environments', None)
        service = target['service']
        if not environments:
            rule_groups.append(SystemMetricsRuleGroup(target['pop'], service))
        else:
            for env in environments:
                rule_groups.append(SystemMetricsRuleGroup(target['pop'], service, env))

    # Render everything to plain dicts. These must be real lists: they are
    # concatenated with existing lists below, and map() returns a lazy
    # iterator on Python 3.
    generated_scrape_configs = [config.generate_config() for config in scrape_configs]
    generated_rule_groups = [group.generate_config() for group in rule_groups]
    # Generated scrape configs are added to w/e was already on the host, and
    # generated rule groups to w/e is custom defined by the team. A key that
    # is present but null counts as empty.
    base_scrape_configs = base_config.get('scrape_configs') or []
    existing_rule_groups = rules.get('groups') or []

    # Check for external labels being turned off
    if scrapes.get('disable_external_labels', False):
        # The base config is not guaranteed to have a 'global' section.
        global_section = base_config.get('global') or {}
        global_section['external_labels'] = {}
        base_config['global'] = global_section

    # merge them together
    base_config['scrape_configs'] = base_scrape_configs + generated_scrape_configs
    # Add remote read/write configs
    base_config['remote_read'] = remote_read_config
    base_config['remote_write'] = remote_write_config
    # Add autogenerated rule groups
    rules['groups'] = existing_rule_groups + generated_rule_groups

    # Write out the generated main Prometheus config
    with open(args.output, 'w') as output_scrapes_file:
        output_scrapes_file.write(yaml.dump(base_config))

    # Write out the generated recording rules config
    with open(args.rules_output, 'w') as output_rules_file:
        output_rules_file.write(yaml.dump(rules))

def panic(msg):
    """Log ``msg`` as an error and terminate the process with exit status 1.

    When called while an exception is being handled, the traceback of that
    exception is logged along with the message.
    """
    # logging.exception() is only meant to be called from an exception
    # handler; most callers here are plain validation failures, where it
    # would append a meaningless "NoneType: None" traceback.
    handling_exception = sys.exc_info()[0] is not None
    log.error(msg, exc_info=handling_exception)
    sys.exit(1)

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
    

