#!/usr/bin/env python

"""
Ansible dynamic inventory using consul.internal

Supports 3 groupings from consul:

cluster - e.g. sfo01
host-prefix - e.g. rails-app, ingest
service+tag - e.g. web-rails-worker-hutch-clean-production


Output notes from http://docs.ansible.com/developing_inventory.html

--list output:

{
    "databases"   : {
        "hosts"   : [ "host1.example.com", "host2.example.com" ],
        "vars"    : {
            "a"   : true
        }
    },
    "webservers"  : [ "host2.example.com", "host3.example.com" ],
    "atlanta"     : {
        "hosts"   : [ "host1.example.com", "host4.example.com", "host5.example.com" ],
        "vars"    : {
            "b"   : false
        },
        "children": [ "marietta", "5points" ]
    },
    "marietta"    : [ "host6.example.com" ],
    "5points"     : [ "host7.example.com" ],


    "_meta" : {
       "hostvars" : {
          "moocow.example.com"     : { "asdf" : 1234 },
          "llama.example.com"      : { "asdf" : 5678 }
       }
    }

}

--host <hostname> output:

{
    "favcolor"   : "red",
    "ntpserver"  : "wolf.example.com",
    "monitoring" : "pack.example.com"
}

"""

import os
import collections
import urllib
import optparse
import json
import re
import sys
from pprint import pprint

def run(datacenters=None, service=None, tag=None):
    """
    Parse the command line and dispatch to the requested inventory action.

    datacenters: list of consul datacenter names, or None; passed through
        unchanged to the --list / --groups handlers.
    service: optional consul service name, passed to the --list handler.
    tag: optional consul service tag, passed to the --list handler.

    This function never returns normally: every path ends in sys.exit().
    The exit status is 0 when --list, --groups or --host was handled and 1
    when none of them was given.
    """
    parser = optparse.OptionParser()
    parser.add_option("--list", dest="list", action="store_true")
    parser.add_option("--groups", dest="groups", action="store_true")
    parser.add_option("--host", dest="host", action="store")
    options, _ = parser.parse_args()

    if options.list:
        print_host_list(datacenters, service, tag)
        sys.exit(0)

    if options.groups:
        print_groups_list(datacenters)
        sys.exit(0)

    # We don't implement the --host option, since we implement _meta and don't
    # use any vars from consul in our playbooks yet.
    if options.host:
        print(json.dumps({}))
        sys.exit(0)

    # No action was requested.  Say so on stderr (stdout is reserved for the
    # inventory JSON) instead of failing silently; the exit status stays 1.
    parser.print_help(sys.stderr)
    sys.exit(1)

#
# The following methods are used to filter and get host information from node info
#
# Matches a service tag of the form "fqdn=<name>" and captures <name>.
fqdnRE = re.compile("fqdn=(.*)")


def updateFQDN(ni):
    """
    Pull the FQDN out of the consul node_info tags

    ni is a node-info dict; it is modified in place.  Every entry of
    ni['ServiceTags'] that starts with "fqdn=" sets ni['fqdn'] to the text
    after the "=", so if several tags match, the last one wins.  When no
    tag matches (or there are no tags at all) ni['fqdn'] is left untouched.

    FIXME: The relationship between node name and FQDN can be a little weird
    at some points, especially in the case of virtual hosts (see the virtmasters).
    Talk to the devtools folks and understand what the "correct" behavior should be.
    """
    # A missing or null 'ServiceTags' value is treated as "no tags" rather
    # than raising while iterating it.
    for tag in ni.get(u'ServiceTags') or []:
        m = fqdnRE.match(tag)
        if m:
            ni['fqdn'] = m.group(1)

# Raw strings keep the regex escapes (\d, \.) from being read as (invalid)
# Python string escapes; the compiled patterns are unchanged.
# New-style names: "<prefix>-<hex id>.<domain>", e.g. rails-app-d028a.sfo01.justin.tv
newPrefixRE = re.compile(r"^([a-zA-Z-0-9]+)-[\da-f]+\..+$")
# Old-style names: "<letters><digits>.<domain>", e.g. app100.sfo01.justin.tv
oldPrefixRE = re.compile(r"^([a-zA-Z]+)\d+\..+$")


def updatePrefix(ni):
    """
    Given the name of a node, guess its prefix
    Prefix forms:
    rails-app-d028a.sfo01.justin.tv
    app100.sfo01.justin.tv

    ni is a node-info dict that must already contain 'fqdn'; the guessed
    prefix is stored in ni['prefix'].  The new-style pattern is tried first,
    then the old-style one.  If neither matches, the whole FQDN is used as
    the prefix.
    """
    fqdn = ni['fqdn']
    for pattern in (newPrefixRE, oldPrefixRE):
        m = pattern.match(fqdn)
        if m:
            ni['prefix'] = m.group(1)
            return
    ni['prefix'] = fqdn

def print_host_list(datacenters, service, tag):
    """
    Print the inventory for --list as pretty-printed JSON on stdout.

    datacenters, service and tag are passed to host_list() to build the
    groups.  An empty "_meta"/"hostvars" block is added to the result before
    it is printed.
    """
    output = host_list(datacenters, service, tag)
    # For single hostname lookup: if the service/tag filter matched nothing,
    # fall back to the unfiltered inventory.  The result has to be kept (it
    # used to be thrown away), and repeating the query is only useful when a
    # filter was actually applied.
    if not output and (service or tag):
        output = host_list(datacenters, None, None)
    # Add _meta block (empty for now) - this prevents ansible from iterating using --host
    output['_meta'] = {'hostvars': {}}
    print(json.dumps(output, sort_keys=True, indent=2, separators=(',', ': ')))

def print_groups_list(datacenters):
    """
    Print the name of every inventory group, one per line, sorted.

    The groups come from an unfiltered host_list(datacenters, None, None)
    call.
    """
    output = host_list(datacenters, None, None)
    # Add _meta block (empty for now) - this prevents ansible from iterating using --host
    # NOTE(review): because it is added before printing, "_meta" is listed
    # alongside the real groups; kept as-is to preserve the existing output.
    output['_meta'] = {'hostvars': {}}
    for key in sorted(output):
        print(key)

def _consul_catalog(resource):
    """
    Fetch one consul catalog resource and return the decoded JSON document.

    resource is the part of the URL after "/v1/catalog/", including any
    query string.  The HTTP response is closed before returning, whether or
    not the body could be decoded.
    """
    response = urllib.urlopen("http://consul.internal.justin.tv/v1/catalog/" + resource)
    try:
        return json.load(response)
    finally:
        response.close()


def host_list(datacenters, target_service, tag):
    """
    Build the host groups that ansible expects and return them as a dict.

    Basically, iterate through datacenters, then iterate through services and
    build up lists of host information.  Then generate the different classes
    of host grouping that we support: one group per datacenter, one per host
    prefix and one per service.

    datacenters: list of datacenter names to query; None means "ask consul
        for the list of all datacenters".
    target_service: if set, only this service is looked up; otherwise every
        service consul lists for the datacenter is.
    tag: if set, service lookups are restricted to instances with this tag.

    Returns a dict mapping group name -> {'hosts': [fqdn, ...]}, where each
    host list is sorted and free of duplicates.  If a service/tag filter was
    given and produced no groups at all, the unfiltered inventory is returned
    instead.
    """

    if datacenters is None:
        datacenters = _consul_catalog("datacenters")

    nodes = {}            # fqdn -> nodeinfo entry (with datacenter/prefix added)
    node_dc_to_fqdn = {}  # Note: The keys are "node.dc"
    services = collections.defaultdict(list)

    for dc in datacenters:
        # If service specified, use that service, otherwise find a list of all services in the datacenter
        if target_service:
            dc_services = {target_service: tag}
        else:
            dc_services = _consul_catalog("services?wait=5s&dc={0}".format(dc))

        # Nodeinfo service is special, since it includes fqdn and is on every host
        node_infos = _consul_catalog("service/nodeinfo?wait=5s&dc={0}".format(dc))

        # Add additional per-node metadata to the nodeinfo
        for ni in node_infos:
            ni['datacenter'] = dc
            updateFQDN(ni)
            if 'fqdn' not in ni:
                # No fqdn= tag on this nodeinfo entry: the node cannot be
                # named in the inventory, so leave it out instead of failing
                # the whole run with a KeyError.
                continue
            node_dc_to_fqdn[ni['Node'] + "." + dc] = ni['fqdn']
            updatePrefix(ni)
            nodes[ni['fqdn']] = ni

        #
        # Iterated through services as opposed to nodes, presumably there are
        # usually less services than nodes in a datacenter.
        #
        for service in dc_services:
            resource = "service/{0}?wait=5s&dc={1}".format(service, dc)
            # If tag is specified, only look up service instances with that tag
            if tag:
                resource += "&tag={0}".format(tag)

            for service_node in _consul_catalog(resource):
                n_dc = service_node['Node'] + '.' + dc

                # Skip service instances on nodes we have no nodeinfo (and
                # therefore no FQDN) for
                if n_dc in node_dc_to_fqdn:
                    service_node['fqdn'] = node_dc_to_fqdn[n_dc]
                    services[service].append(service_node)

    # Every host that runs a selected service goes into three groups: its
    # datacenter, its host prefix and the service itself.  Sets take care of
    # de-duplication.
    group_hosts = collections.defaultdict(set)
    for service, service_nodes in services.items():
        for sn in service_nodes:
            fqdn = sn['fqdn']
            node = nodes[fqdn]
            for group in (node['datacenter'], node['prefix'], service):
                group_hosts[group].add(fqdn)

    # Sort everything for human readability
    output = {}
    for group, hosts in group_hosts.items():
        output[group] = {'hosts': sorted(hosts)}

    # A filter that matched nothing falls back to the full inventory.  Only do
    # this when a filter was actually applied: retrying an unfiltered query
    # with the same arguments would recurse without end.
    if not output and (target_service or tag):
        output = host_list(datacenters, None, None)

    return output

if __name__ == "__main__":
    # The inventory is configured through the environment:
    #   CONSUL_DC      - "all" for every datacenter, a single datacenter
    #                    name, or unset/empty for the default "sfo01"
    #   CONSUL_SERVICE - optional service name handed to run()
    #   CONSUL_TAG     - optional service tag handed to run()
    dc_env = os.environ.get('CONSUL_DC')
    service = os.environ.get('CONSUL_SERVICE')
    tag = os.environ.get('CONSUL_TAG')

    if dc_env == "all":
        # None is passed on as the datacenter list
        selected_dcs = None
    else:
        selected_dcs = [dc_env or "sfo01"]
    run(selected_dcs, service, tag)
