"""
Central API for Swift ring management and health status aggregation

Manages gradual draining and filling of devices as they are removed
and added, via a redis store storing target weights of devices.
"""

import tornado.ioloop
import tornado.web
import tornado.gen
import tornado.httpclient
import tornado.escape
from tornado.process import Subprocess
import argparse
import time
import socket
import subprocess
import copy
import requests
import json
import shutil
import statsd
import socket
import os, sys, os.path
import xmltodict
from swift.common.ring.ring import Ring
import swift.common.ring
import pdb
#import tornadoredis
import redis
import tornado.locks
import traceback
from tornado_lib import CallbackHandler
from collections import defaultdict

# NOTE(review): last_cached, cached_hw and cached_uuids are not referenced in
# the portion of the file reviewed here -- confirm they are still used.
last_cached = 0
cached_hw = {}
cached_uuids = {}
# statsd client used by update_health() to emit gauges.  Starts as None and is
# presumably assigned during startup (not visible here).
stats_client = None

# NOTE(review): presumably the step by which a device weight moves toward its
# target on each update -- confirm against update_weights().
WEIGHT_INCREMENT = 300
WEIGHT_UPDATE_INTERVAL = 60*60*24*5 #update weight every 5 days
REBALANCE_INTERVAL = 60*60*24*5 #check for update every 5 days
# Consul service names whose hosts receive the ring after a rebalance.
CONSUL_SERVICES = [
    'swift-cdn',
    'swift-container',
    'swift-object',
    'swift-proxy',
]
STATSD_HOST = 'stats6.justin.tv'
# Consul service names whose hosts receive the current ring before a rebalance.
BACKUP_SERVICES = ['swift-proxy-backup']
ALL_SERVICES = CONSUL_SERVICES + BACKUP_SERVICES
# CONSUL_URL = 'consul.internal.justin.tv/v1/catalog/service/{0}'
# Consul catalog endpoint; {0} is replaced with the service name.
CONSUL_URL = 'http://localhost:8500/v1/catalog/service/{0}'
# Consul lookups are cached per tuple of service names: consul_cache holds the
# hostname -> "ip:port" mapping, consul_last_cached the time it was stored.
consul_cache = defaultdict(dict)
consul_last_cached = defaultdict(int)
# Maximum age of a cached consul lookup, in seconds.
CONSUL_CACHE_DURATION = 60
# Ring type -> path of the local gzipped ring file.
RING_PATHS = {
    # 'object': '/Users/achang/Documents/swift/conf/object.ring.gz',
    # 'account': '/Users/achang/Documents/swift/conf/account.ring.gz',
    # 'container': '/Users/achang/Documents/swift/conf/container.ring.gz'
    'object': '/etc/swift/object.ring.gz',
    'account': '/etc/swift/account.ring.gz',
    'container': '/etc/swift/container.ring.gz',
}
# Ring type -> ring object, read by ring_for_type().  Populated elsewhere
# (not visible in the portion of the file reviewed here).
RINGS = {}
# Connect/request timeout in seconds (90 minutes) for the prep and format calls.
PREP_REQUEST_TIMEOUT = 90 * 60
REQUEST_TIMEOUT = 80 # seconds
RING_TYPES = RING_PATHS.keys()
# Latest health summary; replaced by update_health() and served by HealthHandler.
RING_HEALTH = {}

# One lock per host, created on first use; SwiftDrive.start() holds it while
# prepping so that only one drive per host is prepped at a time.
POOL_MAP = defaultdict(tornado.locks.Lock)

# statsd gauge names emitted by update_health() for the object ring.
STATSD_PREFIX = 'swift.object'
TOTAL_SPACE = '{0}.total_space'.format(STATSD_PREFIX)
USED_SPACE = '{0}.used_space'.format(STATSD_PREFIX)
PERCENT_USED = '{0}.percent_used'.format(STATSD_PREFIX)

class SwiftBuilderException(Exception):
    """
    Signals that the local ring file could not be found or opened.
    """

@tornado.gen.coroutine
def swift_hosts(services=ALL_SERVICES):
    """
    Resolve the given consul services and hand back only the addresses
    (ip:port) of the swift hosts, discarding the hostnames.
    """
    host_map = yield swift_host_dicts(services)
    addresses = host_map.values()
    raise tornado.gen.Return(addresses)

@tornado.gen.coroutine
def swift_host_dicts(services=ALL_SERVICES):
    """
    Queries consul for each swift group and returns a dict of hostnames to ip:port of the
    local swift api running on them.

    services: iterable of consul service names to look up.

    Results are cached per set of services for CONSUL_CACHE_DURATION seconds.
    If any consul request does not answer with 200, the previously cached
    mapping for this set of services is returned (an empty dict if nothing
    has been cached yet) and the cache is left untouched.
    """
    service_key = tuple(services)
    cache_age = time.time() - consul_last_cached[service_key]

    if cache_age > CONSUL_CACHE_DURATION:
        result = {}
        for service in services:
            resp = yield tornado.gen.Task(http_client.fetch, CONSUL_URL.format(service))
            if resp.code != 200:
                # Fall back to the stale entry for *this* service set.  The
                # whole consul_cache is keyed by service tuples and must not
                # be handed to callers, who expect hostname -> "ip:port".
                raise tornado.gen.Return(consul_cache[service_key])

            for host in json.loads(resp.body):
                result[host['Node']] = host['Address'] + ':' + str(host['ServicePort'])

        # Only a fully successful sweep replaces the cached mapping.
        consul_cache[service_key] = result
        consul_last_cached[service_key] = time.time()

    raise tornado.gen.Return(consul_cache[service_key])

def push_ring(host, ring, ring_file):
    """
    Read the ring file from disk and start a POST of its contents to
    http://{host}/swift/{ring}.  Returns the pending tornado Task so the
    caller can yield on it.
    """
    with open(ring_file, 'rb') as handle:
        payload = handle.read()

    target = "http://{0}/swift/{1}".format(host, ring)
    print('fetching url {0}'.format(target))
    return tornado.gen.Task(
        http_client.fetch,
        target,
        method='POST',
        body=payload,
        request_timeout=REQUEST_TIMEOUT
    )

@tornado.gen.coroutine
def call_subprocess(cmd):
    """
    Run cmd through Tornado's Subprocess class and resolve to a
    (stdout, stderr) tuple once both streams have been closed.
    """
    proc = Subprocess(cmd, stdout=Subprocess.STREAM, stderr=Subprocess.STREAM)

    # Drain both pipes concurrently.
    readers = [
        tornado.gen.Task(proc.stdout.read_until_close),
        tornado.gen.Task(proc.stderr.read_until_close),
    ]
    out, err = yield readers

    raise tornado.gen.Return((out, err))


class SwiftCollection():
    """
    Registry of the drives the central api is currently initializing,
    keyed by each drive's redis hash.
    """
    def __init__(self):
        self.map = dict()

    def add(self, host, drive, drive_type, instant=True, file_system='xfs'):
        """
        Create a SwiftDrive for the given host/drive, register it and
        return it.
        """
        entry = SwiftDrive(host, drive, drive_type, instant, file_system)
        self.map[entry.redis_hash] = entry
        return entry

    def remove(self, hash):
        """
        Drop the drive registered under hash; raises KeyError if absent.
        """
        self.map.pop(hash)

    def getItems(self):
        """
        Set of redis hashes of every registered drive -- comparable with
        the result of smembers in redis.
        """
        return set(self.map)

# Process-wide registry of drive initializations currently in flight.
DEVICE_COLLECTION = SwiftCollection()

class SwiftDrive():
    """
    initialize one drive through the three steps -- prep, format, add to ring

    Progress is mirrored in redis: the set 'add_device' holds the hashes of
    pending drives and the hash 'add_status' holds the stage each one is at.
    """
    def __init__(self, host, drive, drive_type, instant, file_system):
        """
        host: short hostname (the domain suffix is appended for full_host)
        drive: device name, with or without a leading /dev/
        drive_type: object, account, or container
        instant: if truthy the device gets its full weight immediately
        file_system: file system passed to the remote format call
        """
        self.host = host
        self.drive = drive.split('/')[-1]
        self.drive_type = drive_type
        self.instant = instant
        self.file_system = file_system

        if '/dev/' not in drive:
            self.full_drive = '/dev/' + drive
        else:
            self.full_drive = drive
        self.full_host = '{0}.sfo01.justin.tv'.format(self.host)

        #the redis hash contains all the information necessary to resume the operation should the central api go down
        self.redis_hash = '{0}:{1}:{2}:{3}:{4}'.format(self.host, self.drive, self.drive_type, self.instant, self.file_system)
        print('initializing {0}'.format(self.redis_hash))

    def parse_size(self, disk):
        """
        parse the lshw dump for the device size

        disk: decoded disk description; its devices -> node entries are
              searched for a child node whose logicalname equals this
              drive's path minus its last character.

        Returns the size as an int, or None when no matching node with a
        size is found.
        """
        devices = disk.get('devices', {}).get('node', [])
        if not isinstance(devices, list):
            devices = [devices]

        # full_drive minus its trailing character -- presumably the parent
        # disk of a single-digit partition name (TODO confirm).
        target = self.full_drive[:-1]

        for device in devices:
            if 'node' not in device:
                continue

            nodes = device['node']
            if isinstance(nodes, dict):
                # a single child is reported as a dict rather than a list
                nodes = [nodes]

            for node in nodes:
                if node.get('logicalname') != target:
                    continue
                size = node.get('size')
                if size:
                    # drops the last two characters before converting --
                    # presumably a unit suffix (TODO confirm)
                    return int(size[:-2])

        return None

    def end(self):
        """
        remove device from central api queue -- message to redis that this device is complete
        """
        redis_client.hdel('add_status', self.redis_hash)
        redis_client.srem('add_device', self.redis_hash)

        print('COMPLETED drive {0}'.format(self.redis_hash))

    @tornado.gen.coroutine
    def start(self, stage):
        """
        start device at specified stage -- only one device can be prepped at a time everything else
        can be done in parallel

        stage: 'PREP', 'FORMAT' or 'ADD'.  Each stage chains into the next
               one on success.  Any other value resolves to False without
               doing anything.

        Resolves to True when the drive made it into the ring.  On failure
        of any stage the drive is removed from DEVICE_COLLECTION but left
        in redis, which lets update_drive_queue() restart it.
        """
        result = False

        if stage == 'PREP':
            redis_client.sadd('add_device', self.redis_hash)
            print('WAITing to prep {0}'.format(self.redis_hash))
            # NOTE: the stages chained from here run while the per-host
            # lock is still held.
            with (yield POOL_MAP[self.host].acquire()):
                print('STARTing to prep {0}'.format(self.redis_hash))
                if (yield self.prep()):
                    result = yield self.start('FORMAT')
                else:
                    print('ERROR drive {0} during PREP'.format(self.redis_hash))
                    DEVICE_COLLECTION.remove(self.redis_hash)
        elif stage == 'FORMAT':
            print('STARTing to format {0}'.format(self.redis_hash))
            if (yield self.format()):
                result = yield self.start('ADD')
            else:
                print('ERROR drive {0} during FORMAT'.format(self.redis_hash))
                DEVICE_COLLECTION.remove(self.redis_hash)
        elif stage == 'ADD':
            print('STARTing to add {0}'.format(self.redis_hash))
            result = yield self.add()
            if result:
                self.end()
            else:
                # A failed add must not be reported as COMPLETED nor wiped
                # from redis; treat it like a failed PREP/FORMAT instead.
                print('ERROR drive {0} during ADD'.format(self.redis_hash))
                DEVICE_COLLECTION.remove(self.redis_hash)
        else:
            raise tornado.gen.Return(False)

        raise tornado.gen.Return(result)

    @tornado.gen.coroutine
    def _post_stage(self, stage, url):
        """
        Record stage in redis, POST an empty json body to url and resolve
        to True unless the response carries an error.
        """
        redis_client.hset('add_status', self.redis_hash, stage)

        response = yield tornado.gen.Task(
            http_client.fetch,
            url,
            method='POST',
            body=json.dumps({}),
            connect_timeout=PREP_REQUEST_TIMEOUT,
            request_timeout=PREP_REQUEST_TIMEOUT
        )

        if response.error:
            print(response)
            raise tornado.gen.Return(False)

        raise tornado.gen.Return(True)

    @tornado.gen.coroutine
    def prep(self):
        """
        update redis on curr stage then begin prepping device

        Resolves to True on success, False if the remote call failed.
        """
        url = 'http://{0}:8082/prep_device/{1}'.format(self.full_host, self.drive[:-1])
        ok = yield self._post_stage('PREP', url)
        raise tornado.gen.Return(ok)

    @tornado.gen.coroutine
    def format(self):
        """
        update redis on curr stage then begin formatting device

        Resolves to True on success, False if the remote call failed.
        """
        url = 'http://{0}:8082/format_device/{1}?fs={2}'.format(self.full_host, self.drive[:-1], self.file_system)
        ok = yield self._post_stage('FORMAT', url)
        raise tornado.gen.Return(ok)

    @tornado.gen.coroutine
    def add(self):
        """
        update redis get disk information, then add disk into swift ring

        Resolves to True once the device is in the ring builder.  Resolves
        to False -- before touching the ring -- when any of the host's
        disks/uuids/zone requests failed, the drive type has no known port,
        the host cannot be resolved (or resolves to 127.0.0.1), or the
        drive's size or uuid cannot be determined.
        """
        redis_client.hset('add_status', self.redis_hash, 'ADD')

        disk_url = 'http://{0}:8082/disks'.format(self.full_host)
        uuids_url = 'http://{0}:8082/uuids'.format(self.full_host)
        zone_url = 'http://{0}:8082/zone'.format(self.full_host)

        disk_resp, uuids_resp, zone_resp = yield [
            tornado.gen.Task(http_client.fetch, disk_url, method='GET', request_timeout=REQUEST_TIMEOUT),
            tornado.gen.Task(http_client.fetch, uuids_url, method='GET'),
            tornado.gen.Task(http_client.fetch, zone_url, method='GET')
        ]

        port_map = {
            'container': 6001,
            'account': 6002,
            'object': 6000
        }
        port = port_map.get(self.drive_type, None)

        # Check the responses before the DNS lookup so an unreachable host
        # is reported as a failure rather than raising.
        if disk_resp.error or uuids_resp.error or zone_resp.error or not port:
            raise tornado.gen.Return(False)

        try:
            ip = socket.gethostbyname(self.full_host)
        except socket.error:
            raise tornado.gen.Return(False)

        if ip == '127.0.0.1':
            raise tornado.gen.Return(False)

        max_weight = self.parse_size(tornado.escape.json_decode(disk_resp.body))
        drive_info = tornado.escape.json_decode(uuids_resp.body).get(self.full_drive) or {}
        uuid = drive_info.get('uuid')

        if max_weight is None or not uuid:
            # Without a size there is no weight (or weight target) to set,
            # and without a uuid there is no device name for the ring.
            print('ERROR drive {0} has no known size or uuid'.format(self.redis_hash))
            raise tornado.gen.Return(False)

        device = {
            'weight': max_weight if self.instant else 0.0,
            'ip': ip,
            'region': 1,
            'zone': tornado.escape.json_decode(zone_resp.body).get('zone'),
            'port': port,
            'device': uuid,
            'meta': self.full_host,
        }

        ring = ring_for_type(self.drive_type)
        device_id = yield ring.add(device, rebalance=self.instant)

        if not self.instant:
            # Start at weight 0 and record the full weight as the target.
            key = "{0}:{1}".format(self.drive_type, device_id)
            val = "{0}:{1}".format(max_weight, int(time.time()))
            redis_client.hset('weight_targets', key, val)

        raise tornado.gen.Return(True)

class SwiftRingBuilder(Ring):
    """
    The heart of what swift central api does.  A class that manages adding, removing, and updating
    devices in a swift ring.

    Actions are new, add, remove, and set_weight.  These actions are finalized by calling rebalance.
    """
    def __init__(self, file_path, ring_type):
        """
        file_path: path to local gzipped ring file.  Must contain 'ring.gz'.
                   Builder file name is the same with ring.gz replaced with builder
        ring_type: object, account, or container

        Raises SwiftBuilderException if file_path does not contain 'ring.gz'.
        Creates the builder file and/or ring file when they do not exist.
        """
        if 'ring.gz' not in file_path:
            raise SwiftBuilderException('not a valid gzipped ring file')
        self.builder_file = file_path.replace('ring.gz', 'builder')

        if not os.path.exists(self.builder_file):
            self.new()
        if not os.path.exists(file_path):
            subprocess.call(['swift-ring-builder', self.builder_file, 'rebalance'])

        super(SwiftRingBuilder, self).__init__(file_path)

        self.type = ring_type
        self.ring_file = file_path
        self.read_ring()

    @tornado.gen.coroutine
    def rebalance(self):
        """
        Rebalance local ring file based on builder file.  Will apply all devices added,
        removed, or updated and push them to each known swift host.

        The ring file as it is before the rebalance is first pushed to the
        BACKUP_SERVICES hosts; the rebalanced file is then pushed to the
        CONSUL_SERVICES hosts.
        """
        hosts = yield swift_hosts(BACKUP_SERVICES)
        update_futures = {}

        for host in hosts:
            update_futures[host] = push_ring(host, self.type, self.ring_file)

        yield update_futures

        print('rebalancing {0}'.format(self.type))
        yield call_subprocess(['swift-ring-builder', self.builder_file, 'rebalance'])
        print('finished rebalance {0}'.format(self.type))

        hosts = yield swift_hosts(CONSUL_SERVICES)
        # Pick up the freshly written ring and builder files.
        self._reload(force=True)
        self.read_ring()
        update_futures = {}

        for host in hosts:
            update_futures[host] = push_ring(host, self.type, self.ring_file)

        yield update_futures

    def read_ring(self):
        """
        Reload the builder file and rebuild self.devices (device id -> device
        dict), skipping empty slots in the builder's device list.
        """
        self.builder = swift.common.ring.RingBuilder.load(self.builder_file)
        self.devices = {}

        for dev in self.builder.devs:
            if dev:
                self.devices[dev['id']] = dev

    def new(self, part_power=20, min_part_hours=0.5, replicas=3):
        """
        Create a new swift ring from scratch.  By default the parameters of the ring are
        partition power 20 (a million partitions), min part hours 0.5 (30 minutes between
        rebalances), and 3 replicas.
        """
        self.builder = swift.common.ring.RingBuilder(part_power, replicas, min_part_hours)
        self.builder.save(self.builder_file)
        # rebalance() is a coroutine; its Future is not waited on here.
        self.rebalance()

    @tornado.gen.coroutine
    def remove(self, device_id, rebalance=False):
        """
        Remove a device completely from the ring, by device_id.  If rebalance is True,
        rebalance the ring afterward.

        Resolves to False when device_id is not an integer, the device is
        already pending removal, or the builder rejects the removal with a
        ValueError; True otherwise.
        """
        try:
            # Normalize first so ids arriving as strings are comparable with
            # the integer ids held by the builder.
            device_id = int(device_id)
        except (TypeError, ValueError):
            raise tornado.gen.Return(False)

        pending = self.builder.to_dict()['_remove_devs']
        if any(dev['id'] == device_id for dev in pending):
            raise tornado.gen.Return(False)

        try:
            self.builder.remove_dev(device_id)
            self.builder.save(self.builder_file)
        except ValueError:
            raise tornado.gen.Return(False)

        if rebalance:
            yield self.rebalance()

        raise tornado.gen.Return(True)

    @tornado.gen.coroutine
    def add(self, device, rebalance=False):
        """
        Add a device to the ring.  Takes a dictionary with the following keys:
           weight, region, zone, ip, port, device

        If rebalance is True, rebalance the ring afterward.

        Resolves to the id the builder assigned to the device.
        """
        device_id = self.builder.add_dev(device)
        self.builder.save(self.builder_file)

        if rebalance:
            yield self.rebalance()

        raise tornado.gen.Return(device_id)

    @tornado.gen.coroutine
    def set_weight(self, device_id, weight, rebalance=False, save=True):
        """
        Change the weight of the device identified by device_id to the supplied
        weight.  If rebalance is True, rebalance the ring afterward.

        save: write the builder file after changing the weight.

        Resolves to True when the weight was applied and False when it was
        not; a failed update never triggers a rebalance.
        """
        try:
            self.builder.set_dev_weight(int(device_id), float(weight))
            if save:
                self.builder.save(self.builder_file)
        except ValueError:
            raise tornado.gen.Return(False)
        except Exception:
            # Log unexpected failures, but do not report them as success.
            print(traceback.format_exc())
            raise tornado.gen.Return(False)

        if rebalance:
            yield self.rebalance()

        raise tornado.gen.Return(True)

    @tornado.gen.coroutine
    def to_dict(self):
        """
        Convert ring object into serializable dict with the keys
        partitions, replicas, balance and devices.  The builder file is
        re-read first so the result reflects what is on disk.
        """
        self.read_ring()

        raise tornado.gen.Return({
            'partitions': self.partition_count,
            'replicas': self.replica_count,
            'balance': self.builder.get_balance(),
            'devices': self.devices,
        })

    def is_in_ring(self, uuid):
        """
        For a given UUID, return whether or not it exists in the ring.
        """
        return any(device['device'] == uuid for device in self.devices.values())

def ring_for_type(ring_type):
    """
    Given a ring name, returns the corresponding ring object, refreshed
    from its builder file.

    Returns None when no ring is registered under ring_type, so callers
    can report an invalid ring type instead of hitting an AttributeError.
    """
    ring = RINGS.get(ring_type)
    if ring is not None:
        ring.read_ring()
    return ring

def check_arguments(body, args):
    """
    Verify that every required argument is present.

    body: dictionary to check
    args: required arguments

    Returns an {'error': ...} dict naming the first missing argument, or
    None when nothing is missing.
    """
    absent = [name for name in args if name not in body]
    if absent:
        return {'error': 'missing argument - {0}'.format(absent[0])}
    return None

class PushRingHandler(CallbackHandler):
    """
    Handler behind /push_ring: pushes every local ring file to every
    swift node.
    """
    @tornado.gen.coroutine
    def post(self):
        """
        POST /push_ring.  Sends each local ring file to each swift host,
        one push at a time, then reports the hosts that were updated.
        """
        targets = yield swift_hosts()
        pushes = [(address, name) for address in targets for name in RING_TYPES]

        for address, name in pushes:
            yield push_ring(address, name, RING_PATHS[name])

        self.write_with_callback({'updated': targets})

class SwiftInitHandler(CallbackHandler):
    """
    Request handler that interacts with swift-init, on /swift_init.
    Can get the status of any service or start / stop / restart a service.
    """
    @tornado.gen.coroutine
    def get(self, host='all'):
        """
        GET /swift_init/{host}

        Show all services running on the specified host, or on every known
        host when host is 'all'.  Hosts that do not answer with 200 are
        left out of the result.
        """
        hosts = yield swift_host_dicts()
        if host != 'all':
            if host not in hosts:
                self.write_with_callback({'error': 'no such host {0}'.format(host)})
                return
            hosts = {host: hosts[host]}

        # Query all hosts concurrently.
        futures = {}
        for name, addr in hosts.iteritems():
            futures[name] = tornado.gen.Task(
                http_client.fetch,
                "http://{0}/swift_init/all".format(addr),
                request_timeout=REQUEST_TIMEOUT,
            )

        results = yield futures
        retval = {}
        for name in results:
            if results[name].code == 200:
                retval[name] = json.loads(results[name].body)
        self.write_with_callback(retval)

    @tornado.gen.coroutine
    def post(self, host, action):
        """
        POST /swift_init/{host}/(start|stop|restart)

        Stop / start / restart all services on the specified host, or on
        every known host when host is 'all'.
        """
        if action not in ('start', 'stop', 'restart'):
            self.write_with_callback({'error': 'action must be start/stop/restart'})
            return

        hosts = yield swift_host_dicts()
        if host != 'all':
            if host not in hosts:
                self.write_with_callback({'error': 'no such host {0}'.format(host)})
                return
            addresses = [hosts[host]]
        else:
            addresses = hosts.values()

        for addr in addresses:
            # The requests are started but not waited on: success is
            # reported without knowing whether the hosts accepted the action.
            tornado.gen.Task(
                http_client.fetch,
                "http://{0}/swift_init/all/{1}".format(addr, action),
                method='POST',
                body='{}',
                request_timeout=REQUEST_TIMEOUT,
            )

        self.write_with_callback({'success': True})

@tornado.gen.coroutine
def check_hosts():
    """
    Periodic check that every swift host has each ring file.  A host that
    answers 404 for a ring gets that ring pushed to it.  Reschedules itself
    60 seconds later whether or not the pass succeeded.
    """
    try:
        addresses = yield swift_hosts()
        for address in addresses:
            for ring_name in RING_TYPES:
                ring_url = 'http://{0}/swift/{1}'.format(address, ring_name)
                probe = yield tornado.gen.Task(
                    http_client.fetch,
                    ring_url,
                    request_timeout=REQUEST_TIMEOUT
                )
                if probe.code != 404:
                    continue
                yield push_ring(address, ring_name, RING_PATHS[ring_name])
    finally:
        loop = tornado.ioloop.IOLoop.instance()
        loop.add_timeout(time.time() + 60, check_hosts)

class HealthHandler(CallbackHandler):
    """
    Handler for /health: serves the most recent cluster-wide health
    summary held in RING_HEALTH.
    """
    @tornado.gen.coroutine
    def get(self):
        """
        GET /health

        Writes the current RING_HEALTH snapshot, which update_health()
        builds in this shape:
        {
            "[ring_type]": {
                "summary": {
                    "total_space_TB": [total space],
                    "used_space_TB": [used space],
                    "percent_used": [percent used]
                },
                "servers": {
                    "[host ip:port]": {
                        "used_space_TB": [used space],
                        "total_space_TB": [total space],
                        "percent_used": [percent used],
                        "unhealthy_devices": [
                            {
                                "uuid": [uuid],
                                "drive": [device path]
                            },
                            ...
                        ]
                    },
                    ...
                }
            },
            ...
        }

        Unmounted devices are listed under unhealthy_devices; they need
        attention and/or replacement.
        """
        snapshot = RING_HEALTH
        self.write_with_callback(snapshot)

class SwiftHostHandler(CallbackHandler):
    """
    Handler for /hosts, which lists the known swift hosts.
    """
    @tornado.gen.coroutine
    def get(self):
        """
        GET /hosts

        Writes the hostname -> ip:port mapping produced by
        swift_host_dicts as json.
        """
        host_map = yield swift_host_dicts()
        self.write_with_callback(host_map)

class RedisHandler(CallbackHandler):
    """
    Handler for /redis: reports what is currently stored in redis -- the
    weights devices are moving toward, devices slated for removal and
    drives being added.
    """
    @tornado.gen.coroutine
    def get(self):
        """
        GET /redis

        Writes a json object with three components:
        {
            "weight_targets": {
                "[ring name]": {
                    "[device id]": [target weight],
                    ...
                },
                ...
            },
            "remove_devices": {
                "[ring name]": {
                    "[device id]": true,
                    ...
                },
                ...
            },
            "add_devices": {
                "[ring name]": [{
                    "host": ..., "drive": ..., "type": ..., "stage": ...
                }],
                ...
            }
        }
        """
        ring_names = ('account', 'container', 'object')
        result = {
            'weight_targets': {name: {} for name in ring_names},
            'remove_devices': {name: {} for name in ring_names},
            'add_devices': {name: [] for name in ring_names},
        }

        targets = redis_client.hgetall('weight_targets')
        removals = redis_client.smembers('remove_devices')
        additions = redis_client.smembers('add_device')
        stages = redis_client.hgetall('add_status')

        # removal keys look like "ring:device_id"
        for entry in removals:
            ring_name, device_id = entry.split(':')
            result['remove_devices'][ring_name][device_id] = True

        # target values look like "weight:timestamp"; only the weight is reported
        for entry, packed in targets.iteritems():
            ring_name, device_id = entry.split(':')
            target, _stamp = packed.split(':')
            result['weight_targets'][ring_name][device_id] = target

        # addition keys look like "host:drive:type:instant:fs"
        for entry in additions:
            host, drive, drive_type, _instant, _fs = entry.split(':')
            summary = {
                'host': host,
                'drive': drive,
                'type': drive_type,
                'stage': stages.get(entry, 'PENDING'),
            }
            result['add_devices'][drive_type].append(summary)

        self.write_with_callback(result)

class IndexHandler(tornado.web.RequestHandler):
    """
    Request handler that renders the index.html template.
    """
    @tornado.web.asynchronous
    @tornado.gen.engine
    def get(self):
        """
        Render index.html as the response.
        """
        self.render('index.html')

class SwiftRingHandler(CallbackHandler):
    """
    Request handler to display and modify swift rings, under /swift
    """
    def ring(self, ring_type):
        """
        Looks up ring based on ring_type and writes an error out if the ring does not exist.

        Returns the ring object, or None after the error has been written.
        """
        # Check the registry first so an unknown type produces the error
        # response rather than an exception during the lookup.
        if RINGS.get(ring_type) is None:
            self.write_with_callback({'error': 'Invalid ring type: {0}'.format(ring_type)})
            return None

        return ring_for_type(ring_type)

    @tornado.gen.coroutine
    def get(self, ring_type):
        """
        GET /ring/{ring_type}

        Returns ring builder info for the specified ring
        """
        ring = self.ring(ring_type)

        if not ring:
            # self.ring() has already written the error response
            return

        ret = yield ring.to_dict()
        self.write_with_callback(ret)

    @tornado.gen.coroutine
    def target_weight(self, ring, device_id, weight):
        """
        Set a device to target a weight.

        ring: ring in which device resides.
        device_id: id of device in that ring.
        weight: weight that device should ultimately have.

        Stored in the redis hash 'weight_targets' under "ring:device_id" as
        "weight:current unix time".
        """
        key = "{0}:{1}".format(ring.type, device_id)
        val = "{0}:{1}".format(weight, int(time.time()))
        redis_client.hset('weight_targets', key, val)

    @tornado.gen.coroutine
    def delete(self, ring_type, device_id):
        """
        DELETE /ring/{ring_type}/{device_id}

        ring_type: object, account, or container
        device_id: id of device to delete

        Targets weight 0 for the device and marks it for removal.  With the
        'instant' url parameter the weight is set to 0 right away.
        """
        ring = self.ring(ring_type)

        if not ring:
            return

        if int(device_id) not in ring.devices:
            self.write_with_callback({'error': '{0} not found in {1} ring.'.format(device_id, ring_type)})
            return

        key = "{0}:{1}".format(ring.type, device_id)

        # NOTE: get_argument returns the raw string, so any non-empty value
        # of 'instant' counts as true.
        if self.get_argument('instant', False):
            yield ring.set_weight(device_id, 0.0)
        yield self.target_weight(ring, device_id, 0.0)
        redis_client.sadd('remove_devices', key)

        self.write_with_callback({'success': True})

    @tornado.gen.coroutine
    def put(self, ring_type, device_id=None):
        """
        PUT /ring/{ring_type}/[{device_id}]

        If device_id is present, update device_id with weight url parameter.
        Otherwise, create a new ring of ring_type.
        """
        ring = self.ring(ring_type)

        if not ring:
            return

        if device_id:
            if int(device_id) not in ring.devices:
                self.write_with_callback({'error': '{0} not found in {1} ring.'.format(device_id, ring_type)})
                return

            target_weight = self.get_argument('weight')
            yield self.target_weight(ring, device_id, target_weight)
            if self.get_argument('instant', False):
                yield ring.set_weight(device_id, target_weight)

            # A device that is given a new weight is no longer slated for removal.
            key = "{0}:{1}".format(ring.type, device_id)
            redis_client.srem('remove_devices', key)
            self.write_with_callback({'success': True})
            return

        part_power = int(self.get_argument('part_power', 20))
        min_part_hours = float(self.get_argument('min_part_hours', 0.5))
        replicas = int(self.get_argument('replicas', 3))

        ring.new(part_power, min_part_hours, replicas)
        self.write_with_callback({'success': True})

    @tornado.gen.coroutine
    def post(self, ring_type, action=None):
        """
        POST /ring/{ring_type}/[rebalance]

        if rebalance is specified, rebalance the ring denoted by ring_type.
        Otherwise, insert a new device with the json body.

        Device parameters are weight, region, zone, ip, port, and device.

        Without the 'instant' url parameter the device is added at weight 0
        and the requested weight is stored as its target.  With 'instant'
        the ring is rebalanced right away unless 'wait' is also given.
        """
        ring = self.ring(ring_type)

        if not ring:
            return

        if action == 'rebalance':
            yield ring.rebalance()
            self.write_with_callback({'success': True})
            return

        device = json.loads(self.request.body)
        err = check_arguments(device, ('weight', 'region', 'zone', 'ip', 'port', 'device'))
        if err:
            self.write_with_callback(err)
            return

        target_weight = device['weight']
        instant = self.get_argument('instant', False)

        if not instant:
            device['weight'] = 0.0

        rebalance = not self.get_argument('wait', False) and instant
        device_id = yield ring.add(device, rebalance=rebalance)

        if not instant:
            yield self.target_weight(ring, device_id, target_weight)

        self.write_with_callback({'success': True})

class SwiftDriveHandler(CallbackHandler):
    """
    Request handler for /drive, which queues and cancels operations that
    add new drives.
    """
    @tornado.gen.coroutine
    def post(self):
        """
        POST /drive

        the body should contain a json summary of the new host:

        {
            host:                                   //hostname
            drives: [...]                           //list of drives
            type: 'object'|'account'|'container'    //ring type
            instant: True|False                     //what to set initial weight at
            fileSystem: 'xfs'|'ext4'                //file system type
            stage: 'ADD'|'PREP'|'FORMAT'            //stage to start
        }

        add a new add operation to the queue

        host, drives, type and stage are required; fileSystem falls back to
        'xfs'.  An error is written, and nothing is queued, when a required
        field is missing or the stage is not one of the three listed.
        """
        host = tornado.escape.json_decode(self.request.body)

        err = check_arguments(host, ('host', 'drives', 'type', 'stage'))
        if err:
            self.write_with_callback(err)
            return

        stage = host.get('stage')
        if stage not in ('PREP', 'FORMAT', 'ADD'):
            # SwiftDrive.start() does nothing for an unknown stage, so do
            # not report such a request as a success.
            self.write_with_callback({'error': 'stage must be PREP/FORMAT/ADD'})
            return

        drive_type = host.get('type').lower()
        file_system = host.get('fileSystem') or 'xfs'

        for drive in host.get('drives'):
            instance = DEVICE_COLLECTION.add(host.get('host'), drive, drive_type, instant=host.get('instant'), file_system=file_system)

            # Started in the background; the response does not wait for it.
            instance.start(stage)

        self.write_with_callback({'success': True})

    @tornado.gen.coroutine
    def put(self):
        """
        PUT /drive

        the body should contain a json containing the host:

        {
            host:                                   //hostname
        }

        allow the current add operation run to completion, then remove
            from queue (aka prevent rerun on failure for all add operations on this host)
        """
        body = tornado.escape.json_decode(self.request.body)

        target = body.get('host')
        if not target:
            self.write_with_callback({'error': 'missing argument - host'})
            return

        additions = redis_client.smembers('add_device')

        for addition in additions:
            # Entries look like "host:drive:type:instant:fs".  Compare the
            # host field exactly; a substring test would also hit other
            # hosts whose names merely contain this one.
            if addition.split(':')[0] == target:
                redis_client.srem('add_device', addition)

        self.write_with_callback({'success': True})

@tornado.gen.coroutine
def update_health():
    """
    Loop that polls every swift host for the health of each ring type and
    publishes the aggregate in RING_HEALTH.

    For each ring type the result holds a 'servers' dict (host -> its health
    report, or 'unknown' when the host could not be read) and a 'summary'
    with used/total space and their ratio.  Hosts reporting neither space
    nor unhealthy devices for a ring are left out.  The top-level
    'bad_hosts' key lists every host that could not be read.  Totals for the
    object ring are also sent to statsd.

    Reschedules itself 60 seconds later; failures are logged and leave
    RING_HEALTH unchanged.
    """
    global RING_HEALTH

    url = "http://{host}/health/{ring}"
    try:
        results = {}
        bad_hosts = set()

        SWIFT_HOSTS = yield swift_hosts()
        for ring_type in RING_TYPES:
            servers = {}
            used = 0
            total = 0

            for host in SWIFT_HOSTS:
                print('reading health from {0}'.format(host))

                health_url = url.format(host=host, ring=ring_type)
                resp = yield tornado.gen.Task(http_client.fetch,
                                              health_url,
                                              request_timeout=REQUEST_TIMEOUT)

                try:
                    if resp.code == 200:
                        result = json.loads(resp.body)
                        if not result['total_space_TB'] and not result['unhealthy_devices']:
                            # nothing to report for this ring on this host
                            continue

                        used += result['used_space_TB']
                        total += result['total_space_TB']
                        servers[host] = result
                    else:
                        servers[host] = 'unknown'
                        bad_hosts.add(host)
                except Exception:
                    # malformed or incomplete report
                    servers[host] = 'unknown'
                    bad_hosts.add(host)

            # Force true division: with integer totals "/" would truncate
            # the ratio to 0.
            fraction_used = float(used) / total if total else 0

            results[ring_type] = {
                'servers': servers,
                'summary': {
                    'used_space_TB': used,
                    'total_space_TB': total,
                    'percent_used': fraction_used,
                },
            }

            if ring_type == 'object':
                stats_client.gauge(TOTAL_SPACE, total)
                stats_client.gauge(USED_SPACE, used)
                stats_client.gauge(PERCENT_USED, fraction_used)

        results['bad_hosts'] = list(bad_hosts)

        print('updated health!')
        RING_HEALTH = results
    except Exception:
        print(traceback.format_exc())
    finally:
        tornado.ioloop.IOLoop.instance().add_timeout(time.time() + 60, update_health)

@tornado.gen.coroutine
def update_drive_queue():
    """
    sync central api with redis wrt adding new devices. Restart any add operations that
    appear in redis but arent in local queue
    """
    print 'updating drive queue...'
    try:
        in_progress = redis_client.smembers('add_device')

        difference =  set(in_progress) - DEVICE_COLLECTION.getItems()

        for redis_hash in difference:
            host, drive, type, instant, fs = redis_hash.split(':')
            instance = DEVICE_COLLECTION.add(host, drive, type, instant=='True', fs)

            stage = redis_client.hget('add_status', redis_hash)
            if stage is None:
                instance.start('PREP')
            else:
                instance.start(stage)

    except Exception:
        print traceback.format_exc()
    finally:
        tornado.ioloop.IOLoop.instance().add_timeout(time.time() + 120, update_drive_queue)


@tornado.gen.coroutine
def update_weights():
    """
    Main loop that runs and updates weights on devices as necessary.
    When a weight is updated, a key value pair is inserted into redis specifying the target weight.
    At an interval, we increment or decrement the weight by a set interval toward the target.
    """

    print 'updating weights...'
    try:
        weights = redis_client.hgetall('weight_targets')
        curr_time = int(time.time())
        changed_rings = set()

        for ring in RING_TYPES:
            if RINGS[ring].builder.get_balance() > 1.0:
                changed_rings.add(RINGS[ring])

        for device, weight_time in weights.iteritems():
            ring_type, device_id = device.split(":")
            weight, next_time = weight_time.split(":")
            weight = float(weight)

            next_time = int(next_time)

            if next_time > curr_time:
                continue

            next_weight_time = "{0}:{1}".format(weight, str(curr_time + WEIGHT_UPDATE_INTERVAL))

            redis_client.hset('weight_targets', device, next_weight_time)

            if ring_type not in ('object', 'account', 'container'):
                redis_client.hdel('weight_targets', device)

            ring = RINGS.get(ring_type)
            if not ring:
                continue

            device_id = int(device_id)

            if device_id not in ring.devices:
                redis_client.hdel('weight_targets', device)
                continue

            curr_weight = ring.devices[device_id]['weight']
            print 'device: {0}, weight: {1}, target: {2}'.format(device, curr_weight, weight)

            if curr_weight < weight:
                curr_weight = min(curr_weight + WEIGHT_INCREMENT, weight)
            elif curr_weight > weight:
                curr_weight = max(curr_weight - WEIGHT_INCREMENT, weight)

            if curr_weight == 0.0:
                result = redis_client.sismember('remove_devices', device)
                if result:
                    removed = yield ring.remove(device_id, rebalance=False)
                    changed_rings.add(ring)
                    if removed:
                        redis_client.hdel('weight_targets', device)
                        redis_client.srem('remove_devices', device)
                    continue

            yield ring.set_weight(device_id, curr_weight, rebalance=False, save=False)
            changed_rings.add(ring)

            if curr_weight == weight:
                redis_client.hdel('weight_targets', device)

        for ring in changed_rings:
            ring.builder.save(ring.builder_file)
            yield ring.rebalance()
    except Exception:
        print traceback.format_exc()
    finally:
        tornado.ioloop.IOLoop.instance().add_timeout(time.time() + REBALANCE_INTERVAL, update_weights)

if __name__ == "__main__":
    stats_client = statsd.StatsClient(host=STATSD_HOST)
    redis_client = redis.StrictRedis()
    http_client = tornado.httpclient.AsyncHTTPClient()

    for ring in RING_TYPES:
        RINGS[ring] = SwiftRingBuilder(RING_PATHS[ring], ring)

    settings = {
        "template_path": os.path.join(os.path.dirname(__file__), "templates"),
        "static_path": os.path.join(os.path.dirname(__file__), "static"),
        "debug" : True
    }

    application = tornado.web.Application([
    	(r'/', IndexHandler),
        (r'/index', IndexHandler),
        (r"/swift/(.*)/(.*)", SwiftRingHandler),
        (r"/swift/(.*)", SwiftRingHandler),
        (r"/hosts", SwiftHostHandler),
        (r"/hosts/(.*)", SwiftHostHandler),
        (r"/health", HealthHandler),
        (r"/push_ring", PushRingHandler),
        (r"/swift_init/(.*)/(start|stop)", SwiftInitHandler),
        (r"/swift_init/(.*)", SwiftInitHandler),
        (r"/push_ring", PushRingHandler),
        (r"/redis", RedisHandler),
        (r"/drive", SwiftDriveHandler),
    ], **settings)

    application.listen(8077)

    main_loop = tornado.ioloop.IOLoop.instance()
    print 'updating weights'
    main_loop.add_timeout(time.time() + REBALANCE_INTERVAL, update_weights)
    print 'updating health'
    main_loop.add_timeout(time.time(), update_health)
    print 'checking hosts'
    main_loop.add_timeout(time.time(), check_hosts)
    print 'checking drive additions'
    main_loop.add_timeout(time.time(), update_drive_queue)
    main_loop.start()
