#!/usr/bin/python

import subprocess, re, time, datetime, optparse, sys, traceback

# Log file whose most recent lines are scanned for reducer progress.
log = '/var/log/beancounter/reducer.log'
# Applied with re.match to the selected log line; the greedy '.*' means the
# captured group is the run of digits after the last ': ' that is followed by
# a digit. Raw string so that '\d' reaches the regex engine untouched instead
# of being treated as a (invalid) string escape.
regex = r'.*: (\d+)'

# Command-line thresholds. Each entry pairs the option flags with the keyword
# arguments handed to OptionParser.add_option; both values are in seconds.
opt_args = [
  (
    ['--critical', '-c'],
    dict(help='how many seconds to go critical at', type='int', default=900),
  ),
  (
    ['--warn', '-w'],
    dict(help='how many seconds to warn at', type='int', default=600),
  ),
]
parser = optparse.OptionParser()
for flags, settings in opt_args:
  parser.add_option(*flags, **settings)
# `options` carries .critical and .warn; positional leftovers land in `args`.
options, args = parser.parse_args()

# Equivalent of the shell pipeline
#   tail -n 200 LOG | grep 'insert thread finished minute' | tail -n 1
# built without a shell: each stage reads the previous stage's stdout.
commands = [
  ['tail', '-n', '200', log],
  ['grep', 'insert thread finished minute'],
  ['tail', '-n', '1'],
]
tail = None
upstream = []
for command in commands:
  stage = subprocess.Popen(command, stdin=(tail.stdout if tail else None), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  if tail is not None:
    # The new child holds its own copy of the pipe's read end. Close the
    # parent's copy so the upstream stage gets SIGPIPE/EOF if the downstream
    # stage exits early, instead of blocking on a pipe nobody reads.
    tail.stdout.close()
    upstream.append(tail)
  tail = stage

# `tail` is now the last stage; `out`/`err` are its captured stdout/stderr.
out, err = tail.communicate()

# Reap the earlier stages so they do not linger as zombies, and keep whatever
# they wrote to stderr (in pipeline order) ahead of the last stage's stderr so
# that it is available to error reporting rather than silently discarded.
upstream_err = b''
for stage in upstream:
  upstream_err += stage.stderr.read()
  stage.stderr.close()
  stage.wait()
err = upstream_err + err

def misc_error():
  """Print a one-line failure report and exit with status 2.

  The report starts with a fixed message followed by every non-blank line of
  the module-level `out` and `err` strings (the captured stdout and stderr of
  the log pipeline), joined with ' : '. Blank lines are skipped so that empty
  or newline-terminated streams do not leave dangling ' : ' separators.

  Never returns: always raises SystemExit(2) via sys.exit.
  """
  output = ["error getting reducer lag"]
  for stream in (out, err):
    output.extend(line for line in stream.splitlines() if line.strip())
  print(' : '.join(output))

  sys.exit(2)

# `tail` is the last process of the pipeline; a non-zero exit status from it
# is reported as a generic failure (misc_error exits the script).
if tail.returncode != 0:
  misc_error()

# Offset, in whole seconds, between naive UTC and naive local wall-clock time.
# Both datetimes are derived from ONE clock reading: calling utcnow() and
# now() separately makes the difference a few microseconds short of the real
# offset, and since only days/seconds are used that rounds down to an offset
# that is one second too small.
# NOTE(review): this offset is later subtracted from the timestamp parsed out
# of the log, which presumably is local-time based -- confirm with the writer.
_now = time.time()
td = datetime.datetime.utcfromtimestamp(_now) - datetime.datetime.fromtimestamp(_now)
td = td.days * 24 * 60 * 60 + td.seconds

# Turn the selected log line into a lag figure and exit with a status that
# reflects it: 0 = within limits, 1 = at/over --warn, 2 = at/over --critical
# or any failure to determine the lag.
# NOTE(review): the exit codes look like the usual monitoring-plugin
# convention (OK / WARNING / CRITICAL) -- confirm with whatever runs this.

if not out:
  # The pipeline produced nothing: no progress line among the scanned log
  # lines. (The old message blamed an "ssh call", but no ssh is involved.)
  print('no reducer progress line found in %s' % log)
  sys.exit(2)

line = out.strip()
match = re.match(regex, line)
if not match:
  print('no match for %s in line %s' % (regex, line))
  sys.exit(2)

try:
  # Timestamp from the log, shifted by the UTC/local offset computed earlier.
  last_minute = int(match.group(1)) - td
except ValueError:
  misc_error()

# Seconds between now and the last minute the reducer reported finishing.
lag = int(time.time() - last_minute)
# Floor division keeps the reported minutes an integer on any interpreter.
minutes = lag // 60

if lag >= options.critical:
  print("reducer lag at %s minutes" % minutes)
  sys.exit(2)
elif lag >= options.warn:
  print("reducer lag at %s minutes" % minutes)
  sys.exit(1)
elif lag < 0:
  # A timestamp in the future points at clock or offset trouble.
  print("negative reducer lag? %s seconds" % (lag))
  sys.exit(2)

print('lag within normal parameters: %s minutes' % minutes)
sys.exit(0)
