#!/usr/bin/env python
from pyPgSQL import PgSQL
import time, sys, urllib, traceback, os, random

def finish(exit_status, errtxt):
    """Print the status message and terminate the script.

    exit_status -- process exit code handed to sys.exit().
    errtxt      -- text written to stdout before exiting.

    Never returns normally: sys.exit() raises SystemExit.

    NOTE(review): callers in this file use 0, 1 and 3, which look like
    monitoring-plugin (Nagios-style) status codes -- confirm.
    """
    # The parenthesised form prints exactly the same text on Python 2 and
    # is also valid syntax on Python 3.
    print(errtxt)
    sys.exit(exit_status)
    
def format(t):
    """Render the time tuple `t` as a 'YYYY-MM-DD HH:MM:SS' string."""
    timestamp_layout = "%Y-%m-%d %H:%M:%S"
    return time.strftime(timestamp_layout, t)
# Bounds of the sampling window as UTC timestamp strings (time.gmtime):
# `tenago` is 10 minutes before now, `fifteenago` is 15 minutes before now.
# NOTE(review): assumes archives.start_time is stored in UTC -- confirm.
tenago = format(time.gmtime(time.time() - 10 * 60))
fifteenago = format(time.gmtime(time.time() - 15 * 60))



# Open the database connection used by every query below. Any failure
# prints the traceback to stderr and exits with status 3.
# NOTE(review): the credentials are hard-coded in source; consider moving
# them to configuration that is kept out of version control.
try:
    db_conn = PgSQL.connect(database="londiste_prod", host="127.0.0.1:12006", user="rails", password="ratspourmarblehomes")
except Exception:
    # `Exception` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not mistaken for a connection failure.
    traceback.print_exc()
    finish(3, 'Unable to connect to database')


# Lookup table: media host id (int) -> `host` column of media_hosts, which
# get_archive_url() uses as the subdomain of justin.tv.
# NOTE(review): the name is a typo of "media_hosts"; it is kept because
# get_archive_url() reads this global under the same spelling.
meida_hosts = {}

db_cur = db_conn.cursor()
query = "select id, host from media_hosts"
db_cur.execute(query)
rows = db_cur.fetchall()
# Raised explicitly instead of via `assert` so the check still runs under
# `python -O`, which strips assert statements.
if not rows:
    raise AssertionError("unable to read media hosts")
for row in rows:
    meida_hosts[int(row[0])] = row[1]


def get_archive_url(archive):
    """Return a download URL for a given archive table row.

    archive -- a (file_name, servers, kind) row. `servers` is a
               comma-separated list of media host ids; one of them is
               picked at random.

    A file_name that is already an absolute http(s) URL is returned
    unchanged. Otherwise the URL is built on the chosen media host:
    'upload' archives sit at the host root, every other kind under
    /archives/.

    Raises AssertionError when the row lists no usable server id or when
    the chosen id is missing from the module-level `meida_hosts` table.
    """
    file_name, servers, kind = archive

    if file_name.startswith('http://') or file_name.startswith('https://'):
        return file_name

    # Skip empty tokens (e.g. a trailing comma) so int() cannot fail on ''.
    candidates = [s for s in (servers or '').split(',') if s.strip()]
    # Explicit raises instead of `assert`, so the checks survive `python -O`.
    # The message names the file because the row carries no 'id' column.
    if not candidates:
        raise AssertionError("no servers for archive %s" % file_name)
    host_id = int(random.choice(candidates))
    if host_id not in meida_hosts:
        raise AssertionError("unable to find media host with id %s" % host_id)
    host = meida_hosts[host_id]

    if kind == 'upload':
        return 'http://%s.justin.tv/%s' % (host, file_name)
    return 'http://%s.justin.tv/archives/%s' % (host, file_name)


def exists(file_name):
    """Report whether an archive's download URL answers with HTTP 200.

    file_name -- despite the name, this is the whole archive row
                 (file_name, servers, kind); it is passed unchanged to
                 get_archive_url().

    Issues a HEAD request with curl, following redirects, and looks at the
    status of the last response in the chain. Returns True only for 200.
    Any failure (no URL can be built, curl cannot be run, unparsable
    status line) is deliberately reported as False, i.e. "not uploaded".
    """
    import subprocess  # local import: the module header does not import it

    try:
        url = get_archive_url(file_name)
        # An argument list, not a shell string: characters such as '&' or
        # ';' in a URL built from database values are never interpreted
        # by a shell.
        curl = subprocess.Popen(['curl', '-s', '-L', '-I', url],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        headers = curl.communicate()[0]
        resp_stat = None
        # With -L curl prints one header block per hop; keep the last status.
        for line in headers.splitlines():
            if line.startswith('HTTP'):
                resp_stat = int(line.split()[1])
    except Exception:
        return False
    return resp_stat == 200


# Main check: take a random sample of up to 50 archives whose start_time
# falls between `fifteenago` and `tenago`, probe each download URL, then
# exit with status 0 (all reachable), 1 (some missing) or 3 (internal error).
try:
    db_cur = db_conn.cursor()
    query = "select file_name, servers, kind from archives where length > 5 and start_time BETWEEN '%s' AND '%s'" % (fifteenago, tenago)
    db_cur.execute(query)
    file_names = db_cur.fetchall()
    random.shuffle(file_names)

    sample = file_names[:50]
    # Column 0 of each row is the archive's file_name.
    dead_names = [archive[0] for archive in sample if not exists(archive)]

    # Report the real sample size: fewer than 50 rows may be in the window.
    if dead_names:
        finish(1, 'Some archives not uploaded (%s/%s) - Samples: %s' % (len(dead_names), len(sample), repr(dead_names[:5])))
    else:
        finish(0, 'Archive Uploads OK - %s archives checked of %s' % (len(sample), len(file_names)))
except SystemExit:
    # finish() exits through SystemExit; let its status code through as is.
    raise
except Exception:
    # Print the real cause to stderr, as the connection block does,
    # instead of hiding it behind the generic message.
    traceback.print_exc()
    finish(3, 'Unable to select from the database')

