import csv
import gzip
import datetime


from webmaster3.cass import Cass, Keyspaces
from webmaster3.task import Task
from cassandra import ConsistencyLevel

# Module-level state shared by the functions below; both are assigned in run().
# csv.writer bound to the gzipped output file; handle_req_without_res() writes rows to it.
WRITER = None
# Cassandra session through which every query in this module is executed.
SESSION = None

def run(cluster):
    """Export requests that have no results to a gzipped CSV in the home directory.

    Connects to the given cluster, publishes the session and the CSV writer
    through the module-level ``SESSION`` and ``WRITER`` globals, and then lets
    ``collect_requests`` do the scan while the output file is open.

    Args:
        cluster: cluster identifier handed to ``Cass.cluster``.
    """
    global WRITER, SESSION

    import os.path

    # Publish the session first, then configure it through a short local alias.
    session = SESSION = Cass.cluster(cluster).connect()
    session.set_keyspace(Keyspaces.WEBMASTER3)
    # Generous timeout (presumably seconds, per the driver) for the full-table scan.
    session.default_timeout = 1200
    session.default_consistency_level = ConsistencyLevel.QUORUM

    output_path = os.path.expanduser("~") + '/spam_urlcheck_requests.csv.gz'

    with gzip.open(output_path, "wb") as archive:
        # Rows are ';'-separated with every field quoted.
        WRITER = csv.writer(archive, delimiter=';', quoting=csv.QUOTE_ALL)
        collect_requests()

    print("Done.")


def collect_requests():
    """Scan every URL-check request and report those that have no result rows.

    Reads all rows of ``host_url_checker2_requests`` through the module-level
    ``SESSION`` and, for each one, looks up ``host_url_checker2_results`` by
    ``(host_id, request_id)``. A request whose lookup comes back empty is
    passed to ``handle_req_without_res``. A progress line is printed every
    1000 requests (including the very first one, at count 0).
    """
    # Prepare the per-request lookup once: values are bound by the driver
    # instead of being spliced into the CQL text, so a host_id containing a
    # quote cannot break or alter the query, and the statement is not
    # re-parsed on every iteration.
    results_lookup = SESSION.prepare(
        "SELECT * FROM host_url_checker2_results WHERE host_id=? AND request_id=?")

    all_reqs = SESSION.execute("SELECT * FROM host_url_checker2_requests")
    for count, req in enumerate(all_reqs):
        if count % 1000 == 0:
            print('Processed: %s' % count)

        res = SESSION.execute(results_lookup, (req.host_id, req.request_id))
        if not res:
            handle_req_without_res(req)


def handle_req_without_res(req):
    """Write one CSV row for a request that has no results.

    The row contains the request's ``host_id``, its ``request_id`` and its age
    in whole days, and is written through the module-level ``WRITER``.

    Args:
        req: row-like object exposing ``host_id``, ``request_id`` and
            ``created_time`` (a naive ``datetime``).
    """
    # Rows reach this function from Cassandra queries, and the driver returns
    # timestamp columns as naive datetimes in UTC. Measuring against local
    # ``now()`` would skew the age by the machine's UTC offset and could shift
    # ``days`` by one, so compare against UTC "now" instead.
    age = datetime.datetime.utcnow() - req.created_time
    WRITER.writerow((req.host_id, req.request_id, age.days))


def is_in_hosts_table(host_id):
    """Return True when the ``hosts`` table contains a row for *host_id*.

    Args:
        host_id: host identifier to look up.

    Returns:
        bool: ``True`` if the query yields at least one row, else ``False``.
    """
    # Let the driver bind and escape the value rather than interpolating it
    # into the CQL text, so quotes inside host_id cannot corrupt the query.
    res = SESSION.execute("SELECT * FROM hosts WHERE host_id=%s", (host_id,))
    return bool(res)


# Script entry point: this statement executes whenever the module is run or
# imported. It hands `run` and the cluster name "prod" to Task.run
# (presumably Task.run ends up calling run("prod") — confirm in webmaster3.task).
Task.run(run, "prod")
