import boto3
import json
import urllib2
import threading
import time
import os

# Printed when this module is imported.
print "Loading function"
# Seconds a worker in run() keeps polling before it flags that a new
# invocation is needed.
MAX_TIME = 60

# boto3 clients created once at import time and shared by the functions below.
sqs = boto3.client('sqs')
s3 = boto3.client('s3')


def async(func):
    from threading import Thread
    from functools import wraps
    
    @wraps(func)
    def async_func(*args, **kwargs):
        func_hl = Thread(target = func, args = args, kwargs = kwargs)
        func_hl.start()
        return func_hl
    
    return async_func

def gen_update(lineno, bucket, *key_parts):
    """Build the bulk-API "index" action line for one document.

    Args:
        lineno: Integer line number of the document within its source
            object; zero-padded to 10 digits in the document id.
        bucket: Name used as the prefix of the index name.
        *key_parts: Exactly five strings.  The first three are joined with
            dots to form the suffix of the index name; the last two are the
            leading components of the document id.

    Returns:
        A JSON string of the form
        ``{"index": {"_id": ..., "_type": "log", "_index": ...}}``.

    Raises:
        ValueError: If ``key_parts`` does not contain exactly five items.
    """
    if len(key_parts) != 5:
        raise ValueError("Too many or too few (should be len 5) key parts in "+str(key_parts))

    index_a, index_b, index_c, id_a, id_b = key_parts

    # NOTE: the index name comes from the key parts, not from any timestamp
    # carried inside the JSON document itself.  Near a date boundary a
    # document may therefore land in an index that does not match its own
    # timestamp (the original author suspected this but had not confirmed it).
    action = {
        "index": {
            "_id": "%s-%s-%010d" % (id_a, id_b, lineno),
            "_type": "log",
            "_index": "%s-%s.%s.%s" % (bucket, index_a, index_b, index_c),
        },
    }
    return json.dumps(action)

# Held around urlopen() in flush() so that only one thread at a time issues
# the bulk HTTP request.
FLUSH_LOCK = threading.Lock()

def flush(post_list):
    """POST a batch of bulk-API lines to the URL in ``ES_BULK_URL``.

    Args:
        post_list: Strings to send; they are joined with newlines (plus a
            trailing newline) to form the request body.  An empty list is a
            no-op.

    Returns:
        True when ``post_list`` was empty or every item in the response
        reported a status of 299 or lower; False otherwise.

    Raises:
        EnvironmentError: If any item in the response has status 429.
        KeyError: If ``ES_BULK_URL`` is not set in the environment.
    """
    if not post_list:
        return True

    post = '\n'.join(post_list) + '\n'
    url = os.environ["ES_BULK_URL"]
    headers = {'Content-Type': 'application/json', 'Content-Length': len(post)}
    req = urllib2.Request(url, post, headers)

    # Only opening the request is serialized; the response is parsed outside
    # the lock.
    with FLUSH_LOCK:
        f = urllib2.urlopen(req)
    try:
        response = json.load(f)
    finally:
        # Close the handle even when the body is not valid JSON.
        f.close()

    success = True
    for item in response['items']:
        status = item["index"]["status"]
        if status == 429:
            raise EnvironmentError("Elasticsearch returned a 429.  Backing off.")

        if status > 299:
            if success:
                # Dump the response and payload once per batch rather than
                # once per failed item.
                print("failed to write block")
                print(json.dumps(response))
                print(json.dumps(post))
            success = False
    return success

# Set by pull_s3_event_from_sqs() after a message has been handled and
# deleted; read by lambda_handler().
found_a_message = threading.Event()
found_a_message.clear()
# Set by run() when it stops polling because more than MAX_TIME seconds have
# elapsed; read by lambda_handler().
needs_timeout_reset = threading.Event()
needs_timeout_reset.clear()

def pull_s3_event_from_sqs():
    """Receive messages from the SQS queue and index the S3 objects they name.

    The queue URL is read from the ``QUEUE_URL`` environment variable.  For
    each received message, every entry of the body's ``Records`` list that
    has an ``s3`` key is passed to ``handle_s3_object``.  The message is then
    deleted from the queue and ``found_a_message`` is set.  Messages whose
    body has no ``Records`` key are logged and deleted as well.

    Returns:
        False when the receive call returned no messages, True otherwise.

    Raises:
        ValueError: If ``handle_s3_object`` returns a falsy value for a
            record.  The message is not deleted in that case.
    """
    queue_url = os.environ['QUEUE_URL']
    msgs = sqs.receive_message(QueueUrl=queue_url)
    print("Messages: " + json.dumps(msgs))
    if 'Messages' not in msgs:
        print("No messages -- returning False")
        return False

    print("msgs.keys(): " + str(msgs.keys()))
    for msg in msgs['Messages']:
        print("msg.keys(): " + str(msg.keys()))
        body = json.loads(msg['Body'])
        print(body)
        if 'Records' not in body:
            print("Dropping message with no Records key: " + str(body))
        else:
            for record in body['Records']:
                if 's3' not in record:
                    continue
                bucket_name = record['s3']['bucket']['name']
                # NOTE(review): S3 event notifications URL-encode the object
                # key.  Keys containing spaces or special characters would
                # need decoding before use -- confirm whether such keys can
                # occur here.
                object_key = record['s3']['object']['key']
                if not handle_s3_object(bucket_name, object_key):
                    # Report the bucket *name*, not the whole bucket dict.
                    raise ValueError("Error processing record: s3://%s/%s"%(bucket_name, object_key))
        sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=msg["ReceiptHandle"])
        found_a_message.set()

    return True

@async
def run():
    starttime = time.time()
    while pull_s3_event_from_sqs():
        if time.time() - starttime > MAX_TIME:
            needs_timeout_reset.set()
            return True
        print "Pulling another immediately"
    print "No more messages.  Ending loop."
    return False

def run_another():
    """Invoke the Lambda function named by ``FUNCTION_NAME`` as an 'Event'.

    Creates a fresh boto3 Lambda client for the call; the function name is
    read from the environment.
    """
    print("Scheduling a new lambda job and pulling more")
    lambda_client = boto3.client('lambda')
    function_name = os.environ['FUNCTION_NAME']
    lambda_client.invoke(FunctionName=function_name, InvocationType='Event')

def lambda_handler(event, context):
    """Entry point: fan out new invocations or drain the queue.

    Behaviour depends on ``event``:

    * contains ``"parallelize"``: schedule that many new invocations via
      ``run_another`` and return None.
    * ``event["source"] == "aws.events"``: schedule two new invocations and
      return None.
    * otherwise: start one ``run`` worker thread, wait for it, and schedule
      one follow-up invocation if the worker both handled a message and
      stopped because of the time limit.

    Args:
        event: The invocation payload.
        context: Unused.

    Returns:
        None for the two fan-out cases; otherwise a list holding the result
        of ``Thread.join()`` for each worker (``join`` returns None).
    """
    if "parallelize" in event:
        for _ in range(int(event['parallelize'])):
            run_another()
        return
    if "source" in event and event["source"] == "aws.events":
        for _ in range(2):
            run_another()
        return

    # The flags are module-level and survive between invocations when the
    # execution environment is reused.  Without this reset, flags set by an
    # earlier invocation would trigger a re-invocation even if this one
    # found nothing, and the chain could repeat indefinitely.
    found_a_message.clear()
    needs_timeout_reset.clear()

    threads = [run() for _ in range(1)]
    results = [thread.join() for thread in threads]

    timed_out = needs_timeout_reset.is_set()
    got_message = found_a_message.is_set()
    print("needs_timeout_reset: %s" % timed_out)
    print("found_a_message: %s" % got_message)
    if timed_out and got_message:
        print("Found a message and timeout needs reset")
        run_another()
    print("Exiting")
    return results

def handle_s3_object(bucket, key):
    """Download one S3 object and bulk-index its lines that start with '{'.

    The object body is read fully and split on newlines.  Each non-empty
    line whose first character is ``{`` is queued together with an action
    line from ``gen_update``.  The queue is sent with ``flush`` whenever it
    reaches 5000 entries (2500 action/document pairs), and once more at the
    end.

    Args:
        bucket: Name of the S3 bucket; also passed to ``gen_update``.
        key: Object key.  Its ``/``-separated parts are passed to
            ``gen_update``, which requires exactly five of them.

    Returns:
        True if every ``flush`` call succeeded, False if any returned a
        falsy value.
    """
    print("s3://%s/%s"%(str(bucket),str(key)))

    success = True
    pending = []

    obj = s3.get_object(Bucket=bucket, Key=key)
    # The key is the same for every line, so split it once up front.
    key_parts = key.split('/')

    lineno = 0
    for lineno, line in enumerate(obj['Body'].read().split('\n'), 1):
        # Skip blank lines and anything that does not start with '{'.
        if not line.startswith('{'):
            continue
        pending.append(gen_update(lineno, bucket, *key_parts))
        pending.append(line)
        if len(pending) >= 5000:
            if not flush(pending):
                success = False
            pending = []
    print(lineno)

    if not flush(pending):
        success = False
    return success

