import os
import sys
import time
import argparse
import logging
import tarfile
import shutil
import subprocess
import urllib2
import json

from io import IOBase
from os import path
from glob import glob
from tempfile import NamedTemporaryFile

from urlparse import urlparse
from boto.s3.connection import S3Connection

def bail(fmt, *args):
    """Log an error message and abort the program.

    ``fmt`` and ``args`` are handed to ``logging.error`` unchanged, so
    ``fmt`` may contain %-style placeholders filled from ``args``.

    Raises:
        SystemExit: always, carrying the message "aborting".
    """
    logging.error(fmt, *args)
    raise SystemExit("aborting")

def parse_args():
    """Parse the command line (``sys.argv``) and return the argparse namespace.

    Options:
        --input-s3 / -s (required): S3 path of the object that holds the S3
            path of the actual db.
        --input-s3-override-dbopt: name of a db option that overrides
            --input-s3 when it is non-empty (default: None).
        --dbopt-url: base URL used for fetching db options.
        --destination-dir / -d: directory the db is placed in (default: '.').
    """
    cli = argparse.ArgumentParser()

    # (flags, keyword settings) in the order they show up in --help
    option_table = (
        (('--input-s3', '-s'),
         dict(required=True,
              help='Path on S3 that contains the path on S3 of the actual db')),
        (('--input-s3-override-dbopt',),
         dict(default=None,
              help='Usher DB option containing the path on S3 of the actual db, overriding the path from --input-s3 if non-empty')),
        (('--dbopt-url',),
         dict(default='https://sjc02.us-west-2.prod.diablo.s.twitch.a2z.com',
              help='Base URL for Diablo, used for fetching dbopts.')),
        (('--destination-dir', '-d'),
         dict(default='.',
              help='Directory in which to place the db')),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()

def write_through_cmd(cmdline, writer, f):
    """Run ``cmdline`` as a filter: ``writer`` feeds its stdin, stdout goes to ``f``.

    Args:
        cmdline: argument list for ``subprocess.Popen``; ``cmdline[0]`` is
            used to name the command in log messages.
        writer: callable invoked with the child's stdin pipe; it is expected
            to write the input data to it.
        f: open file the child's stdout is redirected to.

    If ``writer`` (or closing the pipe) raises, the exception is logged with
    its traceback and the child is terminated. Success is then still decided
    by the child's exit status alone: a non-zero status aborts via ``bail``.
    """
    p = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=f)
    try:
        writer(p.stdin)
        p.stdin.close()
    except:
        # deliberately broad: whatever went wrong, the child must not be left
        # blocked on its stdin. record the cause before it is dropped, so the
        # log shows more than just "exited unhappily".
        logging.warning("feeding input to %s failed, terminating it",
                        cmdline[0], exc_info=True)
        p.terminate()
    finally:
        p.wait()
    if p.returncode != 0:
        bail("{} exited unhappily".format(cmdline[0]))


def extract_db(writer, ext, dest):
    """Write the db delivered by ``writer`` to the file ``dest``.

    Args:
        writer: callable that is given a writable binary file object and
            writes the downloaded payload to it.
        ext: extension of the downloaded object. When it starts with ".tar"
            the payload is piped through ``tar``, which extracts the member
            ``vbi_score.db`` to ``dest``; a remaining suffix of ".lz" or ".xz"
            adds ``--xz``. Any other extension means the payload is written
            to ``dest`` as-is.
        dest: path of the output file.

    On any failure after ``dest`` has been opened, ``dest`` is removed and the
    original exception is re-raised.
    """
    created = False
    try:
        with open(dest, mode="wb") as out:
            created = True
            if not ext.startswith(".tar"):
                # plain payload, no unpacking needed
                writer(out)
            else:
                tar_cmd = ["tar"]
                if ext[4:] in (".lz", ".xz"):
                    tar_cmd.append("--xz")
                tar_cmd.extend(["--extract", "--to-stdout", "vbi_score.db"])
                write_through_cmd(tar_cmd, writer, out)
    except:
        # catch-all on purpose (this includes SystemExit): never leave a
        # partial file behind, then let the failure continue upwards.
        if created:
            os.remove(dest)
        raise

def get_scores_url(conn, latest_url):
    """Read the `latest` pointer object and return the db URL stored in it.

    Args:
        conn: S3 connection used to look up the bucket.
        latest_url: parsed URL of the pointer object; ``hostname`` is used as
            the bucket name and ``path`` as the key.

    Returns:
        The stripped contents of the pointer object, which must start with
        "s3://". Otherwise the run is aborted via ``bail``, reporting at most
        the first 20 characters of the contents.
    """
    # validate=False is passed everywhere to avoid checking whether the
    # objects exist; that check needs more S3 permissions than fetching them.
    pointer_bucket = conn.get_bucket(latest_url.hostname, validate=False)
    pointer_key = pointer_bucket.get_key(latest_url.path, validate=False)

    # the `latest` object holds the path to the actual db
    pointer = pointer_key.get_contents_as_string().strip()
    if pointer[:5] == "s3://":
        return pointer

    preview = pointer[:20]
    bail("latest doesn't look like an s3 key: {}".format(repr(preview)))
    return preview

def fetch_dbopt(base_url, dbopt):
    """Fetch the value of the db option ``dbopt`` over HTTP.

    Requests ``<base_url>/dboption/all/<dbopt>.json`` with a 5 second timeout
    and decodes the body as JSON. The decoded value may be a mapping, or a
    list holding exactly one mapping; any other list length aborts via
    ``bail``.

    Args:
        base_url: base URL of the service answering db option requests.
        dbopt: name of the db option; also the key looked up in the response.

    Returns:
        The value stored under ``dbopt``, or None when the key is missing or
        its value is the empty string.
    """
    # urllib2 doesn't respect the no_proxy env var, so on bare metal hosts,
    # the configured proxy rejects our requests for an internal url.
    # an empty ProxyHandler mapping turns proxying off altogether.
    proxy_handler = urllib2.ProxyHandler({})
    opener = urllib2.build_opener(proxy_handler)
    req = urllib2.Request('{}/dboption/all/{}.json'.format(base_url, dbopt))
    response = opener.open(req, timeout=5)
    try:
        body = response.read()
    finally:
        # always release the connection, even when reading fails
        response.close()
    v = json.loads(body)
    if isinstance(v, list):
        if len(v) == 1:
            v = v[0]
        else:
            bail("bad dbopt response, not a list of 1 element: {}".format(body))
    v = v.get(dbopt)
    if v == "":
        v = None

    return v

def import_scores():
    """Install the current score db in the destination dir and prune old ones.

    Configuration comes from the command line via ``parse_args``. The db
    location is taken from the ``--input-s3-override-dbopt`` db option when
    that yields a non-empty value, otherwise from the pointer object named by
    ``--input-s3``. The db is extracted to ``<destination-dir>/<name>.db``
    through a ``.tmp`` file, then the symlink
    ``<destination-dir>/vbi_score.db`` is repointed at it through a temporary
    link and ``os.rename``. Finally the ``.db`` files picked by
    ``find_old_files`` (one hour cutoff) are deleted.

    Returns early, without downloading or pruning, when the existing symlink
    already names the wanted file and that file exists.
    """
    args = parse_args()

    # we want to delete 'old' files, but we'll want to keep specific
    # files too: the currently active db as well as the one we're
    # about to install.
    retain_files = set()

    conn = S3Connection(host="s3.us-west-2.amazonaws.com")

    url_str = None
    if args.input_s3_override_dbopt:
        url_str = fetch_dbopt(args.dbopt_url, args.input_s3_override_dbopt)

    if url_str:
        logging.warning("using dbopt override")
    else:
        url_str = get_scores_url(conn, urlparse(args.input_s3))

    db_tarball_url = urlparse(url_str)

    # split the object name at its FIRST period: everything before it names
    # the local db file, everything from it on is the extension handed to
    # extract_db (e.g. ".tar.xz").
    db_tarball_filename = path.basename(db_tarball_url.path)
    period = db_tarball_filename.find(".")
    if period == -1:
        period = len(db_tarball_filename)
    db_filename = db_tarball_filename[:period] + ".db"
    db_ext = db_tarball_filename[period:]
    retain_files.add(db_filename)

    # we're gonna create a symlink `vbi_score.db` in the destination dir. let's
    # check if we already did, and if its target looks like the filename we are
    # going to use, we're already done. of course this only works if we don't
    # use the same s3 path every time, and instead put a date or w/e in it.
    dest = path.join(args.destination_dir, "vbi_score.db")
    if path.islink(dest):
        dest_target = os.readlink(dest)
        installed_filename = path.basename(dest_target)
        retain_files.add(installed_filename)
        logging.info("latest is {!r}, installed is {}".format(url_str, installed_filename))
        if installed_filename == db_filename and path.exists(path.join(args.destination_dir, dest_target)):
            # we're not gonna compare file sizes or anything because we're
            # optimistic that we're being sufficiently atomic that we never
            # leave partial writes behind.
            logging.info("nothing to do.")
            return
    else:
        logging.info("latest is {!r}, none installed".format(url_str))

    db_path = path.join(args.destination_dir, db_filename)
    # maybe the file is already there and we just didn't set the link?
    if not path.exists(db_path):
        # no, we're gonna download that file. it is written under a temporary
        # name and renamed into place so db_path never holds a partial db.
        bucket = conn.get_bucket(db_tarball_url.hostname, validate=False)
        key = bucket.get_key(db_tarball_url.path, validate=False)
        tmp_path = db_path + ".tmp"
        extract_db(key.get_file, db_ext, tmp_path)
        os.rename(tmp_path, db_path)

    tmp_link = dest + ".tmp"
    if path.lexists(tmp_link):
        # a run that died between symlink() and rename() leaves this behind,
        # and os.symlink would then fail on every later run. lexists rather
        # than exists, so a dangling leftover link is noticed too.
        os.remove(tmp_link)
    os.symlink(db_filename, tmp_link)
    os.rename(tmp_link, dest) # os.symlink refuses to overwrite, rename doesn't

    # select old files for deletion, to recover disk space
    cutoff = time.time() - 60*60
    old_files = find_old_files(args.destination_dir, cutoff, retain_files)
    for file_path in old_files:
        logging.info("deleting old scores db: {}".format(file_path))
        os.remove(file_path)

def find_old_files(directory, cutoff, retain_files):
    """Return the paths of the ``.db`` files in ``directory`` that may be deleted.

    Args:
        directory: directory to scan; only ``<directory>/*.db`` is considered.
        cutoff: modification timestamp; only files older than this qualify.
        retain_files: collection of base names that must never be returned.

    Returns:
        Paths in ascending (mtime, path) order. Symlinks and non-regular
        files are skipped, as are retained names; of what is left, the three
        most recent files are always kept regardless of their age.
    """
    dated = []
    for candidate in glob(directory + "/*.db"):
        # symlinks and other oddities aren't big, leave them alone
        if path.islink(candidate) or not path.isfile(candidate):
            continue
        # previous/current db
        if path.basename(candidate) in retain_files:
            continue
        dated.append((path.getmtime(candidate), candidate))

    # oldest first
    dated.sort()

    # arbitrarily spare the 3 most recent files, then apply the age limit
    expendable = dated[:-3]
    return [name for stamp, name in expendable if stamp < cutoff]

if __name__ == '__main__':
    # script entry point: log with UTC timestamps, run the import, and turn
    # any unhandled exception into a logged traceback plus exit status -1.
    logging.Formatter.converter = time.gmtime
    logging.basicConfig(
        level=logging.INFO,
        datefmt="%Y-%m-%d %H:%M:%S",
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    logging.info("starting iprep score db import")
    try:
        import_scores()
        logging.info("success")
    except Exception:
        # SystemExit (e.g. raised by bail) is not an Exception and passes through
        logging.exception("unhandled exception")
        raise SystemExit(-1)
    finally:
        # flush and close the log handlers on every way out
        logging.shutdown()
