#!/usr/bin/env python
#
# Repeat a set of queries

import argparse
import time

def parse_args(argv=None):
    """Parse and validate the command line.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line. When None (the default), argparse reads
            sys.argv[1:], which is what the script entry point relies on.

    Returns:
        The argparse namespace with attributes queries, limit, start,
        relation, sleep, max, stdout, verbose, host, port, user, passwd
        and db.

    Exits (via parser.error) when --start is combined with more than one
    query file, because the start value is advanced from a single query's
    returned rows.
    """
    parser = argparse.ArgumentParser(description="Bulk run queries with dict substitution parameters.")
    parser.add_argument('queries', metavar='file', nargs='+',
                        help=("A file containing a query. Each file will be formatted with all arguments which "
                              "are 'passed to query' according to the descriptions as the name of the argument."))
    parser.add_argument('--limit', '-l', type=int, default=1000, help="Max number of rows to affect. Passed to query.")
    parser.add_argument('--start', '-S', type=int, default=None,
                        help=("Set a starting point for iterating over a large dataset in batches. For each batch "
                              "of size 'limit', the start value will be set to the last value in the first column "
                              "of the last row returned by the query and passed to the next batch. This means you "
                              "will likely want to use the 'UPDATE...RETURNING id' idiom for write operations. "
                              "Passed to query."))
    parser.add_argument('--relation', '-r', default=None, help="Relation to work on. Passed to query.")
    parser.add_argument('--sleep', '-s', type=float, default=1.0, help="How long to sleep between loops.")
    parser.add_argument('--max', '-m', type=int, default=1000,
                        help="Maximum number of rows to affect. Accurate modulo limit parameter. Set 0 to disable.")
    parser.add_argument('--stdout', action='store_true', default=False, help="Print queries and exit")
    parser.add_argument('--verbose', '-v', action='store_true', default=False, help="Print status as we go.")
    parser.add_argument('--host', '-H', default='/var/run/postgresql')
    parser.add_argument('--port', '-P', type=int, default=5432)
    parser.add_argument('--user', '-u', default='postgres')
    # 'pass' is a Python keyword, so the value is stored as args.passwd.
    parser.add_argument('--pass', '-p', dest='passwd', default='')
    parser.add_argument('--db', '-d', default='')
    args = parser.parse_args(argv)
    if args.start is not None and len(args.queries) > 1:
        parser.error("Can only have one query if you are expecting an ordering")
    return args

def once(cc, templates, params, verbose):
    """Run every query template once and report what happened.

    Args:
        cc: DB-API cursor used to execute the queries, or None to only print
            the formatted queries without executing anything.
        templates: iterable of query strings containing str.format
            placeholders.
        params: dict whose items are substituted into each template.
        verbose: when true, print each query, how long it took and the
            last id seen so far.

    Returns:
        A (total, last_id) tuple. total is the sum of the cursor row counts
        over all templates (0 when cc is None). last_id is derived from the
        first column of the returned rows, or None when no rows were
        returned.
    """
    total = 0
    last_id = None
    for t in templates:
        query = t.format(**params)
        if cc is None:
            print(query)
            continue
        if verbose:
            start = time.time()
            print(query)
        cc.execute(query)
        rowcount = cc.rowcount
        if rowcount == -1:
            if verbose:
                print("reassigning -1 to 0 under presumption this is a ddl-like statement")
            rowcount = 0
        # A statement can affect rows without producing a result set (e.g. an
        # UPDATE with no RETURNING clause). The DB-API sets description to
        # None in that case and fetching from the cursor raises, so only
        # read rows when there is a result set.
        if rowcount > 0 and cc.description is not None:
            for row in cc:
                if last_id is None:
                    last_id = row[0]
                elif row[0] is not None:
                    # Track the largest id seen rather than relying on the
                    # order in which rows come back.
                    last_id = max(int(row[0]), last_id)
        total += rowcount
        if verbose:
            finish = time.time()
            duration = finish - start
            if last_id is not None:
                print("last id: {0}".format(last_id))
            print("processed {0} records in {1} seconds.".format(rowcount, duration))
    return total, last_id

def loop(cc, templates, params, left, sleep_s, verbose):
    """Run the queries in batches until there is nothing left to do.

    Each pass calls once(); the loop stops when a pass affects no rows or
    when the row budget is used up, and otherwise sleeps for sleep_s
    seconds before the next pass.

    Args:
        cc: cursor handed through to once() (may be None).
        templates: query templates handed through to once().
        params: substitution dict; its 'start' entry is advanced in place
            to the last id of a pass when a start value was supplied and
            the pass produced an id.
        left: maximum number of rows to affect; 0 means no maximum.
        sleep_s: seconds to sleep between passes.
        verbose: handed through to once().

    The final last id (if any) and the total affected row count are always
    printed, even when a pass raises.
    """
    # A budget of 0 means "unlimited".
    remaining = left if left != 0 else None
    affected = 0
    last_id = None
    try:
        while True:
            batch_rows, last_id = once(cc, templates, params, verbose)
            affected += batch_rows
            if remaining is not None:
                remaining -= batch_rows
            out_of_budget = remaining is not None and remaining <= 0
            if batch_rows == 0 or out_of_budget:
                break
            # Only advance the starting point when the caller asked for
            # ordered iteration and this pass actually reported an id.
            if params['start'] is not None and last_id is not None:
                params['start'] = last_id
            time.sleep(sleep_s)
    finally:
        if last_id is not None:
            print("last id: {0}".format(last_id))
        print("affected {0} total rows".format(affected))

def run(templates, args):
    """Run the query templates against the database described by args.

    Args:
        templates: list of query template strings.
        args: parsed command-line namespace; reads stdout, host, port, user,
            passwd, db, limit, relation, start, max, sleep and verbose.

    When args.stdout is set no database connection is made and the queries
    are only printed. Otherwise an autocommit psycopg2 connection is opened
    for the duration of the run and closed afterwards, even if a query
    fails.
    """
    # XXX AGB: janky copy of parameters in parser arguments. Figure out a better way to do this. 2015-02-25
    params = {
        'limit': args.limit,
        'relation': args.relation,
        'start': args.start,
        }
    if args.stdout:
        # A None cursor makes the lower layers print instead of execute.
        loop(None, templates, params, args.max, args.sleep, args.verbose)
        return
    # Imported lazily so that --stdout works without psycopg2 installed.
    import psycopg2
    db = psycopg2.connect(host=args.host, port=args.port, user=args.user, password=args.passwd, database=args.db)
    try:
        db.autocommit = True
        cc = db.cursor()
        try:
            loop(cc, templates, params, args.max, args.sleep, args.verbose)
        finally:
            cc.close()
    finally:
        # Release the server connection explicitly instead of relying on
        # interpreter shutdown.
        db.close()

def main():
    """Program entry point: parse arguments, load the query files, run them."""
    args = parse_args()
    templates = []
    for path in args.queries:
        # Use a context manager so each query file is closed promptly.
        with open(path) as handle:
            # Collapse every run of whitespace (including newlines) into a
            # single space so each query becomes one line.
            templates.append(" ".join(handle.read().split()))
    run(templates, args)

# Only run when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
