#!/usr/bin/env python

import argparse
import cStringIO
import io
import json
import sys
import tarfile
import time

import ydb


def parse_args():
    """Parse the command line (``sys.argv``) and return the argparse namespace.

    Endpoint, database and both table names are mandatory options; the auth
    token file is optional and is opened for reading by argparse; ``--limit``
    defaults to 100; the archive path is the single positional argument.
    """
    parser = argparse.ArgumentParser(
        description='Make fallback archive by copying sample of database')

    # The mandatory string options all share the same shape, so they are
    # declared from a table. Order matters only for the generated help text.
    required_options = (
        ('-E', '--endpoint', 'YDB endpoint'),
        ('-D', '--database', 'Database name'),
        ('-I', '--table-items', 'Items table name'),
        ('-V', '--table-versions', 'Versions table name'),
    )
    for short_flag, long_flag, help_text in required_options:
        parser.add_argument(short_flag, long_flag, required=True, help=help_text)

    parser.add_argument(
        '-A', '--auth-token-file',
        type=argparse.FileType('r'),
        help='File with auth token')
    parser.add_argument(
        '-L', '--limit',
        type=int,
        default=100,
        help='Copy at most this number of items of each type')
    parser.add_argument('output', help='Output archive')

    return parser.parse_args()


def add_to_archive(output, name, content):
    """Append an in-memory member called ``name`` to an open tar archive.

    ``output`` is a ``tarfile.TarFile`` opened for writing. ``content`` is the
    member payload: byte strings are stored as-is, text (unicode) strings are
    encoded as UTF-8 first. The member's mtime is set to the current time.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so this
    # detects text on both. The size recorded in the tar header must be the
    # byte length, hence the encoding has to happen before ``len``.
    if isinstance(content, type(u'')):
        content = content.encode('utf-8')
    info = tarfile.TarInfo(name)
    info.size = len(content)
    info.mtime = time.time()
    # io.BytesIO exists on both Python 2 and 3, unlike cStringIO.
    output.addfile(info, io.BytesIO(content))


def load_current_versions(session, table_versions):
    """Return a list of ``(type, version)`` pairs read from ``table_versions``.

    The query joins the versions table with a per-type ``max(timestamp)``
    subquery, so each type contributes the row(s) carrying its greatest
    timestamp. It is run through ``session.transaction().execute`` with
    ``commit_tx=True`` and only the first result set is read.
    """
    # The table name is spliced into the query text via str.format.
    query = '''
        select times.type as type, version
        from [{versions}]
        inner join (
            select type, max(timestamp) as timestamp from [{versions}] group by type
        ) as times
        using (type, timestamp)
    '''.format(versions=table_versions)
    transaction = session.transaction()
    result_sets = transaction.execute(query, commit_tx=True)
    current_versions = []
    for row in result_sets[0].rows:
        current_versions.append((row.type, row.version))
    return current_versions


def load_items(session, table_items, type, version, limit):
    """Yield ``(id, data, metadata)`` for items of one type and version.

    The table name and the row limit are formatted into the query text, while
    ``type`` and ``version`` are passed as the ``$type`` / ``$version`` query
    parameters. ``metadata`` is decoded from JSON before being yielded.

    This is a generator: nothing is sent to the session until iteration
    starts.
    """
    query_template = '''
        declare $type as String;
        declare $version as String;
        select id, data, metadata from [{items}] where type = $type and version = $version
        limit {limit}
    '''
    query_text = query_template.format(items=table_items, limit=limit)
    bound_values = {'$type': type, '$version': version}
    prepared_query = session.prepare(query_text)
    transaction = session.transaction()
    result_sets = transaction.execute(prepared_query, bound_values, commit_tx=True)
    # Only the first result set is consumed.
    first_result_set = result_sets[0]
    for row in first_result_set.rows:
        item_id, payload = row.id, row.data
        yield item_id, payload, json.loads(row.metadata)


def make_fallback_archive(session, table_items, table_versions, limit, output):
    """Copy a sample of items into ``output`` and finish with an index member.

    For every ``(type, version)`` pair returned by ``load_current_versions``,
    the items yielded by ``load_items`` (called with ``limit``) are stored as
    archive members named ``<type>/<id>``, with ``/`` inside the id replaced
    by ``_``. A final member named ``index`` holds a JSON object mapping each
    member name to its type, version, original id and metadata.

    A progress line per type is written to stderr.

    Raises:
        RuntimeError: if two items map to the same member name.
    """
    index = {}
    for item_type, version in load_current_versions(session, table_versions):
        count = 0
        for item_id, data, metadata in load_items(session, table_items, item_type, version, limit):
            # Slashes in the id would create extra path levels in the archive.
            name = '/'.join((item_type, item_id.replace('/', '_')))
            if name in index:
                raise RuntimeError("Name collision: %s" % repr(name))
            add_to_archive(output, name, data)
            index[name] = {
                'type': item_type,
                'version': version,
                'id': item_id,
                'metadata': metadata
            }
            count += 1
        # Written via sys.stderr.write rather than the Python-2-only
        # ``print >>`` statement, so the line works on either major version.
        message = 'Downloaded type %s (version %s): %d items' % (repr(item_type), repr(version), count)
        sys.stderr.write(message + '\n')
    # Every member name contains a '/', so it can never equal 'index'; the
    # assert only guards that invariant against future changes to the naming.
    assert 'index' not in index
    add_to_archive(output, 'index', json.dumps(index, indent=1))


def main():
    """Script entry point: connect to the database and write the archive.

    Reads the optional auth token, builds a ``ydb`` driver for the requested
    endpoint and database, creates a table-client session and streams the
    fallback archive into a gzip-compressed tar at the output path. The driver
    is stopped on exit whether or not the export succeeded.
    """
    args = parse_args()
    auth_token = None
    if args.auth_token_file:
        # argparse.FileType already opened the file; close it as soon as the
        # token has been read instead of leaving the handle open.
        with args.auth_token_file as token_file:
            auth_token = token_file.read().strip()

    connection_params = ydb.DriverConfig(args.endpoint, args.database, auth_token=auth_token)
    driver = ydb.Driver(connection_params)
    try:
        # NOTE(review): presumably blocks until the driver is ready or 5
        # seconds elapse -- confirm against the ydb SDK documentation.
        driver.wait(timeout=5)
        session = driver.table_client.session().create()

        # 'w|gz' writes the archive as a gzip-compressed stream.
        with tarfile.open(args.output, 'w|gz') as output:
            make_fallback_archive(session, args.table_items, args.table_versions, args.limit, output)
    finally:
        driver.stop()

# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
