import sys
import json
import hashlib
import argparse
from captcha.tools.kikimr_uploader.upload_data import upload_data
from captcha.tools.kikimr_uploader.read_archive import read_archive
import yt.wrapper as yt
import yt.yson as yson


def parse_args():
    """Parse the uploader's command-line arguments from ``sys.argv``.

    Global options describe the YDB target (endpoint, database, the items and
    versions tables, an optional per-transaction item limit and an optional
    auth token file).  The input source is selected by a mandatory
    sub-command, stored in ``input_type``:

    * ``legacy_tarball`` -- read items from a ``.tar.gz`` archive;
    * ``yt-table`` -- read items from a YT table.

    Returns:
        argparse.Namespace: the parsed arguments.

    Exits via ``SystemExit`` (argparse's usual behaviour) when the arguments
    are invalid or the sub-command is missing.
    """
    parser = argparse.ArgumentParser(description='Upload data to table')
    parser.add_argument('-E', '--endpoint', required=True, help='YDB endpoint')
    parser.add_argument('-D', '--database', required=True, help='Database name')
    parser.add_argument('-I', '--table-items', required=True, help='Items table name')
    parser.add_argument('-V', '--table-versions', required=True, help='Versions table name')
    parser.add_argument('-T', '--transaction-limit', type=int, help='Max items per transaction')
    parser.add_argument('-A', '--auth-token-file', type=argparse.FileType('r'), help='File with auth token')

    subparsers = parser.add_subparsers(dest='input_type')
    # Python 3's argparse treats sub-commands as optional unless told
    # otherwise; without this a missing sub-command would only surface later
    # as an AttributeError on the sub-command specific options.
    subparsers.required = True

    parser_legacy_tarball = subparsers.add_parser('legacy_tarball', help='Generate tarball in legacy format (answer separated from content by null character)')
    parser_legacy_tarball.add_argument('-t', '--type', required=True, help='Captcha type')
    parser_legacy_tarball.add_argument('-v', '--version', required=True, help='Data version')
    parser_legacy_tarball.add_argument('-c', '--content-type', help='Content-Type for items that does not have it specified')
    parser_legacy_tarball.add_argument('--bare-answer', action='store_true', help='Read answers as bare text (like old implementation)')
    parser_legacy_tarball.add_argument('input', help='Input archive (.tar.gz)')

    # The version is optional here: main() derives one when it is omitted.
    parser_yt_table = subparsers.add_parser('yt-table', help='Upload from YT table')
    parser_yt_table.add_argument('-t', '--type', required=True, help='Captcha type')
    parser_yt_table.add_argument('-v', '--version', help='Data version')
    parser_yt_table.add_argument('-c', '--content-type', help='Content-Type for items that does not have it specified')
    parser_yt_table.add_argument('--proxy', required=True, help='YT proxy')
    parser_yt_table.add_argument('--path', required=True, help='YT table path')

    return parser.parse_args()


def load_legacy_tarball(path, content_type, bare_answer):
    """Yield upload items read from a legacy-format archive.

    Every entry produced by ``read_archive(path)`` is indexed by ``'name'``
    and ``'content'``.  The content is expected to hold the item metadata,
    a single NUL character, and then the item data.

    Args:
        path: archive location, passed to ``read_archive`` unchanged.
        content_type: fallback stored in the metadata of items that carry no
            (or an empty) ``content_type``; ``None`` means no fallback.
        bare_answer: when true the metadata part is taken verbatim as the
            answer; otherwise it is parsed as JSON.

    Yields:
        dict: ``'id'`` (entry name), ``'metadata'`` (JSON-encoded metadata)
        and ``'data'`` (everything after the first NUL character).

    Raises:
        ValueError: an entry contains no NUL separator.
        RuntimeError: an item has no content type and no usable fallback
            was supplied.
    """
    for fileinfo in read_archive(path):
        name = fileinfo['name']

        # partition() splits on the first NUL only, like split('\0', 1), but
        # lets us report a missing separator instead of failing on unpacking.
        raw_metadata, separator, data = fileinfo['content'].partition('\0')
        if not separator:
            raise ValueError(
                "Item %r has no null character separating metadata from data" % (name,)
            )

        if bare_answer:
            metadata = {"answer": raw_metadata}
        else:
            metadata = json.loads(raw_metadata)

        if not metadata.get('content_type'):
            # An empty fallback is as useless as a missing one.
            if not content_type:
                raise RuntimeError("Can't determine content type for item %s" % repr(name))
            metadata['content_type'] = content_type

        yield {
            'id': name,
            'metadata': json.dumps(metadata),
            'data': data
        }


class YtClientMock:
    """File-backed stand-in exposing the two client methods this script calls."""

    def read_table(self, path):
        """Yield one YSON-decoded row per line of the local file at ``path``.

        Trailing whitespace and then any trailing ``;`` characters are
        removed from each line before it is handed to ``yson.loads``.
        """
        with open(path) as table_file:
            for raw_row in table_file:
                row_text = raw_row.rstrip().rstrip(';')
                yield yson.loads(row_text)

    def get_attribute(self, path, attr):
        """Return ``None`` for every path and attribute."""
        return None


def load_yt_table(yt_client, path):
    """Yield upload items read from a YT table.

    Args:
        yt_client: object providing ``read_table(path)`` that iterates over
            rows indexed by ``'id'``, ``'metadata'`` and ``'data'``.
        path: table path, passed to ``read_table`` unchanged.

    Yields:
        dict: ``'id'``, ``'metadata'`` (the row metadata re-encoded as JSON)
        and ``'data'`` of every row.

    Raises:
        ValueError: the metadata of a row has a missing or empty ``answer``
            or ``content_type`` field.
    """
    for rec in yt_client.read_table(path):
        metadata = json.loads(rec['metadata'])

        # Raised explicitly rather than asserted: assertions vanish under
        # ``python -O``, and a missing key should be reported the same way
        # as an empty value.
        for field in ('answer', 'content_type'):
            if not metadata.get(field):
                raise ValueError(
                    "Missing '%s' field in metadata of item %r" % (field, rec.get('id'))
                )

        yield {
            'id': rec['id'],
            'metadata': json.dumps(metadata),
            'data': rec['data'],
        }


def main():
    """Entry point: read items from the selected source and upload them.

    Builds the connection config from the parsed arguments, picks the item
    iterator that matches the chosen sub-command and passes everything to
    ``upload_data``.  For ``yt-table`` input without an explicit version the
    version is the MD5 hex digest of ``"<modification_time>|<path>"``.

    Raises:
        RuntimeError: the input type is not supported, the table modification
            time needed to derive a version is unavailable, or the resulting
            version is empty.
    """
    args = parse_args()

    auth_token = None
    if args.auth_token_file:
        # argparse opened the file for us; close it once the token is read.
        with args.auth_token_file as token_file:
            auth_token = token_file.read().strip()

    config = {
        'endpoint': args.endpoint,
        'database': args.database,
        'table_items': args.table_items,
        'table_versions': args.table_versions,
        'auth_token': auth_token
    }

    # getattr: the attribute only exists when a sub-command was parsed.
    version = getattr(args, 'version', None)
    if args.input_type == 'legacy_tarball':
        items_iter = load_legacy_tarball(args.input, args.content_type, args.bare_answer)
    elif args.input_type == 'yt-table':
        # The special proxy name "test" swaps in the file-backed mock client.
        yt_client = YtClientMock() if args.proxy == "test" else yt.client.YtClient(proxy=args.proxy)
        items_iter = load_yt_table(yt_client, args.path)
        if not version:
            modification_time = yt_client.get_attribute(args.path, 'modification_time')
            if not modification_time:
                raise RuntimeError(
                    "Can't derive data version: no modification_time for %r, pass --version" % (args.path,)
                )
            version_key = '|'.join([modification_time, args.path])
            # hashlib needs bytes; byte strings are hashed as they are so
            # that already derived versions stay the same.
            if not isinstance(version_key, bytes):
                version_key = version_key.encode('utf-8')
            version = hashlib.md5(version_key).hexdigest()
    else:
        raise RuntimeError("Unsupported input type: %r" % (args.input_type,))

    if not version:
        raise RuntimeError("Data version must not be empty")

    # Plain write() works on both Python 2 and Python 3, unlike ``print >>``.
    sys.stderr.write("Uploading version {}...\n".format(version))
    upload_data(config, items_iter, args.type, version, args.transaction_limit)

# Run the uploader only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
