import os
import time
import json
import hashlib
import argparse


def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``arcadia`` and ``output`` as given
        on the command line, and ``exts`` as a set of extensions, each
        normalized to carry exactly one leading dot ("py" and ".py" both
        become ".py").
    """
    parser = argparse.ArgumentParser()
    # All three options are consumed unconditionally later on, so a missing
    # one is reported by argparse as a usage error instead of surfacing as a
    # TypeError on None.
    parser.add_argument("--arcadia", required=True,
                        help="root directory to walk")
    parser.add_argument("--output", required=True,
                        help="path of the JSON file to write")
    parser.add_argument("--exts", nargs="+", required=True,
                        help="file extensions to include, with or without a leading dot")

    args = parser.parse_args(argv)
    # Normalize so the values compare equal to os.path.splitext() results.
    args.exts = {"." + e.lstrip(".") for e in args.exts}
    return args


def get_hash(root, filename):
    """Return the SHA-256 hex digest of a file's name and contents.

    The digest covers, in order: ``filename``, a NUL byte separator, and the
    raw bytes of the file located at ``os.path.join(root, filename)``.

    Args:
        root: directory that ``filename`` is relative to.
        filename: path of the file relative to ``root``; it is part of the
            hashed data.

    Returns:
        The hexadecimal digest string.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    h = hashlib.sha256()
    # hashlib only accepts bytes; text names are encoded as UTF-8, with
    # surrogateescape so names holding undecodable bytes still hash.
    if isinstance(filename, bytes):
        name_bytes = filename
    else:
        name_bytes = filename.encode('utf-8', 'surrogateescape')
    h.update(name_bytes + b'\0')
    buff_size = 16 * 1024
    # Binary mode: hash the bytes exactly as stored, with no decoding or
    # newline translation.
    with open(os.path.join(root, filename), 'rb') as afile:
        for chunk in iter(lambda: afile.read(buff_size), b''):
            h.update(chunk)
    return h.hexdigest()


def dump(hashes, filename):
    """Write ``hashes`` as JSON to ``filename`` via a temporary file.

    The data is first written to ``filename + '.tmp'`` and then moved over
    ``filename``, so ``filename`` is never left holding a half-written
    document.

    Args:
        hashes: JSON-serializable object to store.
        filename: destination path; an existing file is overwritten.
    """
    tmp_name = filename + '.tmp'
    with open(tmp_name, 'w') as afile:
        json.dump(hashes, afile)
    # script can be killed at any time - data must be valid
    # os.rename() refuses to overwrite an existing destination on Windows;
    # os.replace() (Python 3.3+) overwrites on every platform. Fall back to
    # os.rename() on interpreters that lack it.
    replace = getattr(os, 'replace', os.rename)
    replace(tmp_name, filename)


def main(args):
    """Hash every matching file under ``args.arcadia`` into ``args.output``.

    Walks the tree rooted at ``args.arcadia``, computes ``get_hash`` for each
    regular file whose extension is in ``args.exts``, and stores the results
    in a dict keyed by the path relative to ``args.arcadia``. The dict is
    written with ``dump`` periodically during the walk and once more at the
    end. Returns None.
    """
    # Create (or truncate) the output file before any work is done.
    with open(args.output, 'w'):
        pass

    source_root = args.arcadia
    digests = {}
    count_at_last_dump = 0
    time_of_last_dump = time.time()

    for dirpath, _, names in os.walk(source_root):
        for name in names:
            _, ext = os.path.splitext(name)
            full_path = os.path.join(dirpath, name)
            # Skip unwanted extensions and anything that is not a regular file.
            if ext not in args.exts or not os.path.isfile(full_path):
                continue

            rel_path = os.path.relpath(full_path, source_root)
            digests[rel_path] = get_hash(source_root, rel_path)

            # The script may be killed at any moment, so persist progress
            # after enough new entries or enough elapsed time.
            many_new_entries = len(digests) - count_at_last_dump > 10000
            if many_new_entries or time.time() - time_of_last_dump > 10.0:
                dump(digests, args.output)
                count_at_last_dump = len(digests)
                time_of_last_dump = time.time()

    dump(digests, args.output)


if __name__ == "__main__":
    # The exit() builtin is injected by the site module and may be absent
    # (e.g. under `python -S`); raising SystemExit gives the same exit status
    # without relying on it.
    raise SystemExit(main(parse_args()))
