#!/usr/bin/env python
#
# Compare the ETags of objects stored under the same key in two S3 buckets
#
# examples
# s3-compare-contents twitch-abrash-182728691873.us-west-2 twitch-abrash-182728691873.us-east-2 --key test.json
#
# aws s3 ls --recursive s3://twitch-abrash-182728691873.us-west-2/ | awk '{print $4}' | ./s3-compare-contents twitch-abrash-182728691873.us-west-2 twitch-abrash-182728691873.us-east-2
#

import botocore

def parse_args():
    """Parse the command line and return the resulting namespace.

    The namespace carries ``bucket1`` and ``bucket2`` (required positionals)
    and ``key``, which is a list of the values given after ``--key``/``-k``
    or ``None`` when the option is absent.
    """
    from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

    cli = ArgumentParser(
        description='Compare hashes of s3 objects in 2 buckets',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    # Both positionals deliberately share the same metavar in the usage text.
    for dest, text in (('bucket1', 'First bucket'), ('bucket2', 'Second bucket')):
        cli.add_argument(dest, metavar='bucket', help=text)
    cli.add_argument('--key', '-k', nargs='*', help='Object key(s)')
    return cli.parse_args()

def compare(client, bucket1, bucket2, key):
    """Print a line if *key* has a different ETag in the two buckets.

    The ``etag`` response header of a ``head_object`` call is fetched for
    *bucket1* and then *bucket2*.  If either call raises
    ``botocore.exceptions.ClientError`` an ``Error:`` line naming the key and
    the failing bucket is printed and the comparison is abandoned.  When both
    ETags are available and differ, a ``Mismatch:`` line is printed; matching
    ETags produce no output.  Always returns ``None``.
    """
    etags = []
    for bucket in (bucket1, bucket2):
        try:
            response = client.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as err:
            print("Error: {} {} {}".format(key, bucket, str(err)))
            return
        else:
            etags.append(response['ResponseMetadata']['HTTPHeaders']['etag'])

    first, second = etags
    if first == second:
        return
    print("Mismatch: {} {} {}".format(key, bucket1, bucket2))

def main():
    """Compare each requested key between the two buckets named on the command line.

    Keys are taken from ``--key`` when at least one value is supplied.
    Otherwise they are read from standard input, one key per line, with
    trailing whitespace removed.  Each key is handed to ``compare``, which
    prints any error or mismatch.
    """
    import boto3
    import sys

    args = parse_args()
    client = boto3.client('s3')

    if args.key:
        for key in args.key:
            compare(client, args.bucket1, args.bucket2, key)
        return

    # No keys on the command line: read them from stdin.  Iterating the stream
    # directly handles one line at a time instead of loading the whole listing
    # into memory first.
    for line in sys.stdin:
        key = line.rstrip()
        # A blank line (e.g. from `awk '{print $4}'` on an empty input line)
        # would become an empty Key, which botocore rejects during parameter
        # validation with an exception compare() does not catch, so skip it
        # rather than abort the whole run.
        if not key:
            continue
        compare(client, args.bucket1, args.bucket2, key)

# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
