#!/usr/bin/python3

import json
import sys

# Tables excluded from the comparison: their schema is defined incorrectly and
# they contain practically no client data (DIRECT-172119).
_SKIPPED_TABLES = frozenset([
    'perf_creatives.index',
    'retargeting_conditions.index',
    'tag_group.index',
])

# Per-table columns excluded from the comparison. The campaigns wallet columns
# are not synchronized properly and are not used:
# https://st.yandex-team.ru/DIRECT-168575#629cfab22b5a5d6bb3f288bc
_IGNORED_COLUMNS = {
    'campaigns': frozenset([
        'wallet_aggr_data',
        'wallet_day_budget',
        'wallet_day_budget_stop_time',
    ]),
}

# Sentinel for "column absent from this row"; distinct from a JSON null (None).
_MISSING = object()


def _load_json(path):
    """Read the file at *path* as UTF-8 encoded JSON and return the decoded value."""
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _describe(value):
    """Return a printable form of a column value, marking absent columns."""
    return '<missing>' if value is _MISSING else repr(value)


def _compare_rows(table, index, row1, row2, shards1, shards2):
    """Compare one pair of rows and return the number of differing columns.

    The '__shard__' value of each row (when present) is recorded in
    *shards1* / *shards2* and is never compared. Columns listed in
    _IGNORED_COLUMNS for *table* are skipped. A column present in only one of
    the rows counts as a difference. Each difference is reported on stderr.
    """
    if '__shard__' in row1:
        shards1.add(row1['__shard__'])
    if '__shard__' in row2:
        shards2.add(row2['__shard__'])

    ignored = _IGNORED_COLUMNS.get(table, ())
    differences = 0
    # Walk the union of the keys so that columns existing on only one side are
    # detected instead of raising KeyError or being silently accepted.
    for key in sorted(set(row1) | set(row2)):
        if key == '__shard__' or key in ignored:
            continue
        value1 = row1.get(key, _MISSING)
        value2 = row2.get(key, _MISSING)
        if value1 is _MISSING or value2 is _MISSING or value1 != value2:
            sys.stderr.write('table {}, row {}: {} {} != {}\n'.format(
                table, index, key, _describe(value1), _describe(value2)))
            differences += 1
    return differences


def main():
    """Compare two JSON dumps given on the command line, table by table.

    Expects exactly two arguments: the paths of the two dumps. Each dump is
    read as a JSON object mapping a table name to a list of row objects.

    Exit behaviour (always leaves through sys.exit):
      * usage is printed to stdout when the argument count is wrong; status 0
        if the single argument is -h/--help, status 1 otherwise;
      * a message (and therefore status 1) when the sets of tables differ or
        when one dump contains rows from more than one '__shard__';
      * status 1 when any row count or column value differs (details are
        written to stderr);
      * status 0 with 'ok' on stderr when no differences were found.
    """
    usage = '''Usage: {0} <file1> <file2>
Example: {0} data.client_id.123.before-reshard.json data.client_id.123.after-reshard.json'''
    if len(sys.argv) != 3:
        # The template has {0} placeholders for the program name.
        print(usage.format(sys.argv[0]))
        asked_for_help = len(sys.argv) == 2 and sys.argv[1] in ('-h', '--help')
        sys.exit(0 if asked_for_help else 1)

    file1, file2 = sys.argv[1:3]
    data1 = _load_json(file1)
    data2 = _load_json(file2)

    tables1 = set(data1)
    tables2 = set(data2)
    # Tables found only in file1 are the ones file2 lacks, and vice versa.
    only_in_1 = tables1 - tables2
    if only_in_1:
        sys.exit('{} is missing tables: {}'.format(file2, ','.join(sorted(only_in_1))))
    only_in_2 = tables2 - tables1
    if only_in_2:
        sys.exit('{} is missing tables: {}'.format(file1, ','.join(sorted(only_in_2))))

    shards1 = set()
    shards2 = set()
    differences = 0
    for table in sorted(tables1):
        if table in _SKIPPED_TABLES:
            continue
        rows1 = data1[table]
        rows2 = data2[table]
        if len(rows1) != len(rows2):
            sys.stderr.write('different number of rows for table {}: {}: {}, {}: {}\n'.format(
                table, file1, len(rows1), file2, len(rows2)))
            differences += 1
            # Rows are matched by position, so a per-row diff would be meaningless.
            continue
        for index, (row1, row2) in enumerate(zip(rows1, rows2)):
            differences += _compare_rows(table, index, row1, row2, shards1, shards2)

    if len(shards1) > 1:
        sys.exit('{} has records for different shards: {}'.format(
            file1, ','.join(sorted(str(sh) for sh in shards1))))
    if len(shards2) > 1:
        sys.exit('{} has records for different shards: {}'.format(
            file2, ','.join(sorted(str(sh) for sh in shards2))))
    if differences:
        sys.exit(1)
    sys.stderr.write('ok\n')
    sys.exit(0)


# Run the comparison only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
