#!/usr/bin/python
# -*- coding: utf-8 -*-
# vim: set expandtab:tabstop=4:softtabstop=4:shiftwidth=4:nowrap
# $Id$

import argparse
import re
import os
import sys

def run(src_shards=(1, 2, 3, 4)):
    """Print which source partitions have a matching copy on another shard.

    Reads tab-separated rows from stdin, one per line, with five fields::

        table <TAB> partition <TAB> shard <TAB> total_rows <TAB> rows_by_day

    ``rows_by_day`` is a comma-separated list of integer counts; only the
    first 31 are used.  ``total_rows`` is parsed but not used.  Blank lines
    are skipped.

    Rows are grouped by ``"<short table name> <partition>"``, where the short
    name is the table name with the ``_v3_mergetree`` / ``_v2_mergetree`` /
    ``_mergetree`` suffix removed.  A row whose shard is in ``src_shards`` is
    a source row; every other row is treated as a destination row.

    For each group that has source rows (in sorted key order):

    * groups whose source tables all end in ``_v3_mergetree`` are skipped;
    * the per-day counts of all source rows are summed;
    * each destination row whose ``rows_by_day`` text equals the summed
      counts is printed as ``<key> <sources> -> <table>@<shard>``;
    * each destination row that differs is remembered and printed at the
      very end as ``ZZZ -> <table>.<partition>@<shard>``;
    * if no destination row matched, ``<key> <sources> -> XXX`` is printed.

    Written so that it runs unchanged on both Python 2 and Python 3: no
    reliance on ``filter``/``map`` returning lists, and ``print`` is always
    called with a single string argument.

    Args:
        src_shards: iterable of integer shard numbers that hold the source
            data.  Defaults to the shards previously hard-coded here.

    Raises:
        ValueError: if a non-blank line does not have exactly five
            tab-separated fields, or a shard / day count is not an integer.
        IndexError: if a source row has fewer than 31 day counts.
    """
    days_tracked = 31
    source_shards = frozenset(src_shards)

    src_part_locations = {}
    dst_parts = {}
    for raw_line in sys.stdin:
        line = raw_line.rstrip()
        if not line:
            # A blank line (e.g. a trailing newline) carries no row.
            continue
        table, partition, shard, _total_rows, rows_by_day = line.split('\t')
        table_shortname = (table.replace('_v3_mergetree', '')
                                .replace('_v2_mergetree', '')
                                .replace('_mergetree', ''))
        key = table_shortname + ' ' + partition
        entry = {
            'table': table,
            'shard': shard,
            'rows_by_day': rows_by_day,
            'partition': partition,
        }
        # Anything that is not a source shard counts as a destination.
        if int(shard) in source_shards:
            src_part_locations.setdefault(key, []).append(entry)
        else:
            dst_parts.setdefault(key, []).append(entry)

    dirty_parts = []
    for key in sorted(src_part_locations):
        sources = src_part_locations[key]
        if all(src['table'].endswith('_v3_mergetree') for src in sources):
            continue
        partition_spec = ','.join(
            '{table}@{shard}'.format(**src) for src in sources)

        total_rows_by_day = [0] * days_tracked
        for src in sources:
            counts = [int(count) for count in src['rows_by_day'].split(',')]
            # Indexing (rather than zip) keeps the IndexError on short rows
            # instead of silently comparing against a partial sum.
            for day in range(days_tracked):
                total_rows_by_day[day] += counts[day]
        total_rows_by_day_str = ','.join(str(n) for n in total_rows_by_day)

        ready = False
        for dst in dst_parts.get(key, []):
            if dst['rows_by_day'] == total_rows_by_day_str:
                print(' '.join([key, partition_spec, '->',
                                '{table}@{shard}'.format(**dst)]))
                ready = True
            else:
                dirty_parts.append(dst)
        if not ready:
            print(key + ' ' + partition_spec + ' -> XXX')

    for part in dirty_parts:
        print('ZZZ -> {table}.{partition}@{shard}'.format(**part))

# Produce the report only when executed as a script, not when imported.
if __name__ == '__main__':
    run()

