#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
import json
from collections import defaultdict
import copy


def _load_json(path):
    """Parse the JSON document stored in the file at *path* and return it."""
    # Binary mode lets the json module work out the encoding on its own
    # instead of depending on the locale's default text encoding.
    with open(path, 'rb') as stream:
        return json.load(stream)


def _add_missing_devices(inp, out):
    """Extend *out* in place with answers for devices that have none.

    Input records are grouped by ``(url, code)``; output records are grouped
    by the same pair taken from their ``inputValues``. For every group that
    has more input records than output records, each device that occurs in
    the input but in none of the group's outputs gets a deep copy of the
    group's first output record, with ``inputValues['device']`` replaced by
    that device. Copies are appended to *out*.

    Groups without any output record are skipped: there is nothing to copy.

    Args:
        inp: list of dicts with ``url``, ``code`` and ``device`` keys.
        out: list of dicts whose ``inputValues`` dict has the same three
            keys. Modified in place.
    """
    inp_by_uc = defaultdict(list)
    out_by_uc = defaultdict(list)
    for task in inp:
        inp_by_uc[(task['url'], task['code'])].append(task)
    for answer in out:
        values = answer['inputValues']
        out_by_uc[(values['url'], values['code'])].append(answer)

    for key, tasks in inp_by_uc.items():
        # .get() avoids inserting empty groups into the defaultdict.
        answers = out_by_uc.get(key)
        if not answers or len(tasks) <= len(answers):
            continue
        # The template is always the first answer originally present for
        # the group; copies appended below never become templates.
        template = answers[0]
        covered = {answer['inputValues']['device'] for answer in answers}
        # Walk the input records (rather than a set difference) so that the
        # appended copies come out in a reproducible order.
        for task in tasks:
            device = task['device']
            if device in covered:
                continue
            covered.add(device)
            new = copy.deepcopy(template)
            new['inputValues']['device'] = device
            out.append(new)


def main():
    """Command-line entry point.

    Reads two JSON files (the input records and the output records), fills
    in output records for devices that are missing from the output, and
    writes the resulting list to a third file as indented JSON with sorted
    keys. Prints the number of input records, the number of output records
    as read, and the number of output records as written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('toloka_input')
    parser.add_argument('toloka_output')
    parser.add_argument('output')
    args = parser.parse_args()

    inp = _load_json(args.toloka_input)
    out = _load_json(args.toloka_output)
    print(len(inp))
    print(len(out))

    _add_missing_devices(inp, out)

    # The context manager guarantees the result is flushed and closed.
    with open(args.output, 'w') as stream:
        json.dump(out, stream, indent=2, sort_keys=True)
    print(len(out))


# Run the command-line entry point only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
