#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
import json


# Key under which main() stores, for every SERP element, the comma-separated
# list of earlier positions that were judged to be duplicates of it.
judgement_name = 'dynamic_judgement:multi_judgement:video_query_duplicates:label'


def _pair_key(url_a, url_b):
    """Return an order-independent, hashable key for a pair of URLs."""
    return tuple(sorted([url_a, url_b]))


def _load_json(path):
    """Parse and return the UTF-8 encoded JSON document stored at ``path``."""
    # codecs.open pins the encoding (plain open() would fall back to the
    # locale encoding on Python 3), and ``with`` closes the handle promptly.
    with codecs.open(path, 'r', 'utf8') as stream:
        return json.load(stream)


def _collect_duplicate_pairs(toloka):
    """Return the set of URL-pair keys whose task result is ``'EQUAL'``.

    Each item of ``toloka`` is read as
    ``{'inputValues': {'url1': ..., 'url2': ...},
    'outputValues': {'result': ...}}``.
    """
    return set(
        _pair_key(task['inputValues']['url1'], task['inputValues']['url2'])
        for task in toloka
        if task['outputValues']['result'] == 'EQUAL'
    )


def _build_judgements(serps, dups):
    """Return one judgement record per SERP element.

    ``serps`` maps a query text to a list of elements, each carrying
    ``component_position`` and ``component_page_url``.  Elements of a query
    are visited in ascending ``component_position``; every element is
    compared with the elements visited before it, and the 1-based indices
    (within that sorted order) of those found in ``dups`` are joined with
    commas.  An element without earlier duplicates gets an empty string.
    """
    records = []
    for query in serps:
        ordered = sorted(serps[query], key=lambda x: x['component_position'])
        seen_urls = []
        for element in ordered:
            page_url = element['component_page_url']
            earlier_dups = [
                str(index + 1)
                for index, seen_url in enumerate(seen_urls)
                if _pair_key(page_url, seen_url) in dups
            ]
            records.append(
                {
                    'query_text': query,
                    'component_page_url': page_url,
                    judgement_name: ','.join(earlier_dups),
                }
            )
            # Appended only after the comparison so that an element is never
            # matched against itself.
            seen_urls.append(page_url)
    return records


def main():
    """Mark duplicate SERP elements and write the judgements as JSON.

    Command-line options (all required):
      --input_serps   JSON object mapping query text to a list of elements.
      --input_toloka  JSON list of pairwise comparison tasks.
      --output        path of the JSON file to write.

    The output is a JSON list with one object per SERP element holding
    ``query_text``, ``component_page_url`` and, under ``judgement_name``,
    the comma-separated positions of earlier elements of the same query
    that form an ``'EQUAL'`` pair with it.
    """
    parser = argparse.ArgumentParser()
    # All three paths are mandatory: without ``required=True`` a missing
    # option only surfaced later as an obscure TypeError from open(None).
    parser.add_argument('--input_serps', required=True)
    parser.add_argument('--input_toloka', required=True)
    parser.add_argument('--output', required=True)
    args = parser.parse_args()

    toloka = _load_json(args.input_toloka)
    serps = _load_json(args.input_serps)

    result = _build_judgements(serps, _collect_duplicate_pairs(toloka))

    # Close the output explicitly so the data is flushed to disk before
    # main() returns instead of whenever the handle is garbage-collected.
    with codecs.open(args.output, 'w', 'utf8') as out:
        json.dump(result, out, indent=2)


# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
