#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
import json
from collections import defaultdict, Counter


def tabulate(*args):
    """Return all positional arguments rendered with ``format`` and tab-joined.

    With no arguments the result is an empty string.
    """
    cells = [format(value) for value in args]
    return '\t'.join(cells)


def main():
    """Compare query baskets and report how their queries overlap.

    Command line:
        --query_text_field  key used to pull the query text out of each
                            query object (required).
        --output            path of the JSON file to write; when omitted
                            only the statistics are printed.
        input               one or more basket files in JSON format.

    Every basket file must decode to an iterable of objects that can be
    indexed by the ``--query_text_field`` value.  Baskets are numbered
    from 1 in command-line order.

    For each basket the function prints a ``basket #N`` header followed by
    one tab-separated row per overlap group: the comma-joined numbers of
    the *other* baskets that also contain the query (or ``unique`` when
    there are none), how many of this basket's distinct queries fall in
    that group, and that count as a fraction of the basket's distinct
    queries.  Finally the mapping ``query text -> sorted basket numbers``
    is written to ``--output`` as UTF-8 JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--output',
        help='path of the JSON file to write; omit to only print statistics')
    parser.add_argument(
        '--query_text_field', required=True,
        help='key under which each query object stores its text')
    parser.add_argument('input', nargs='+', help='basket files (JSON)')
    args = parser.parse_args()

    # query text -> set of 1-based numbers of the baskets containing it
    baskets_by_query = defaultdict(set)

    for num, basket in enumerate(args.input, 1):
        # Decode explicitly as UTF-8 (matching the output file) and close
        # the handle as soon as the document has been parsed.
        with codecs.open(basket, 'r', 'utf8') as src:
            queries = json.load(src)
        for query in queries:
            baskets_by_query[query[args.query_text_field]].add(num)

    for num in range(1, len(args.input) + 1):
        print('basket #{}'.format(num))
        overlap = Counter()
        for nums in baskets_by_query.values():
            if num not in nums:
                continue
            # Label the query by the other baskets that share it.
            others = ",".join(map(format, sorted(nums - {num})))
            overlap[others or "unique"] += 1
        # Only used when ``overlap`` is non-empty, so never divides by zero.
        total = sum(overlap.values())
        for group, count in overlap.items():
            print(tabulate(group, count, '{:.02f}'.format(count / total)))
        print('\n-----\n')

    if args.output is not None:
        # Sets are not JSON-serialisable; emit sorted lists instead.
        membership = {
            text: sorted(nums) for text, nums in baskets_by_query.items()
        }
        with codecs.open(args.output, 'w', 'utf8') as dst:
            json.dump(
                membership, dst,
                ensure_ascii=False, indent=2, sort_keys=True
            )


# Run the basket comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
