#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import json
import argparse


def extract_date(s):
    """Parse the JSON document *s* and return its top-level ``date`` entry.

    Raises whatever ``json.loads`` raises on malformed input, and
    ``KeyError`` when the decoded object has no ``date`` key.
    """
    payload = json.loads(s)
    return payload['date']


def make_unique(out):
    """Return the items of *out* without repeated ``(query, url)`` pairs.

    Items are scanned in order; the first item carrying a given pair is
    kept and every later item with the same pair is dropped. The input
    iterable itself is not modified.
    """
    seen_pairs = set()
    unique_items = []
    for item in out:
        pair = (item['query'], item['url'])
        if pair not in seen_pairs:
            unique_items.append(item)
            seen_pairs.add(pair)
    return unique_items


def main():
    """Reshape the records of a JSON file and write them out de-duplicated.

    Command-line arguments:
        input       path of a JSON file containing an iterable of records.
        --output    path of the JSON file to write. When omitted, the
                    result is written to standard output.

    Each input record is turned into a dict with the keys ``query``
    (the value ``extract_date`` returns for the record's
    ``SERP.text.additional`` field), ``json.videoPreviews`` (an empty list
    when missing or falsy), ``region_id``, ``country`` and ``url``.
    Records repeating an earlier ``(query, url)`` pair are dropped by
    ``make_unique``. The result is serialised with a two-space indent and
    sorted keys.

    Raises ``KeyError`` when a record lacks one of the required fields.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output')
    parser.add_argument('input')
    args = parser.parse_args()

    # Context managers close the handles deterministically instead of
    # relying on garbage collection.
    with open(args.input) as src:
        inp = json.load(src)

    out = make_unique([
        {
            'query': extract_date(el['SERP.text.additional']),
            'json.videoPreviews': el.get('json.videoPreviews') or [],
            'region_id': el['query_region_id'],
            'country': el['query_country'],
            'url': el['component_page_url'],
        }
        for el in inp
    ])

    if args.output is None:
        # --output is optional; without it open(None, 'w') used to fail
        # after all the work was done, so fall back to stdout instead.
        json.dump(out, sys.stdout, indent=2, sort_keys=True)
    else:
        # Opened only after processing succeeded, so a bad input never
        # truncates an existing output file.
        with open(args.output, 'w') as dst:
            json.dump(out, dst, indent=2, sort_keys=True)


# Run the conversion only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
