#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import codecs
import argparse
import itertools


def parse_duration(s):
    """Convert a colon-separated duration string into a number of seconds.

    Accepts ``H:M:S`` and ``M:S`` forms; every field is parsed with ``int``.

    Args:
        s: Duration text such as ``"1:02:03"`` or ``"02:03"``; may be empty
            or ``None``.

    Returns:
        The total number of seconds as an ``int``, or ``None`` when ``s`` is
        falsy or does not have exactly two or three fields.

    Raises:
        ValueError: If any field is not a valid integer literal.
    """
    if not s:
        return None
    # Convert every field up front so malformed input fails regardless of
    # how many fields there are.
    fields = list(map(int, s.split(':')))
    if len(fields) == 2:
        minutes, seconds = fields
        return 60 * minutes + seconds
    if len(fields) == 3:
        hours, minutes, seconds = fields
        return 3600 * hours + 60 * minutes + seconds
    return None


def main():
    """Filter SERP components by video duration and write pair/summary files.

    Positional command-line arguments:
        input: JSON file containing a list of SERP records. Each record is
            read via ``['serp-page']['parser-results']['components']`` and
            ``['serp-request-explained']['per-query-parameters']['query-text']``.
        output: Path that receives ``{'pairs': ..., 'player_urls': ...}``
            as JSON.
        output_serps: Path that receives the per-query summaries as UTF-8
            JSON.
        fixed_pair: JSON file with an object providing ``url`` and
            ``player_url``.

    For every component with a page URL, the component is rejected when its
    duration is known and shorter than 50 minutes; otherwise its URL is kept,
    paired with the fixed URL, and its player HTML is recorded. Components
    that are ``None`` or have no ``page-url`` are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input')
    parser.add_argument('output')
    parser.add_argument('output_serps')
    parser.add_argument('fixed_pair')
    args = parser.parse_args()

    # Read inputs as UTF-8 (matching the encoding used for the SERP output)
    # and close the handles deterministically.
    with codecs.open(args.fixed_pair, 'r', 'utf8') as fixed_pair_file:
        fixed_pair = json.load(fixed_pair_file)
    with codecs.open(args.input, 'r', 'utf8') as input_file:
        obj = json.load(input_file)

    # Components with a known duration below this many seconds are rejected.
    min_duration = 50 * 60

    # NOTE(review): this single list is shared by every entry appended to
    # serps_output, so each entry's 'shit_urls' ends up holding the rejected
    # URLs of ALL queries. Kept as-is to preserve the output format; confirm
    # whether a per-query list was intended.
    shit_urls = []
    serps_output = []
    result = []
    player_urls = {}
    e = 0
    for it in obj:
        urls = []
        for x in it['serp-page']['parser-results']['components']:
            # A component may be None; normalise once so every lookup
            # below is safe.
            x = x or {}
            duration = parse_duration((x.get('video') or {}).get('duration'))
            # Progress counter: number of components processed so far.
            print(e)
            e += 1
            page_url = x.get('page-url')
            if not page_url:
                continue
            # An unknown (None) or zero duration is treated as acceptable.
            if duration and duration < min_duration:
                shit_urls.append(page_url)
                continue
            urls.append(page_url)
            player_urls[page_url] = x.get('video-player-html') or ''
        serps_output.append(
            {
                'query': it[
                    'serp-request-explained'
                ]['per-query-parameters']['query-text'],
                'serp': urls,
                'fixed_pair': fixed_pair,
                'shit_urls': shit_urls
            }
        )
        for url in urls:
            result.append([url, fixed_pair['url']])
        if fixed_pair['player_url']:
            player_urls[fixed_pair['url']] = fixed_pair['player_url']

    print('gonna write some files')
    # Context managers guarantee the output is flushed and closed before
    # the success message is printed.
    with open(args.output, 'w') as pairs_file:
        json.dump(
            {'pairs': result, 'player_urls': player_urls},
            pairs_file, indent=2
        )
    with codecs.open(args.output_serps, 'w', 'utf8') as serps_file:
        json.dump(
            serps_output, serps_file,
            indent=2, sort_keys=True, ensure_ascii=False
        )
    print('files are written')



# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
