#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import codecs
import argparse
import itertools


def parse_duration(s):
    """Convert a colon-separated duration string into seconds.

    Accepts ``MM:SS`` and ``HH:MM:SS``. Returns ``None`` for a falsy
    argument and for strings that do not have exactly two or three
    colon-separated fields. Every field is converted with ``int`` before
    the field count is checked, so a non-integer field raises
    ``ValueError``.
    """
    if not s:
        return None

    fields = list(map(int, s.split(':')))
    if len(fields) not in (2, 3):
        return None

    # Fold the fields as base-60 digits, most significant first.
    total = 0
    for value in fields:
        total = total * 60 + value
    return total


def main():
    """Extract long-video result URLs from SERP JSON dumps.

    Command line: one or more input JSON files, ``--output`` (text file
    that receives the sorted, de-duplicated page URLs, one per line) and
    ``--output_players`` (JSON file that receives a list of
    ``{"url": ..., "code": ...}`` records for results that carry player
    HTML, sorted by URL).

    Only components of type ``SEARCH_RESULT`` with a non-empty page URL
    are considered. A result is dropped when its parsed video duration is
    non-zero and shorter than 50 minutes; results whose duration is
    missing, unparseable by field count, or zero are kept.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', nargs='+')
    # Both outputs are written unconditionally, so require them up front
    # rather than failing with a TypeError after all inputs were processed.
    parser.add_argument('--output', required=True)
    parser.add_argument('--output_players', required=True)
    args = parser.parse_args()

    min_duration = 50 * 60  # seconds

    urls = set()
    player_urls = {}

    for input_ in args.input:
        # Close each input promptly and decode it as UTF-8 regardless of
        # the locale's default encoding.
        with codecs.open(input_, 'r', 'utf8') as f:
            obj = json.load(f)
        for it in obj:
            for x in it['serp-page']['parser-results']['components']:
                if x.get('type') != 'SEARCH_RESULT':
                    continue
                page_url = x.get('page-url')
                if not page_url:
                    continue
                duration = parse_duration(
                    (x.get('video') or {}).get('duration')
                )
                if duration and duration < min_duration:
                    continue
                urls.add(page_url)
                player_html = x.get('video-player-html')
                if player_html:
                    # A later occurrence of the same URL overrides an
                    # earlier one.
                    player_urls[page_url] = player_html

    print('gonna write some files')
    with codecs.open(args.output, 'w', 'utf8') as f:
        f.write('\n'.join(sorted(urls)) + '\n')

    # Sort the records by URL so the output does not depend on dict
    # iteration order, matching the sorted URL list above.
    players = [
        {'url': url, 'code': code}
        for url, code in sorted(player_urls.items())
    ]
    with open(args.output_players, 'w') as f:
        json.dump(players, f, indent=2, sort_keys=True)
    print('files are written')


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
