#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
from __future__ import division

import json
import copy
import argparse
import random
import re


# Shared random source backed by the operating system's entropy pool;
# transform_device() draws from it when mapping 'mobile' to a device code.
rnd = random.SystemRandom()


# Template for the 'componentInfo' field of every output component.
# main() deep-copies it per component and overwrites 'rank' with the
# component's 1-based position; 'alignment' and 'type' are emitted unchanged.
componentInfo = {'alignment': 3, 'rank': 0, 'type': 1}


def transform_device(device):
    """Map a device name from the input file to a numeric device code.

    'desktop' always maps to 0. 'mobile' maps to 2 when a random integer
    drawn uniformly from 0..10 (inclusive) is at least 7, and to 1
    otherwise. Any other value yields None.
    """
    if device == 'mobile':
        # One draw per call: 7, 8, 9 and 10 select code 2, the rest code 1.
        return 2 if rnd.randint(0, 10) >= 7 else 1
    if device == 'desktop':
        return 0
    return None


try:
    _STRING_TYPES = (basestring,)  # Python 2: matches both str and unicode
except NameError:
    _STRING_TYPES = (str,)  # Python 3: basestring was removed


def process_duration(duration):
    """Normalize a duration value to a string of digits and colons.

    Falsy input (None, '', 0) yields '0:00'. Non-string input is converted
    with str() first. Every character other than 0-9 and ':' is stripped;
    if nothing is left after stripping, '0:00' is returned.
    """
    if not duration:
        return '0:00'
    if not isinstance(duration, _STRING_TYPES):
        duration = str(duration)
    # Fall back to the default when the value contained no digits or colons.
    return re.sub(r'[^0-9:]', '', duration) or '0:00'


def _build_component(rank, comp, result_type):
    """Convert one input search result into an output COMPONENT dict.

    ``rank`` is the 1-based position of the result within its SERP.
    ``result_type`` selects which input fields are read ('video' or
    'images'); for any other value only 'type' and 'componentInfo' are set.
    Raises KeyError when a field required for the chosen type is missing
    ('url' for video; 'page-url' and 'url' for images).
    """
    component = {'type': 'COMPONENT'}
    # Copy the template so per-component rank changes never leak into it.
    component['componentInfo'] = copy.deepcopy(componentInfo)
    component['componentInfo']['rank'] = rank

    if result_type == 'video':
        component['componentUrl'] = {'pageUrl': comp['url']}
        if 'duration' in comp:
            component['text.videoDuration'] = process_duration(comp['duration'])
        if 'json.videoPreviews' in comp:
            component['json.videoPreviews'] = comp['json.videoPreviews']
        if 'video-player-html' in comp:
            component['component_vdp_code'] = comp['video-player-html']
    elif result_type == 'images':
        component['componentUrl'] = {'pageUrl': comp['page-url']}
        if 'source' in comp:
            component['text.SERVER_DESCR'] = comp['source']
        component['imageadd'] = {'url': comp['url']}
        # Truthiness check also tolerates an explicit null in the input.
        candidates = comp.get('image-candidates')
        if candidates:
            component['imageadd']['candidates'] = candidates
    return component


def _build_serp(serp, result_type):
    """Convert one input SERP record into an output SERP dict.

    Reads 'country', 'device', 'region_id', 'query' and 'serp' from the
    record (KeyError if any is missing). All input fields other than
    'query', 'serp' and 'region_id' are carried over under 'additional' and,
    serialized as a JSON string, under 'text.additional'.
    """
    out_serp = {
        'type': 'SERP',
        'query': {
            'country': serp['country'],
            'device': transform_device(serp['device']),
            'regionId': serp['region_id'],
            'text': serp['query'],
        },
    }

    # Iterate the record itself (not a set) so the key order of the
    # serialized string follows the input instead of hash ordering.
    additional = {
        key: serp[key]
        for key in serp
        if key not in ('query', 'serp', 'region_id')
    }
    out_serp['additional'] = additional
    out_serp['text.additional'] = json.dumps(additional)

    # A query-freshness judgement is attached only for video/images searches.
    if 'queryfresh' in serp and serp.get('search_type') in ('video', 'images'):
        if serp['search_type'] == 'images':
            queryfresh_scale = 'images_queryfresh'
        else:
            queryfresh_scale = 'video_queryfresh'
        out_serp['judgements.{}'.format(queryfresh_scale)] = {
            'scale': queryfresh_scale,
            'name': str(serp['queryfresh']),
            'value': serp['queryfresh'],
        }

    out_serp['components'] = [
        _build_component(position + 1, comp, result_type)
        for position, comp in enumerate(serp['serp'])
    ]
    return out_serp


def main():
    """Convert a JSON file of SERP records into the output SERP layout.

    Command-line arguments:
        input   -- path of the JSON file to read (a list of SERP records)
        output  -- path of the JSON file to write
        --type  -- component flavour, 'video' (default) or 'images'

    The result is written with two-space indentation and sorted keys.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input')
    parser.add_argument('output')
    parser.add_argument('--type', default='video')
    args = parser.parse_args()

    # Context managers guarantee both files are closed (and the output
    # flushed) even if conversion or serialization fails.
    with open(args.input) as source:
        inp = json.load(source)

    out = [_build_serp(serp, args.type) for serp in inp]

    with open(args.output, 'w') as target:
        json.dump(out, target, indent=2, sort_keys=True)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
