# -*- coding: utf-8 -*-
import sys, codecs, json
from bs4 import BeautifulSoup as BS

from xml.sax.saxutils import escape

# Markup fragments filled in with str.format().  They are unicode literals so
# that formatting non-ASCII query text also works on Python 2.
header_template = u"""<h4 class="task__title">{query}</h4>"""
image_template = u"""<label class="image__label{color_label}"><img src="{image}" class="image__pic" />
            <br/>
            <span class="choice__hotkey">{span}</span>
        </label>"""
break_template = u'<hr>'


def _escape_html(value):
    """Return *value* as text that is safe inside HTML text and attributes."""
    # saxutils.escape handles &, < and >; the double quote is added because
    # image URLs are placed inside a double-quoted src="..." attribute.
    return escape(u"{}".format(value), {u'"': u"&quot;"})


def select_tasks(tasks, limit):
    """Return the tasks ordered by query text, truncated to *limit* entries.

    A *limit* of zero or less means "no limit".  The input list is not
    modified.
    """
    ordered = sorted(tasks, key=lambda task: task['inputValues']['query_text'])
    if limit > 0:
        return ordered[:limit]
    return ordered


def build_body(tasks, progress=None):
    """Render the HTML fragment for *tasks* and return it as one string.

    For every task a header with the query text is emitted, followed by one
    label per entry of ``inputValues.images_list`` (numbered from 1) and a
    horizontal rule.  Images listed in ``outputValues.result.dups`` receive
    an extra ``image__label_selected_<n>`` class, where ``n`` is the index of
    the duplicate group that contains them.

    If *progress* is given, a ``\\rProgress: i/total`` line is written to it
    for each task; it only needs a ``write`` method.
    """
    total_tasks = len(tasks)
    # Collect the pieces and join once instead of growing a string with +=.
    parts = []

    for cnt, serp in enumerate(tasks, 1):
        if progress is not None:
            progress.write('\rProgress: {}/{}'.format(cnt, total_tasks))

        inputs = serp['inputValues']
        parts.append(
            header_template.format(query=_escape_html(inputs['query_text'])))

        # Map every image to the index of its duplicate group.  An image that
        # appears in several groups keeps the index of the last one.
        groups = {}
        for i, dup_group in enumerate(serp['outputValues']['result']['dups']):
            for img in dup_group:
                groups[img] = i

        for i, img in enumerate(inputs['images_list'], 1):
            if img in groups:
                color = " image__label_selected_{}".format(groups[img])
            else:
                color = ""
            parts.append(image_template.format(
                color_label=color, image=_escape_html(img), span=i))

        parts.append(break_template)

    return u''.join(parts)


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Expects ``<template> <tasks> <limit>``: a UTF-8 template file containing
    a ``{body}`` placeholder, a UTF-8 JSON file with the list of tasks, and
    the maximum number of tasks to render (0 or less renders all of them).
    The prettified page is written to stdout as UTF-8.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 4:
        sys.stderr.write(
            "Usage: {} <template> <tasks> <limit>\n".format(argv[0]))
        return 1

    template_filename = argv[1]
    tasks_filename = argv[2]
    try:
        limit = int(argv[3])
    except ValueError:
        sys.stderr.write(
            "<limit> must be an integer, got {!r}\n".format(argv[3]))
        return 1

    with codecs.open(template_filename, encoding='utf-8') as f:
        template = f.read()

    with codecs.open(tasks_filename, encoding='utf-8') as f:
        tasks = json.load(f)

    body = build_body(select_tasks(tasks, limit), progress=sys.stderr)
    # Terminate the carriage-return progress line.
    sys.stderr.write("\n")

    # The template is processed with str.format(), so any literal braces in
    # it (CSS, JavaScript) must be doubled.
    final_html = template.format(body=body)

    soup = BS(final_html, 'lxml')
    # prettify(encoding=...) returns bytes; on Python 3 those have to go to
    # the underlying binary stream, on Python 2 stdout accepts them directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(soup.prettify(encoding='utf-8') + b"\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
