#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import yt.wrapper as yt

import utils

def parse_args():
    """Parse the command line of the script.

    Recognised options:
      --yt-proxy           YT proxy (defaults to 'hahn').
      -i / --input-table   input table path (required).
      -o / --output-table  output table path (required).

    Returns the ``argparse.Namespace`` produced by ``ArgumentParser.parse_args``.
    """
    cli = argparse.ArgumentParser(description='Collect answers for unknown pictures')
    cli.add_argument('--yt-proxy', default='hahn', help='YT proxy')

    # Both table options are mandatory and differ only in flags and help text.
    required_tables = (
        ('-i', '--input-table', 'Input table path.'),
        ('-o', '--output-table', 'Output table path.'),
    )
    for short_flag, long_flag, help_text in required_tables:
        cli.add_argument(short_flag, long_flag, required=True, help=help_text)

    return cli.parse_args()

# Marker that tags the "unique" item inside the two-word ``answer`` field.
_UNIQUE_PREFIX = "#####_"
# Record types the mapper processes; everything else is skipped.
_OCR_TYPES = ("ocr", "ocr_en", "ocr_ru")

def _to_text(value):
    """Return *value* as text: decode UTF-8 byte strings, pass text through unchanged."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value

def mapper(record):
    """Map one input record to at most one output row.

    A record is silently skipped when:
      * any of the keys "type", "answer", "rep" is missing;
      * ``record["fallback"]`` equals 'true';
      * "type" is not one of "ocr", "ocr_en", "ocr_ru";
      * "rep" does not consist of exactly two space-separated tokens;
      * the case-insensitive Levenshtein distance between the checkup word
        and the reported checkup word exceeds 1;
      * "timer_c" is missing or None.

    "answer" is expected to hold two space-separated tokens, one of which
    contains the "#####_" marker. That token is treated as the unique item,
    the other one as the checkup word (presumably the word with a known
    answer -- confirm against the data producer). "rep" holds the reported
    tokens in the same order.

    Yields:
        dict with keys "timer", "timestamp", "unique_name", "unique_answer",
        "position" ("left" or "right", by index of the unique item),
        "checkup", "checkup_answer", "levenshtein_distance",
        "lowercase_distance".

    Raises:
        ValueError: if "answer" does not have exactly two tokens or does not
            contain the "#####_" marker. Raised explicitly instead of via
            ``assert`` so the check survives ``python -O``.
        KeyError: if an otherwise accepted record has no "timestamp".
    """
    if any(key not in record for key in ("type", "answer", "rep")):
        return

    if record.get('fallback') == 'true':
        return

    if record["type"] not in _OCR_TYPES:
        return

    reports = record["rep"].split(" ")
    if len(reports) != 2:
        return

    answers = record["answer"].split(" ")
    if len(answers) != 2:
        raise ValueError(
            "expected two space-separated tokens in answer, got %r" % (record["answer"],))
    if _UNIQUE_PREFIX not in record["answer"]:
        raise ValueError(
            "answer %r does not contain the %r marker" % (record["answer"], _UNIQUE_PREFIX))

    unique_name_index = 0 if _UNIQUE_PREFIX in answers[0] else 1
    checkup_index = 1 - unique_name_index

    position = ("left", "right")[unique_name_index]
    unique_name = answers[unique_name_index]
    unique_answer = reports[unique_name_index]
    checkup = answers[checkup_index]
    checkup_answer = reports[checkup_index]

    # Distances are computed on text. Byte strings are decoded once; values
    # that are already text are used as-is instead of failing on ``.decode``.
    checkup_text = _to_text(checkup)
    checkup_answer_text = _to_text(checkup_answer)
    levenshtein_distance = utils.levenshtein_distance(checkup_text, checkup_answer_text)
    lowercase_distance = utils.levenshtein_distance(checkup_text.lower(), checkup_answer_text.lower())
    if lowercase_distance > 1:
        return

    timer = record.get("timer_c")
    if timer is None:
        return
    timestamp = record["timestamp"]

    yield {
        "timer": timer,
        "timestamp": timestamp,
        # Drop the leading marker-length prefix and add the image extension.
        "unique_name": unique_name[len(_UNIQUE_PREFIX):] + ".png",
        "unique_answer": unique_answer,
        "position": position,
        "checkup": checkup,
        "checkup_answer": checkup_answer,
        "levenshtein_distance": levenshtein_distance,
        "lowercase_distance": lowercase_distance,
    }

def main():
    """Script entry point: configure the YT client and run the map operation.

    Reads the proxy and table paths from the command line, sets the proxy URL
    and the "auto_merge_output" action to "merge" in the YT client config, then
    runs ``mapper`` from the input table into the output table.
    """
    options = parse_args()

    client_config = yt.config
    client_config["proxy"]["url"] = options.yt_proxy
    client_config["auto_merge_output"]["action"] = "merge"

    source_table = options.input_table
    destination_table = options.output_table
    yt.run_map(mapper, source_table, destination_table)

# Run the job only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
