#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import argparse
from statface_client import StatfaceClient, StatfaceReportConfig


def parse_args():
    """Parse the command-line options of the script.

    Options:
        -c / --yt-cluster: YT cluster name, defaults to ``hahn``.
        -l / --last-table: path of the last markup results table (required).
        -t / --token-path: path to the file with the Statface token,
            defaults to ``token``.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # (flags, keyword settings) pairs, registered in declaration order so the
    # generated help text keeps the same option order.
    option_specs = (
        (('-c', '--yt-cluster'), {'default': 'hahn', 'help': 'YT cluster.'}),
        (('-l', '--last-table'), {'required': True, 'help': 'Last markup results table.'}),
        (('-t', '--token-path'), {'default': 'token', 'help': 'Path to the file with statface token.'}),
    )

    arg_parser = argparse.ArgumentParser(description='Calculate OCR CAPTCHA markup quality')
    for flags, settings in option_specs:
        arg_parser.add_argument(*flags, **settings)
    return arg_parser.parse_args()


def get_report_config():
    """Build the Statface report configuration from an inline YAML description.

    The YAML declares a single ``fielddate`` dimension, three numeric
    measures (``markuped``, ``correct_answers_ratio``,
    ``diff_correct_answers_ratio``), one line graph per measure, and a
    ``Float`` view type with precision 10 for both ratio measures.

    Returns:
        A ``StatfaceReportConfig`` instance loaded via ``from_yaml``.
    """
    report_yaml = u"""
    dimensions:
      - fielddate: date
    measures:
      - markuped: number
      - correct_answers_ratio: number
      - diff_correct_answers_ratio: number
    title: Качество картинок с ответами по ежедневной разметке
    graphs:
    - type: line
      title: Количество размеченных за день картинок
      fields:
      - markuped
      titles:
        markuped: Количество картинок
    - type: line
      title: Доля правильно размеченных картинок
      fields:
      - correct_answers_ratio
      titles:
        correct_answers_ratio: Доля правильных ответов
    - type: line
      title: Доля правильно размеченных картинок с отличием от OCR
      fields:
      - diff_correct_answers_ratio
      titles:
        diff_correct_answers_ratio: Доля правильных ответов с отличием от OCR
    view_types:
      correct_answers_ratio:
        type: Float
        precision: 10
      diff_correct_answers_ratio:
        type: Float
        precision: 10
    """
    report_config = StatfaceReportConfig()
    report_config.from_yaml(report_yaml)
    return report_config


def get_report(token_path):
    """Return the Statface report with its configuration uploaded.

    Args:
        token_path: value passed to the Statface client as
            ``auth_config_path``.

    Returns:
        The report object obtained from the client for
        ``Search_Spam/CAPTCHA/markuped_daily_count``, after
        ``upload_config`` has been called on it with the config built by
        ``get_report_config``.
    """
    statface = StatfaceClient(client_config={
        'host': 'upload.stat.yandex-team.ru',
        "auth_config_path": token_path,
    })
    daily_report = statface.get_report('Search_Spam/CAPTCHA/markuped_daily_count')
    # The config is uploaded on every call, before the report is handed back.
    daily_report.upload_config(get_report_config())
    return daily_report


def calc_precision_metrics(last_table):
    """Count correctly answered records in a markup results table.

    Args:
        last_table: table path handed to ``yt.read_table`` (read in JSON
            format).

    Returns:
        A ``(correct, correct_and_differs)`` tuple of ints:
        ``correct`` counts records whose ``CaptchaAnswer`` equals ``WordGT``;
        ``correct_and_differs`` counts those of them whose ``CaptchaAnswer``
        also differs from ``OCRRecognition``.
    """
    matched = 0
    matched_beyond_ocr = 0
    for row in yt.read_table(last_table, format=yt.JsonFormat()):
        answer = row["CaptchaAnswer"]
        if answer != row["WordGT"]:
            # Wrong answers contribute to neither counter.
            continue
        matched += 1
        if answer != row["OCRRecognition"]:
            matched_beyond_ocr += 1

    return matched, matched_beyond_ocr


def main():
    """Compute markup quality metrics for one table and upload them to Statface.

    Steps:
        1. Parse command-line options and point the YT client at the
           requested cluster.
        2. Take the table row count as the number of marked-up images.
        3. Compute the share of correct answers among all rows, and the share
           of correct answers that differ from the OCR result among the
           correct ones.
        4. Upload a single daily data point to the Statface report.

    A ratio whose denominator is zero is reported as ``0.0`` instead of
    raising ``ZeroDivisionError`` (this includes an empty input table).
    """
    args = parse_args()

    yt.config["proxy"]["url"] = args.yt_cluster

    markuped = int(yt.row_count(args.last_table))

    correct_answers, diff_correct_answers = calc_precision_metrics(args.last_table)

    # Guard both divisions the same way: an empty table must not crash the
    # script, and a table without correct answers has no "differs from OCR"
    # share to compute.
    if markuped == 0:
        correct_answers_ratio = 0.0
    else:
        correct_answers_ratio = float(correct_answers) / markuped
    if correct_answers == 0:
        diff_correct_answers_ratio = 0.0
    else:
        diff_correct_answers_ratio = float(diff_correct_answers) / correct_answers

    # The report date is the last path component of the table with "_"
    # replaced by "-" (presumably tables are named like YYYY_MM_DD -- confirm).
    date = args.last_table.split("/")[-1]
    date = date.replace("_", "-")

    data = [{
        'fielddate': date,
        'markuped': markuped,
        'correct_answers_ratio': correct_answers_ratio,
        'diff_correct_answers_ratio': diff_correct_answers_ratio,
    }]
    report = get_report(args.token_path)
    report.upload_data(scale='daily', data=data)

# Run the metric calculation and upload only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
