#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import argparse
from statface_client import StatfaceClient, StatfaceReportConfig


def parse_args():
    """Parse the command-line options of the script.

    Recognised options:
      -c / --yt-cluster  YT cluster name (defaults to 'hahn').
      -l / --last-table  path of the table to process (mandatory).
      -t / --token-path  path to the statface token file (defaults to 'token').

    Returns the ``argparse.Namespace`` with ``yt_cluster``, ``last_table``
    and ``token_path`` attributes.
    """
    cli = argparse.ArgumentParser(description='Calculate OCR-Toloka diff in CAPTCHA input stream')

    # (flags, keyword settings) pairs, registered in declaration order.
    option_specs = (
        (('-c', '--yt-cluster'), {'default': 'hahn', 'help': 'YT cluster.'}),
        (('-l', '--last-table'), {'required': True, 'help': 'Last markuped input stream table.'}),
        (('-t', '--token-path'), {'default': 'token', 'help': 'Path to the file with statface token.'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()


def get_report_config():
    """Build the Statface report configuration from an inline YAML description.

    The YAML declares one dimension (``fielddate``), two measures
    (``markuped`` and ``diff_ratio``), one line graph per measure and a
    Float view with precision 10 for ``diff_ratio``.

    Returns the ``StatfaceReportConfig`` populated via ``from_yaml``.
    """
    # Kept verbatim: the text is handed to the config parser as-is.
    yaml_text = u"""
    dimensions:
      - fielddate: date
    measures:
      - markuped: number
      - diff_ratio: number
    title: Доля отличий между OCR и Толокой во входном потоке
    fill_missing_dates: 0
    graphs:
    - type: line
      title: Количество размеченных картинок
      fields:
      - markuped
      titles:
        markuped: Количество картинок
    - type: line
      title: Доля отличий между OCR и Толокой
      fields:
      - diff_ratio
      titles:
        diff_ratio: Доля отличий между OCR и Толокой во входном потоке
    view_types:
      diff_ratio:
        type: Float
        precision: 10
    """

    report_config = StatfaceReportConfig()
    report_config.from_yaml(yaml_text)
    return report_config


def get_report(token_path):
    """Return the 'Search_Spam/CAPTCHA/ocr_toloka_diff' Statface report.

    A client is created for host 'upload.stat.yandex-team.ru' with
    ``token_path`` passed as its ``auth_config_path``. The configuration
    produced by ``get_report_config()`` is uploaded to the report on
    every call, before the report object is returned.
    """
    statface = StatfaceClient(client_config=dict(
        host='upload.stat.yandex-team.ru',
        auth_config_path=token_path,
    ))

    diff_report = statface.get_report('Search_Spam/CAPTCHA/ocr_toloka_diff')
    diff_report.upload_config(get_report_config())
    return diff_report


def calc_metrics(last_table):
    """Count the rows of ``last_table`` whose "Type" field equals "DIFF".

    The table is read through ``yt.read_table`` in JSON format and every
    record is inspected once. Returns the count as an int (0 when no
    record matches).
    """
    rows = yt.read_table(last_table, format=yt.JsonFormat())
    return sum(1 for row in rows if row["Type"] == "DIFF")


def main():
    """Compute the DIFF share for one table and upload it to Statface.

    Steps:
      1. Parse the command line and point the YT client at the chosen cluster.
      2. Take the table's row count and the number of "DIFF" records.
      3. Derive ``diff_ratio`` (zero when the table has no rows).
      4. Upload a single daily data point to the report.
    """
    options = parse_args()
    table_path = options.last_table

    yt.config["proxy"]["url"] = options.yt_cluster

    total_rows = int(yt.row_count(table_path))
    diff_rows = calc_metrics(table_path)

    # An empty table would divide by zero, so its ratio is reported as 0.
    ratio = 0 if total_rows == 0 else float(diff_rows) / total_rows

    # The last path component is used as the date, with underscores
    # replaced by dashes.
    table_name = table_path.split("/")[-1]
    data_point = {
        'fielddate': table_name.replace("_", "-"),
        'markuped': total_rows,
        'diff_ratio': ratio,
    }

    get_report(options.token_path).upload_data(scale='daily', data=[data_point])

# Run the calculation only when the file is executed as a script,
# so that importing the module has no side effects.
if __name__ == "__main__":
    main()
