#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import argparse
import sys
from datetime import datetime
from statface_client import StatfaceClient, StatfaceReportConfig


def parse_args():
    """Parse the command-line options of the script.

    Required options are ``--date``, ``--batch-table-root`` and
    ``--current-table-pattern``; the remaining ones fall back to defaults.

    Returns:
        The ``argparse`` namespace built from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        description='Calculate count of images left over from the previous batch'
    )

    # (flags, keyword settings) pairs, registered in the order they should
    # appear in the generated help text.
    option_specs = (
        (('-c', '--yt-cluster'), dict(default='hahn', help='YT cluster.')),
        (('--date',), dict(required=True, help='File with date')),
        (('--date-format',), dict(default='%Y-%m-%d', help='Date format')),
        (('--batch-table-root',), dict(required=True, help='Path of the batch table root')),
        (('--current-table-pattern',), dict(required=True, help='Current batch table name pattern')),
        (('-t', '--token-path'), dict(default='token', help='Path to the file with statface token.')),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()


def get_report_config():
    """Build the Statface configuration of the leftover-images report.

    The YAML below declares one dimension (``fielddate``), four numeric
    measures (``leftover_count``, ``leftover_ratio``, ``total_count``,
    ``new_count``) and one line graph per measure. ``leftover_ratio`` is
    given a ``Float`` view type with precision 10. Titles are in Russian.

    Returns:
        A ``StatfaceReportConfig`` loaded from that YAML.
    """
    yaml_text = u"""
    dimensions:
      - fielddate: date
    measures:
      - leftover_count: number
      - leftover_ratio: number
      - total_count: number
      - new_count: number
    title: Количество картинок, оставшихся с предыдущего батча
    graphs:
    - type: line
      title: Количество новых картинок
      fields:
      - new_count
      titles:
        new_count: Количество новых картинок, попавших в батч
    - type: line
      title: Количество оставшихся картинок
      fields:
      - leftover_count
      titles:
        leftover_count: Количество оставшихся картинок
    - type: line
      title: Доля оставшихся картинок
      fields:
      - leftover_ratio
      titles:
        leftover_ratio: Доля оставшихся картинок
    - type: line
      title: Размер батча
      fields:
      - total_count
      titles:
        total_count: Размер батча
    view_types:
      leftover_ratio:
        type: Float
        precision: 10
    """
    report_config = StatfaceReportConfig()
    report_config.from_yaml(yaml_text)
    return report_config


def get_report(token_path):
    """Return the Statface report object with its configuration uploaded.

    Args:
        token_path: path passed to the client as ``auth_config_path``.

    Returns:
        The report ``Search_Spam/CAPTCHA/leftover_count`` obtained from a
        ``StatfaceClient`` pointed at ``upload.stat.yandex-team.ru``.
    """
    statface = StatfaceClient(client_config=dict(
        host='upload.stat.yandex-team.ru',
        auth_config_path=token_path,
    ))
    leftover_report = statface.get_report('Search_Spam/CAPTCHA/leftover_count')
    # Push the report definition on every call, before the report is handed
    # back to the caller.
    leftover_report.upload_config(get_report_config())
    return leftover_report


def calculate_leftover_images_count(prev_table, cur_table):
    """Count the ``unknown_id`` values that occur in both tables.

    Runs a map-reduce over ``[prev_table, cur_table]`` grouped by
    ``unknown_id`` into a temporary table and returns that table's row count.

    Args:
        prev_table: path of the previous batch table (first input).
        cur_table: path of the current batch table (second input).

    Returns:
        Number of distinct ``unknown_id`` values seen in both inputs.
    """
    def mapper(row):
        # Keep only the join key and the index of the table the row came from.
        # NOTE(review): '@table_index' is presumably injected by the
        # 'row_fields' control attributes mode requested below -- confirm.
        yield dict(table_index=row['@table_index'], unknown_id=row['unknown_id'])

    def reducer(key, rows):
        seen_in_previous = False
        seen_in_current = False
        for row in rows:
            origin = row['table_index']
            # Index 0 is treated as the previous table and 1 as the current
            # one, matching the order of the input list.
            if origin == 0:
                seen_in_previous = True
            elif origin == 1:
                seen_in_current = True

        if not (seen_in_previous and seen_in_current):
            return
        yield {'unknown_id': key['unknown_id']}

    with yt.TempTable() as joined:
        yt.run_map_reduce(
            mapper,
            reducer,
            [prev_table, cur_table],
            joined,
            reduce_by=['unknown_id'],
            format=yt.YsonFormat(control_attributes_mode='row_fields'),
        )
        # The row count must be read while the temporary table still exists.
        return yt.row_count(joined)


def unique_key_count(table, key):
    """Count the distinct values of column ``key`` in ``table``.

    Runs a reduce-only map-reduce (no mapper) grouped by ``key`` that emits
    one row per group into a temporary table, then returns its row count.

    Args:
        table: path of the input table.
        key: name of the column to group by.

    Returns:
        Number of rows in the reduced table, i.e. one per distinct key.
    """
    def reducer(group_key, _rows):
        # One output row per group; the group's rows are not inspected.
        yield group_key

    group_columns = [key]
    with yt.TempTable() as distinct_keys:
        yt.run_map_reduce(None, reducer, table, distinct_keys, reduce_by=group_columns)
        return yt.row_count(distinct_keys)


def find_previous_table(all_tables, current_table):
    """Return the greatest table name that is strictly less than the current one.

    Names are compared with the ordinary ``<`` operator.

    Args:
        all_tables: iterable of table names.
        current_table: name of the current batch table.

    Returns:
        The largest element of ``all_tables`` below ``current_table``, or
        ``None`` when there is no such element.
    """
    earlier = [name for name in all_tables if name < current_table]
    if not earlier:
        return None
    return max(earlier)


def main():
    """Compute leftover-image statistics for one batch and upload them.

    Steps:
      1. Read the date from the file given by ``--date`` and format the
         current table name from ``--current-table-pattern``.
      2. List ``--batch-table-root`` and pick the previous table with
         ``find_previous_table``.
      3. Inside one YT transaction, count the ids shared by both tables and
         the distinct ids of the current table.
      4. Upload a single daily data point to the Statface report.

    Raises:
        RuntimeError: if the current table is not found under the root, or
            if no previous table exists for it.
    """
    args = parse_args()

    yt.config['proxy']['url'] = args.yt_cluster

    # --date is a path to a file holding the date; close the file promptly
    # instead of relying on garbage collection.
    with open(args.date, 'r') as date_file:
        strdate = date_file.read().strip()
    date = datetime.strptime(strdate, args.date_format)
    current_table = date.strftime(args.current_table_pattern)

    all_tables = yt.list(args.batch_table_root)
    if current_table not in all_tables:
        raise RuntimeError('Table %s not found' % repr(current_table))

    previous_table = find_previous_table(all_tables, current_table)
    if previous_table is None:
        raise RuntimeError('No previous table for %s' % repr(current_table))

    # From here on the names are full paths under the batch root.
    current_table = args.batch_table_root + '/' + current_table
    previous_table = args.batch_table_root + '/' + previous_table

    # sys.stderr.write emits the same text as the former print statements and
    # is valid on both Python 2 and Python 3.
    sys.stderr.write('Current table: %s\n' % repr(current_table))
    sys.stderr.write('Previous table: %s\n' % repr(previous_table))

    with yt.Transaction():
        count = calculate_leftover_images_count(previous_table, current_table)
        total_count = unique_key_count(current_table, 'unknown_id')

    # A current batch without any keys would make the ratio undefined; report
    # 0.0 instead of failing with ZeroDivisionError.
    if total_count:
        leftover_ratio = float(count) / total_count
    else:
        leftover_ratio = 0.0

    data = [{
        'fielddate': date.strftime('%Y-%m-%d'),
        'leftover_count': count,
        'leftover_ratio': leftover_ratio,
        'total_count': total_count,
        'new_count': total_count - count,
    }]
    report = get_report(args.token_path)
    report.upload_data(scale='daily', data=data)


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
