#!/usr/bin/python
# -*- coding: utf-8 -*-

# Add an "etalon" (reference-quality) rank to the main bnrs_norm table.

import yt.wrapper as yt


def rank_abs(key, recs):
    """Reducer: attach the negated rank from table 0 to rows of the other tables.

    Rows are told apart by their '@table_index' control field, which is
    removed from every row. A row from table 0 supplies the rank; its
    'rank' value is converted to float and negated. Each row from any
    other table is emitted with 'rank' set to that negated value.

    Rows from other tables are dropped when no table-0 row has been seen
    yet for this key, or when the stored rank equals zero (zero doubles as
    the "no rank" marker). If several table-0 rows occur, the most recent
    one wins.

    Args:
        key: the reduce key of the group (unused).
        recs: iterable of row dicts for this key.

    Yields:
        The non-table-0 row dicts, mutated in place with the 'rank' field.
    """
    negated_rank = 0
    for row in recs:
        if row.pop('@table_index') == 0:
            # Rank-source row: remember its rank, emit nothing.
            negated_rank = -float(row['rank'])
            continue
        if negated_rank == 0:
            # No usable rank known for this key (missing or zero).
            continue
        row['rank'] = negated_rank
        yield row

def rank_rel(key, recs):
    """Reducer: add a relative rank and the negated group size to ranked rows.

    Rows are told apart by their '@table_index' control field, which is
    removed from every row. A row from table 0 supplies 'size' for the
    key. Each row from any other table is emitted with two extra fields:

    * 'rank_rel': -rank / first_rank, rounded to three decimals, where
      first_rank is the 'rank' of the first non-table-0 row of the group
      (so the first emitted row gets -1.0);
    * 'size': the negated size taken from the table-0 row.

    Rows that arrive before any table-0 row for the key are dropped
    instead of crashing the job on an unbound ``size``; this mirrors how
    rank_abs drops rows that have no rank.

    Args:
        key: the reduce key of the group (unused).
        recs: iterable of row dicts for this key.

    Yields:
        The non-table-0 row dicts, mutated in place.

    Raises:
        ZeroDivisionError: if the first ranked row of the group has rank 0.
    """
    size_known = False
    size = None
    first_rank = None
    for rec in recs:
        if rec.pop('@table_index') == 0:
            size = rec['size']
            size_known = True
            continue
        if not size_known:
            # No size row for this key: nothing to attach, skip the row.
            continue
        if first_rank is None:
            # float() keeps the division below a true division even for
            # integer ranks under Python 2.
            first_rank = float(rec['rank'])
        relative = '%.3f' % (-rec['rank'] / first_rank)
        rec['rank_rel'] = float(relative)
        rec['size'] = -size
        yield rec

def main():
    """Build the absolute- and relative-rank tables and print row counts.

    Steps:
      1. Reduce bnrs_rank with bnrs_norm by 'bid' using rank_abs, then sort
         the result by ('mctgs', 'rank').
      2. Reduce ctg_size with that result by 'mctgs' using rank_rel, then
         sort the result by ('mctgs', 'rank_rel', 'bid').
      3. Print the row count of each input and output table, one per line.

    Both reducers read the '@table_index' row field, which is why the
    YSON format is created with control_attributes_mode="row_fields".
    """
    input_1 = '//home/catalogia/users/yuryz/etalon/bnrs_rank'
    input_2 = '//home/catalogia/users/yuryz/bnrs_norm'
    input_3 = '//home/catalogia/users/yuryz/etalon/ctg_size'

    output_1 = '//home/catalogia/users/yuryz/etalon/bnrs_rank_abs'  # absolute rank
    output_2 = '//home/catalogia/users/yuryz/etalon/bnrs_rank_rel_s'  # relative rank

    yt.run_reduce(
        rank_abs,
        [input_1, input_2],
        [output_1],
        reduce_by=['bid'],
        format=yt.YsonFormat(control_attributes_mode="row_fields")
    )
    yt.run_sort(output_1, sort_by=['mctgs', 'rank'])

    yt.run_reduce(
        rank_rel,
        [input_3, output_1],
        [output_2],
        reduce_by=['mctgs'],
        format=yt.YsonFormat(control_attributes_mode="row_fields")
    )
    yt.run_sort(output_2, sort_by=['mctgs', 'rank_rel', 'bid'])

    # Single-argument print(...) produces the same output as the print
    # statement on Python 2 and is also valid syntax on Python 3.
    for table in (input_1, input_2, input_3, output_1, output_2):
        print(yt.row_count(table))


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
