#!/usr/bin/python
# -*- coding: utf-8 -*-

# Compare categorization results obtained with one versus two DSSM models

import sys
import re
import yt.wrapper as yt

def bnrs_cmp(key, recs):
    """Reducer: yield second-table rows whose labels differ from the first table.

    Rows are grouped by the reduce key. Each row carries an '@table_index'
    field that tells which input table it came from; the field is removed
    from the row before any further processing.

    * Rows with table index 0 are the reference: their 'labels' value is
      remembered (the most recent one wins if there are several).
    * Every other row is compared against the remembered reference labels.
      When they differ, the row is emitted without its 'categories' and
      'true_categories' fields and with the reference labels stored under
      'labels_src'.

    Rows from the other table that arrive before any reference row has been
    seen for this key are dropped.
    NOTE(review): this relies on table-0 rows preceding the other rows within
    a key -- presumably guaranteed by the reduce operation; confirm.

    Args:
        key: the reduce key of the current group (unused).
        recs: iterable of row dicts belonging to this key.

    Yields:
        Modified row dicts from the non-reference table whose labels differ.
    """
    # An explicit flag instead of a magic "bid == 0" sentinel, so that a
    # legitimate key value of 0 is not mistaken for "no reference row seen".
    have_reference = False
    reference_labels = None
    for rec in recs:
        table_index = rec.pop('@table_index')
        if table_index == 0:
            have_reference = True
            reference_labels = rec['labels']
        elif have_reference and reference_labels != rec['labels']:
            # Tolerate rows that lack these columns instead of failing the
            # whole job with a KeyError.
            rec.pop('categories', None)
            rec.pop('true_categories', None)
            rec['labels_src'] = reference_labels
            yield rec


def main():
    """Reduce the two categorization tables with bnrs_cmp, then sort the output.

    The input order matters: the first table is seen by the reducer as table
    index 0 and the second as the following index.
    """
    source_tables = [
        '//home/catalogia/users/yuryz/multik/test_with_categs_000',
        '//home/catalogia/users/yuryz/multik/test_with_categs_087',
    ]
    result_table = '//home/catalogia/users/yuryz/multik/bnrs_cmp'

    # "row_fields" control attributes: the reducer reads '@table_index'
    # directly from each row dict.
    row_format = yt.YsonFormat(control_attributes_mode="row_fields")

    yt.run_reduce(
        bnrs_cmp,
        source_tables,
        result_table,
        reduce_by=['bid'],
        format=row_format,
    )
    yt.run_sort(
        result_table,
        sort_by=['bid', 'labels', 'labels_src', 'title', 'snippet'],
    )


# Run the comparison job only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
