#!/usr/bin/python
# -*- coding: utf-8 -*-

# Select the prefixes of virtual categories.

import yt.wrapper as yt


def reducer(key, recs):
    """Tag every row of one ``mctgs`` group with its virtual-category prefix.

    The prefix is everything in a row's ``mctgs`` value that precedes the
    position at which the ``' _ '`` separator first occurs in the group key.

    Args:
        key: mapping holding the reduce key; only ``key['mctgs']`` is read.
        recs: iterable of row dicts belonging to this group.

    Yields:
        The same row objects, mutated in place with an added ``virt_pref``
        field. Groups whose key contains no ``' _ '`` yield nothing.
    """
    sep_at = key['mctgs'].find(' _ ')
    if sep_at == -1:
        # No separator in the group key: the whole group is dropped.
        return

    # NOTE: an earlier variant capped the output at 100 rows per group;
    # the current version emits every row.
    for row in recs:
        row["virt_pref"] = row['mctgs'][:sep_at]
        yield row


def main():
    """Build the ``virt_pref`` table from ``bnrs_etalon``.

    Steps:
      1. Sort the source table by ``(mctgs, rank_rel, bid)`` into an
         intermediate table.
      2. Reduce the intermediate table by ``mctgs`` with ``reducer`` to
         produce the result table.
      3. Re-sort the result table in place by ``bid``.
      4. Print the row counts of the source, intermediate and result tables
         (one per line), then remove the intermediate table.
    """
    source_table = '//home/catalogia/users/yuryz/bnrs_etalon'
    sorted_table = '//home/catalogia/users/yuryz/virt/bnrs_etalon_s'
    result_table = '//home/catalogia/users/yuryz/virt/virt_pref'

    # The reduce below groups by 'mctgs', so its input must be sorted with
    # 'mctgs' as the leading sort column.
    yt.run_sort(source_table, sorted_table,
                sort_by=['mctgs', 'rank_rel', 'bid'])

    yt.run_reduce(reducer, [sorted_table], [result_table],
                  reduce_by=['mctgs'])
    yt.run_sort(result_table, result_table, sort_by=['bid'])

    # Single-argument print(...) behaves identically as a Python 2 statement
    # and as a Python 3 function call, so the script parses under both.
    for table in (source_table, sorted_table, result_table):
        print(yt.row_count(table))

    # The sorted copy is only an intermediate artifact.
    yt.remove(sorted_table)

# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
