#!/usr/bin/python
# -*- coding: utf-8 -*-

# Compute category frequencies

import yt.wrapper as yt


def reducer(key, recs):
    """Emit one row per reduce key carrying the negated size of its group.

    Args:
        key: Reduce key; only its ``'mctgs'`` field is read.
        recs: Iterable of the rows sharing ``key``. Their contents are
            ignored -- the rows are only counted.

    Yields:
        A single dict ``{"mctgs": <key value>, "freq": -<number of rows>}``.
    """
    # Only the group size matters, so each row is consumed and discarded.
    group_size = sum(1 for _ in recs)
    # The count is emitted negated (see the "freq" sort key used in main()).
    row = {"mctgs": key['mctgs'], "freq": -group_size}
    yield row


def main():
    """Build the category frequency table on YT and print row counts.

    Steps:
      1. Reduce the source table by ``mctgs`` with ``reducer``, writing one
         row per distinct ``mctgs`` value to the destination table.
      2. Sort the destination table by ``freq`` and then ``mctgs``.
      3. Print the row count of the source table, then of the destination
         table.
    """
    # Named *_table rather than input/output so the builtin input() is not
    # shadowed.
    src_table = '//home/catalogia/users/yuryz/bnrs_norm_s'
    dst_table = '//home/catalogia/users/yuryz/ctgs_freq'

    yt.run_reduce(reducer, src_table, dst_table, reduce_by=['mctgs'])

    # No destination is passed, so the table is presumably sorted in place --
    # confirm against the yt.wrapper documentation.
    # "freq" holds negated counts, so (assuming an ascending sort) the most
    # frequent categories come first; "mctgs" breaks ties.
    yt.run_sort(dst_table, sort_by=['freq', 'mctgs'])

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and is also valid Python 3.
    print(yt.row_count(src_table))
    print(yt.row_count(dst_table))


# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
