#!/usr/bin/python
# -*- coding: utf-8 -*-

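# Reduce job: adds a 'rank' field to each row of bnrs_norm_s, computed from
# the dict_freq frequencies of the words in its 'bnorm' field.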

import math
import yt.wrapper as yt


def _log2(x):
    """Return the base-2 logarithm of *x*.

    ``math.log2`` only exists on Python 3.3+; fall back to ``math.log(x, 2)``
    on older interpreters so the reducer does not fail with AttributeError.
    """
    try:
        return math.log2(x)
    except AttributeError:
        return math.log(x, 2)


def reducer(key, recs):
    """Attach a frequency-based ``rank`` to every row of the second table.

    Rows with ``@table_index == 0`` are dictionary rows: their ``word`` ->
    ``freq`` pairs are collected into a lookup table and nothing is emitted.
    Every other row is emitted with a new ``rank`` field:

        rank = mean(freq of known words in ``bnorm``) * log2(cnt + 1)

    where ``cnt`` is the number of whitespace-separated words of ``bnorm``
    that have a truthy frequency in the lookup table.  When no word is
    known, ``rank`` is ``0``.

    Args:
        key: The reduce key of the current group (unused).
        recs: Iterable of row dicts; each must carry ``@table_index``.
            NOTE(review): the lookup only works if the table-0 rows of a
            group arrive before its other rows -- confirm the ordering
            guarantee of the reduce operation.

    Yields:
        Each non-dictionary row, without ``@table_index`` and with ``rank``
        added.
    """
    freq_by_word = {}
    for rec in recs:
        table_index = rec.pop('@table_index')
        if table_index == 0:
            freq_by_word[rec['word']] = rec['freq']
            continue

        total = 0
        cnt = 0
        for wrd in rec['bnorm'].split():
            # .get() instead of indexing: a word missing from the dictionary
            # must be skipped, not crash the whole job with KeyError.
            freq = freq_by_word.get(wrd)
            if freq:
                cnt += 1
                total += freq

        rank = total
        if cnt > 0:
            # float() forces true division on Python 2 as well, where
            # int / int would silently truncate the mean.
            rank = float(total) / cnt
            rank *= _log2(cnt + 1)

        rec['rank'] = rank
        yield rec


def main():
    """Run the ranking reduce on YT and print the row counts of all tables.

    Reduces ``dict_freq`` and ``bnrs_norm_s`` by the ``mctgs`` column with
    ``reducer``, writes the result to ``bnrs_rank``, then prints the row
    count of each input table and of the output table.

    NOTE(review): presumably both input tables must already be sorted by
    ``mctgs`` for the reduce to be accepted -- confirm.
    """
    # Order matters: the reducer treats table index 0 as the word-frequency
    # dictionary and every other index as rows to be ranked.
    input_1 = '//home/catalogia/users/yuryz/dict_freq'
    input_2 = '//home/catalogia/users/yuryz/bnrs_norm_s'
    output = '//home/catalogia/users/yuryz/bnrs_rank'

    yt.run_reduce(reducer, [input_1, input_2], output, reduce_by='mctgs')

    # Single-argument print(...) behaves identically on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(yt.row_count(input_1))
    print(yt.row_count(input_2))
    print(yt.row_count(output))


if __name__ == '__main__':
    main()
