#!/usr/bin/python
# -*- coding: utf-8 -*-

# Sum clicks and count duplicate occurrences (frequency) of entries in the top-queries table.

import yt.wrapper as yt


def reducer(key, recs):
    """Collapse all rows sharing one key into a single aggregated row.

    Args:
        key: mapping that provides the 'url', 'position' and 'query' values
            of the group being reduced.
        recs: iterable of rows belonging to the group; each row must have a
            'clicks' value.

    Yields:
        One dict with the three key fields, the sum of 'clicks' over the
        group ('clicks') and the number of rows in the group ('freq').
    """
    total_clicks = 0
    row_count = 0  # stays 0 when the group is empty
    for row_count, row in enumerate(recs, 1):
        total_clicks += row['clicks']

    result = {}
    for field in ('url', 'position', 'query'):
        result[field] = key[field]
    result['clicks'] = total_clicks
    result['freq'] = row_count
    yield result


def main():
    """Aggregate the top-queries table and print row counts before and after.

    Runs `reducer` over the source table grouped by (url, position, query),
    writes the result to the destination table, sorts that table by the same
    columns, then prints the row count of the source table followed by the
    row count of the destination table.
    """
    src_table = '//home/catalogia/yuryz/queries_top'
    dst_table = '//home/catalogia/yuryz/queries_top_sum'
    key_columns = ['url', 'position', 'query']

    # NOTE(review): the reduce presumably requires the source table to be
    # already sorted by the key columns -- confirm against the producer of
    # queries_top.
    yt.run_reduce(reducer, src_table, dst_table, reduce_by=list(key_columns))
    yt.run_sort(dst_table, sort_by=list(key_columns))

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(yt.row_count(src_table))
    print(yt.row_count(dst_table))


# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
