#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import argparse
import sys
import csv

def HandleOption():
    """Build the command-line parser for this script.

    The parser accepts a single, mandatory option ``-f``/``--file`` whose
    value is stored on the parsed namespace as ``file``.

    Returns:
        argparse.ArgumentParser: the configured (not yet invoked) parser.
    """
    file_option = {
        "dest": "file",
        "required": True,
        "help": "Input file with data",
    }
    option_parser = argparse.ArgumentParser()
    option_parser.add_argument("-f", "--file", **file_option)
    return option_parser

def main():
    """Sum the weight of every word found in a tab-separated query file.

    The file named by ``-f``/``--file`` is read as tab-separated rows with
    the columns ``query``, ``weight`` and ``rnd`` (no header line).  Each
    query is decoded from UTF-8, lower-cased, stripped of the punctuation
    listed in ``junk`` and split on single spaces; the row's integer weight
    is added to the running total of every resulting non-empty word.

    One ``word<TAB>total`` line per distinct word is written to stdout,
    UTF-8 encoded, in dictionary iteration order.  Rows whose weight is
    missing or not an integer are reported on stderr and skipped.

    Note: this is Python 2 code -- csv fields are byte strings, which is
    why they are decoded on input and encoded again on output.
    """
    junk=u'\\\'-_.,—"‘’“”•;:›><()#%@!^&*+=№?[]►▼✔/|`~'
    # Built once: maps each junk code point to None, which unicode.translate
    # treats as "delete".  One pass per query instead of one per junk char.
    junk_table = dict.fromkeys(map(ord, junk))
    words = {}
    args = HandleOption().parse_args()
    with open(args.file, "r") as f:
        fields = ['query', 'weight', 'rnd']
        # NOTE(review): the csv default quotechar is '"', so a query that
        # starts with a double quote is parsed as a quoted field.  If the
        # input is a raw, unquoted log, quoting=csv.QUOTE_NONE may be what
        # is wanted -- confirm against the producer of the file.
        data = csv.DictReader(f, fieldnames=fields, delimiter='\t')
        for row in data:
            try:
                # A short row leaves 'weight' as None (TypeError); a
                # non-numeric value raises ValueError.
                w = int(row['weight'])
            except (TypeError, ValueError):
                sys.stderr.write(
                    '%s:%d: skipping row with missing or non-integer '
                    'weight: %r\n' % (args.file, data.line_num, row['weight']))
                continue
            line = row['query'].decode('utf-8').lower().translate(junk_table)
            for word in line.split(' '):
                # Consecutive spaces (or stripped junk) yield empty tokens.
                if word:
                    words[word] = words.get(word, 0) + w
    # Emit results after the input file is closed.  sys.stdout.write is used
    # instead of a multi-item print statement so that no separator space is
    # inserted between the word and the tab.
    for word in words:
        sys.stdout.write('%s\t%d\n' % (word.encode('utf-8'), words[word]))

# Run the word/weight aggregation only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
