#!/usr/bin/perl -w
# Category selection based on the semantic core (the list of query words)

use strict;
use utf8;
use open ':utf8';
no warnings 'utf8';
use Data::Dumper;

# Put the :utf8 layer on all three standard streams.
binmode($_, ":utf8") for \*STDIN, \*STDOUT, \*STDERR;

# Load the word index into %dict.
# Each line of "word_index_id" is tab-separated: the first field is the word,
# the remaining fields are kept as-is and stored re-joined with tabs.
my %dict;
my $index_file = "word_index_id";

# Three-argument open with a lexical handle; fail loudly if the index is
# missing instead of silently running with an empty dictionary.
# No layer is given in the mode, so the file-level `use open` default applies.
open my $index_fh, '<', $index_file
    or die "Cannot open $index_file: $!";

my $cnt = 0;
while (my $line = <$index_fh>) {
    # Progress counter on STDERR, refreshed every 1000 lines.
    print STDERR "$cnt\r" if ++$cnt % 1000 == 0;
    chomp $line;

    my ($word, @ctg_list) = split /\t/, $line;

    # A blank line yields no fields at all; skip it rather than using an
    # undefined value as a hash key.
    next unless defined $word;

    $dict{$word} = join("\t", @ctg_list);
}
close $index_fh;
print STDERR "\n";

# For every input line (space-separated words), intersect the category lists
# of the words found in %dict, summing the weights of the categories that
# survive, and print "ctg_id<TAB>total_weight" lines, heaviest first.
# Words missing from %dict (or mapped to an empty value) are reported on
# STDERR and otherwise ignored.
while (my $line = <STDIN>) {    # e.g. fed from test.txt
    chomp $line;

    # Running intersection: ctg_id => summed weight over the words seen so far.
    my %weight_of;

    for my $word (split / /, $line) {
        my $entry = $dict{$word};
        if (!$entry) {
            print STDERR "$word <NOT_FOUND>\n";
            next;
        }

        # Flat list of pairs: even indexes are ctg_id, odd indexes are the
        # weights (tf_idf).
        my @ctg = split /\t/, $entry;

        # An empty running set means this word seeds it.
        # NOTE(review): the set is also empty after an intersection that left
        # nothing, so the next found word re-seeds it. This matches the
        # previous behaviour; confirm whether it is intended.
        my $is_seed = !%weight_of;

        my %next_weight_of;
        # A trailing id without a weight (odd field count) is ignored.
        for (my $i = 0; $i < $#ctg; $i += 2) {
            my ($ctg_id, $weight) = @ctg[$i, $i + 1];

            # Test key presence, not truthiness: a category whose accumulated
            # weight is 0 is still part of the intersection.
            if (exists $weight_of{$ctg_id}) {
                $next_weight_of{$ctg_id} = $weight_of{$ctg_id} + $weight;
            }
            elsif ($is_seed) {
                $next_weight_of{$ctg_id} = $weight;
            }
        }
        %weight_of = %next_weight_of;
    }

    # Heaviest first; ties are broken by category id so the output order does
    # not depend on hash iteration order.
    my @ranked = sort {
        $weight_of{$b} <=> $weight_of{$a} or $a cmp $b
    } keys %weight_of;

    for my $ctg_id (@ranked) {
        print "$ctg_id\t$weight_of{$ctg_id}\n";
    }
}
