#!/usr/bin/perl -w
#снятие омонимии категорий

use strict;
use utf8;
use open ":utf8";
use Data::Dumper;

binmode STDIN, ':utf8';
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';

# Word => frequency tables for the two categories being compared.
# Each is filled from a tab-separated "word<TAB>frequency" file in the
# current directory.
my %dict1;
load_freq_dict("z1.dic", \%dict1);

my %dict2;
load_freq_dict("z2.dic", \%dict2);

# Read a tab-separated dictionary file into the given hash.
#
#   $path    - file to read; each line is "word<TAB>frequency"
#   $freq_of - hash reference that receives word => frequency pairs
#
# Dies if the file cannot be opened: carrying on with an empty dictionary
# would silently produce all-zero scores for that category.
# No I/O layer is given here; the file-level `use open ":utf8"` pragma
# supplies the default one.
sub load_freq_dict {
    my ($path, $freq_of) = @_;

    open my $fh, '<', $path or die "Cannot open dictionary '$path': $!";
    while (my $line = <$fh>) {
        chomp $line;
        my ($word, $freq) = split /\t/, $line;
        next unless defined $word;    # blank line: nothing to store
        $freq_of->{$word} = $freq;
    }
    close $fh;

    return;
}

# Score every line of STDIN against both dictionaries.
# (The original comment tags the input as "z12pre" - presumably the name of
# the upstream file; confirm with the pipeline that feeds this script.)
# For each input line, prints: <line as read, newline removed> TAB <score1> TAB <score2>
while (defined(my $line = <STDIN>)) {
    chomp $line;

    my ($score1, $score2) = (0, 0);

    # The cleaned text is a single-space-separated word list.
    for my $word (split / /, bnr_text_clean($line)) {
        my $freq1 = $dict1{$word};
        my $freq2 = $dict2{$word};

        # Unknown words (and entries with an empty/zero frequency) add nothing.
        $score1 += $freq1 if $freq1;
        $score2 += $freq2 if $freq2;
    }

    print join("\t", $line, $score1, $score2) . "\n";
}


#--- clean up one line of text ---
# Takes a raw text string and returns it lowercased and reduced to the
# permitted characters (digits, a-z, Cyrillic а-я and ё, space, hyphen),
# with words separated by single spaces and no leading or trailing space.
#
#   $text   - the raw line
#   returns - the cleaned string (may be empty)
sub bnr_text_clean {
    my ($text) = @_;

    # Body of a character class listing the permitted characters.
    # This must be a plain string, not qr//: a qr// object stringifies as
    # "(?^u:...)", so interpolating it inside [...] would add "(", "?", "^",
    # ":" and ")" to the class and let those characters survive the cleanup.
    my $valid_chars = '0-9a-zа-яё \-';

    # Service words: "_" plus everything up to the next space,
    # e.g. __delivery_pattern, _percent, ...
    $text =~ s/_[^ ]+/ /g;

    $text = lc($text);

    # Atoms: a "." directly after "[" or "/", followed by permitted
    # characters, up to the next "]" or "/".
    $text =~ s/(?<=[\[\/])\.[$valid_chars]+?(?=[\]\/])/ /g;

    # Minus words: "-" plus the permitted characters that follow it, up to
    # the next space, non-permitted character, or end of string.
    $text =~ s/-[$valid_chars]+?(?=( |[^$valid_chars]|$))/ /g;

    # Everything that is not a permitted character becomes a space.
    $text =~ s/[^$valid_chars]/ /g;

    # Single-character words. NOTE: only a character with a space on both
    # sides is removed, so one at the very start or end of the string stays.
    $text =~ s/ [^ ] / /g;

    # Trim both ends and collapse runs of spaces.
    $text =~ s/^ +//;
    $text =~ s/ +$//;
    $text =~ s/ +/ /g;

    return $text;
}
