#!/usr/bin/perl -w
#выбор категории по семантическому ядру

use strict;
use utf8;
use open ':utf8';
no warnings 'utf8';
use Data::Dumper;

binmode(STDIN,  ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

use lib "/home/yuryz/arcadia/rt-research/broadmatching/scripts/lib";
use Project;

# Project context used for all phrase processing below. The two flags are
# passed straight to Project->new; presumably they preload the dictionaries
# and a light version of the mini-category data -- confirm against Project.
my $proj = Project->new({
    load_dicts => 1,
    load_minicategs_light => 1,
});


my $bnr_total = 0; # banners processed (not updated anywhere in the visible code)
my $bnr_corr = 0;  # of those, categorized correctly (not updated in the visible code)

# Test driver: reads tab-separated banner records from STDIN
# (banner id, title, body, mini-categories) and, for each record, prints
# the raw record, the prefiltered banner text, its normalized form and its
# order-preserving normalized form, followed by a "--" separator line.
while (defined(my $line = <STDIN>)) {
    chomp $line;

    # A LIMIT of -1 keeps trailing empty columns, and the padding covers
    # lines with fewer than four columns, so no field is ever undef. The
    # printed text is the same as before (undef interpolates as an empty
    # string), but the "uninitialized value" warnings under -w are gone.
    my @fields = split /\t/, $line, -1;
    push @fields, '' while @fields < 4;
    my ($bid, $title, $body, $mctgs) = @fields;
    print "$bid\t$title\t$body\t$mctgs\n";

    # Banner prefiltering applied to the title and body joined by a space.
    my $bnr_pre = $proj->phrase("$title $body")->get_banner_prefiltered_phrase->text;
    print "$bnr_pre\n";

    # Normalization of the prefiltered banner text.
    my $bnr_nrm = $proj->phrase($bnr_pre)->norm_phr;
    print "$bnr_nrm\n";

    # Normalization of the prefiltered banner text that keeps word order.
    my $bnr_nrm_ord = $proj->phrase($bnr_pre)->norm_phr_ordered;
    print "$bnr_nrm_ord\n";

    print "--\n";
}


#--- ProgressBar ---
# Draws a one-line textual progress bar on STDERR.
#
# Arguments:
#   $count  - number of items done so far (undef is treated as 0)
#   $total  - total number of items; 0 or undef is rendered as 0% done
#             instead of dying with "Illegal division by zero"
#   $status - free-form text printed after the percentage (optional)
#
# The line ends with "\r" rather than "\n", so consecutive calls overwrite
# each other on a terminal. Returns the result of flushing STDERR.
sub progress {
    my ($count, $total, $status) = @_;
    $count  = 0  unless defined $count;
    $status = '' unless defined $status;

    my $bar_len = 50;

    # Fraction of the work done; an empty or missing total means "nothing
    # done yet" rather than a fatal division by zero.
    my $ratio = $total ? $count / $total : 0;

    # Clamp the filled part to the bar width so that $count > $total (or a
    # negative count) can neither overflow the bar nor produce a negative
    # repeat count. The percentage below still reports the real ratio.
    my $filled_len = int(sprintf("%.f", $bar_len * $ratio));
    $filled_len = 0        if $filled_len < 0;
    $filled_len = $bar_len if $filled_len > $bar_len;

    my $percents = sprintf "%.1f", 100.0 * $ratio;
    my $bar = ('■' x $filled_len).('-' x ($bar_len - $filled_len));

    printf STDERR "[%s] %s%s %s\r", $bar, $percents, '%', $status;

    # Method calls on bare filehandles need IO::Handle; it is only loaded
    # on demand by Perl 5.14 and later, so load it explicitly here.
    require IO::Handle;
    return STDERR->flush();
}
