#! /usr/bin/env perl

use lib 'scripts/lib';

use utf8;
use Project;

# Constructor flags for the Project object shared by the whole script.
# Their exact effect is defined inside Project (not visible in this file).
my %project_options = (
    load_dicts                              => 1,
    load_minicategs_light                   => 1,
    allow_lazy_dicts                        => 1,
    use_comptrie_subphraser                 => 1,
    use_sandbox_categories_suppression_dict => 1,
);
my $proj = Project->new(\%project_options);

# Set both categs-cache flags on the categories tree.
# NOTE(review): judging by the method names this turns off reading and
# writing of the categs cache -- confirm against the categs_tree class.
$proj->categs_tree->never_read_categs_cache(1);
$proj->categs_tree->never_write_categs_cache(1);

# Main loop: read tab-separated records from STDIN, one per line.  Each record
# is a list of "key=value" fields; the fields title, body, phrases and
# title_extension are picked out, run through the banner prefilter and the
# normalizer, and the original line is echoed to STDOUT with four extra
# fields appended: title_norm, body_norm, phrases_norm, title_extension_norm.
while (my $line = <STDIN>) {
    chomp($line);

    my $title;
    my $body;
    my $phrases;
    my $title_ext;

    # If a key occurs more than once in a record, the last occurrence wins.
    # The loop variable is deliberately not called $a, which would mask the
    # special sort variable.
    for my $field (split /\t/, $line) {
        $title     = substr($field, 6)  if $field =~ /^title=/;
        $body      = substr($field, 5)  if $field =~ /^body=/;
        $phrases   = substr($field, 8)  if $field =~ /^phrases=/;
        $title_ext = substr($field, 16) if $field =~ /^title_extension=/;
    }

    # NOTE(review): title and body are processed even when the field is
    # absent (undef) -- confirm that Project->phrase tolerates undef input.
    my $title_pre = $proj->phrase($title)->get_banner_prefiltered_phrase->text;
    my $title_nrm = $proj->phrase($title_pre)->norm_phr_ordered;

    my $body_pre = $proj->phrase($body)->get_banner_prefiltered_phrase->text;
    my $body_nrm = $proj->phrase($body_pre)->norm_phr_ordered;

    # phrases is optional; it is normalized with norm_phr, whereas the other
    # fields use norm_phr_ordered.
    my $phrases_nrm = "";
    if ($phrases) {
        # Drop every '+' and '!' character before prefiltering.
        $phrases =~ tr/+!//d;
        my $phrases_pre = $proj->phrase($phrases)->get_banner_prefiltered_phrase->text;
        $phrases_nrm = $proj->phrase($phrases_pre)->norm_phr;
    }

    # title_extension is optional as well.  The result must be assigned to
    # the outer variable: re-declaring it with "my" inside this block would
    # shadow it and leave title_extension_norm permanently empty.
    my $title_ext_nrm = "";
    if ($title_ext) {
        my $title_ext_pre = $proj->phrase($title_ext)->get_banner_prefiltered_phrase->text;
        $title_ext_nrm = $proj->phrase($title_ext_pre)->norm_phr_ordered;
    }

    print "$line\ttitle_norm=$title_nrm\tbody_norm=$body_nrm\tphrases_norm=$phrases_nrm\ttitle_extension_norm=$title_ext_nrm\n";
}
