#!/usr/bin/perl -w
#печать "сырых" фраз атомов

use strict;
use utf8;
use open ':utf8';
no warnings 'utf8';
use Data::Dumper;

binmode(STDIN,  ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

use lib "/home/yuryz/arcadia/rt-research/broadmatching/scripts/lib";
use Project;

# Shared Project handle; atom_raw_phrases() below uses it to fetch raw
# phrases and to walk the category tree.
my $proj = Project->new({
    load_dicts => 1,
    load_minicategs_light => 1,
});


# Input (STDIN): lines of dict_deprecated_categs_words_ru (see Katya's e-mail
# of 2018-02-19), one entry per line:
#     <atom-or-word> [ TAB <categ>/<categ>/... ]
# Lines starting with '#' are comments.
#
# Output (STDOUT): for every phrase of the entry, one row
#     <lowercased phrase> TAB <categ>
# per listed category, or a single "<lowercased phrase> TAB *" row when the
# entry lists no categories.
while (my $line = <STDIN>) {
    # Strip the line terminator, including the "\r" of CRLF input, so it
    # cannot end up glued to the last field.
    $line =~ s/[\r\n]+\z//;
    next if $line =~ /^#/;

    my ($atom, $ctgs) = split /\t/, $line;

    # Blank lines (or lines with an empty first column) carry no entry;
    # skipping them avoids "uninitialized" warnings and bogus "\t*" rows.
    next unless defined $atom && length $atom;

    # The category column is optional: a line without a TAB leaves $ctgs
    # undefined, which must not be fed to split().
    my @ctgs = defined $ctgs ? split(m{/}, $ctgs) : ();

    # An atom starts with "." or, for a hierarchical atom, with "hier."
    # (the form atom_raw_phrases() normalises itself); anything else is a
    # plain word and is passed through as its own single phrase.
    my @phl = $atom =~ /^(?:hier)?\./
        ? atom_raw_phrases($atom)
        : ($atom);

    for my $phr (@phl) {
        my $phrase = lc $phr;    # lowercase once per phrase, not per category
        if (@ctgs) {
            print "$phrase\t$_\n" for @ctgs;
        } else {
            print "$phrase\t*\n";
        }
    }
}


#--- "raw" phrases of an atom ---
# Collects the raw phrases of the atom's category and of every category
# reachable from it through get_minicateg_children().
#
# Args:    $atom - atom name; a leading "hier." (hierarchical atom) is
#                  reduced to a bare leading "." before the lookup.
# Returns: flat list of everything get_category_raw_phrases() yields for the
#          starting category and each descendant, visited breadth-first.
# Relies on the file-level $proj object.
sub atom_raw_phrases {
    my ($atom) = @_;

    # Work on a copy so the caller's value is untouched: "hier.X" -> ".X".
    (my $start = $atom) =~ s/^hier\./\./;

    my @collected;
    my @pending = ($start);    # FIFO of categories still to be visited
    while (@pending) {
        my $node = shift @pending;
        push @collected, $proj->get_language('ru')->get_category_raw_phrases($node);
        push @pending, $proj->categs_tree->get_minicateg_children($node);
    }

    return @collected;
}
