package BM::Dicts::Norm;
use strict;

# генерация словарей нормализации (norm, snorm)
# на основе леммера, частот по Рускорпоре, доп. словарей

use autodie qw( close );

use utf8;
use open ':utf8';

use base qw(ObjLib::ProjPart);

use List::Util qw(min max maxstr);
use Data::Dumper;

use Utils::Words qw(load_dict text2words word2norm norm2good stop4norm);
use Utils::Sys qw(
    uniq
    dir_files rotate_files
    load_json save_json
);


# Вспомогательные функции

# функции для просмотра и анализа склеек
sub analyze_synonyms_path {
    my $self = shift;
    my $w1 = shift;
    my $w2 = shift;
    my $lang = shift || 'ru';
    my ($index, $index_missprints);
    if (!$self->{"synpath_index_$lang"}) {
        my $info_file = $self->{info}{"syn_$lang"};
        $self->log("loading synpath index from '$info_file' ...");
        my $data = load_json($info_file);
        $self->{"synpath_index_$lang"} = (keys %$data > 2) ? $data : $data->{neighbour};    # костыль для перехода на новый формат файла $info_file    TODO remove this and use          $data->{neighbour};    
        $self->{"missprints_index_$lang"} = (keys %$data > 2) ? {} : $data->{good_info};    # костыль для перехода на новый формат файла $info_file    TODO remove this and use          $data->{good_info};
        $self->log("loading synpath index done!");
    }
    $index = $self->{"synpath_index_$lang"};
    $index_missprints = $self->{"missprints_index_$lang"};

    my ($n1, $n2) = map { word2norm($_, $lang) } ($w1, $w2);
    my ($g1, $g2) = map { norm2good($_, $lang) } ($n1, $n2);

    my $path = $self->synpath_search($index, $g1, $g2);
    return if !$path;

    my @res;
    #push @res, [$g1, $n1, $index_missprints->{$g1}{$n1}[0]]    if  $g1 ne $n1;   # TODO
    for (my $i=0; $i+1<@$path; ++$i) {
        my ($p1, $p2) = @{$path}[$i, $i+1];
        push @res, [ $p1, $p2, $index->{$p1}{$p2} ]; 
    }
    #push @res, [$n2, $g2, $index_missprints->{$g2}{$n2}[0]]    if  $g2 ne $n2;   # TODO
    return \@res;
}



sub synpath_search {
    my $self = shift;
    my ($index, $p, $q) = @_;

    return undef if !defined $index->{$p} or !defined $index->{$q};
    my (%parent, %seen, %active);
    $active{$p} = 1;
    MAIN: while (keys %active) {
        my @active = sort keys %active;
        for my $v (@active) {
            my @near = grep { !$seen{$_} } sort keys %{$index->{$v} || {}};
            $active{$_} = 1 for @near;
            $parent{$_} = $v for @near;
            $seen{$_} = 1 for @near;
            last MAIN if $seen{$q};
        }
        delete $active{$_} for @active;
    }
    return undef if !defined $parent{$q};
    my @path;
    my $curr = $q;
    while ($curr ne $p) {
        unshift @path, $curr;
        $curr = $parent{$curr};
    }
    unshift @path, $p;
    return \@path;
}

1;
