package BM::Homonyms;

use utf8;
use open ':utf8';

use std;
use base qw(ObjLib::ProjPart);

use BM::Phrase;

use Utils::Sys;
use Utils::Words;

use Data::Dumper;
use List::Util qw(max);

########################################################
# Interface
########################################################
#

# Generate the accessor method for the `file` attribute.
# load_homonyms reads this value (as $self->{file}) and opens it as the
# homonym dictionary file.
__PACKAGE__->mk_accessors(qw(
    file
));

# Initialization hook. Intentionally performs no work: the dictionary is
# loaded lazily by the words/minuswords accessors on first use.
# NOTE(review): presumably invoked by the ObjLib::ProjPart base class during
# object construction -- confirm against the base class.
sub init {
    my ($self) = @_;
}

# Loads the homonym dictionary from $self->{file} and (re)builds two lookup
# tables on the object:
#
#   $self->{words}      : word => { theme => { context_word => 1, ... } }
#   $self->{minuswords} : word => { theme => { minus_word   => 1, ... } }
#
# Input file format: one record per line, tab-separated fields
#   1. comma-separated word forms of the homonym
#   2. theme (meaning) name
#   3. comma-separated context words that indicate this theme
#   4. optional comma-separated minus words
# Lines with an empty context field are skipped.
#
# Word forms listed on the same line are treated as connected; connected
# forms are grouped into clusters (transitively), and every word of a cluster
# receives every theme known for any other word of that cluster.
#
# If the file cannot be opened the problem is logged and both tables are left
# empty (they are still defined, so the lazy accessors do not retry).
sub load_homonyms {
    my ($self) = @_;

    $self->log("homonyms");

    # Both tables are reset up front, so the accessors see them as loaded
    # from here on.
    my $words      = $self->{words}      = {};
    my $minuswords = $self->{minuswords} = {};

    # Adjacency between word forms that occurred on the same line.
    my $adj = {};

    # Load the data. A lexical handle and a named line variable are used on
    # purpose: this method is triggered lazily from accessors, so it must not
    # clobber the caller's global $_ or a global bareword filehandle.
    my $file = $self->{file};
    my $fh;
    if (!defined $file || $file eq '') {
        $self->log("ERROR: homonyms file is not configured");
    }
    elsif (!open $fh, '<', $file) {
        $self->log("ERROR: can't open homonyms file '$file': $!");
    }
    else {
        while (my $line = <$fh>) {
            chomp $line;
            my ($forms_field, $theme, $context, $minus) = split "\t", $line;
            next if !$context;

            my @forms = map { word2norm($_) } split ",", $forms_field;
            my $plus_set  = { map { word2snorm($_) => 1 } split /\s*,\s*/, $context };
            my $minus_set = { map { $_ => 1 } grep { $_ } map { word2snorm($_) } split /\s*,\s*/, ($minus || "") };

            for my $form (@forms) {
                $adj->{$form}{$_}++ for @forms;
                $minuswords->{$form}{$theme} = $minus_set;
                $words->{$form}{$theme}      = $plus_set;
            }
        }
        close $fh;
    }

    # Build clusters: connected components of the adjacency graph.
    my %cluster_of;
    for my $start (keys %$adj) {
        next if $cluster_of{$start};

        my @cluster;
        my @pending = ($start);
        while (@pending) {
            my $w = pop @pending;

            next if $cluster_of{$w};

            $cluster_of{$w} = \@cluster;
            push @cluster, $w;
            push @pending, grep { !$cluster_of{$_} } keys %{ $adj->{$w} };
        }

        # Collect all themes of the current cluster. Members are visited in
        # sorted order so that, when two words define the same theme with
        # different data, the winner does not depend on hash ordering.
        my %theme_data;
        for my $w (sort @cluster) {
            for my $theme (keys %{ $words->{$w} }) {
                $theme_data{$theme} ||= [ $words->{$w}{$theme}, $minuswords->{$w}{$theme} ];
            }
        }

        # Propagate missing themes to every member; existing entries win.
        for my $w (@cluster) {
            for my $theme (keys %theme_data) {
                $words->{$w}{$theme}      ||= $theme_data{$theme}[0];
                $minuswords->{$w}{$theme} ||= $theme_data{$theme}[1];
            }
        }
    }

    $self->log("/ homonyms (".scalar(keys %$words).")");
}

# Returns the context-word table (word => theme => { context_word => 1 }),
# loading the dictionary first if it has not been loaded yet.
sub words {
    my $self = shift;

    if (!$self->{words}) {
        $self->load_homonyms;
    }

    return $self->{words};
}

# Returns the minus-word table (word => theme => { minus_word => 1 }),
# loading the dictionary first if it has not been loaded yet.
sub minuswords {
    my $self = shift;

    if (!$self->{minuswords}) {
        $self->load_homonyms;
    }

    return $self->{minuswords};
}

# Returns the context words that matched for a homonym.
# Input:  the homonym, followed by the context (a list of words).
# Output: the given words, normalized with word2snorm, that are registered as
#         context words for any theme of the homonym. Themes are processed in
#         sorted name order, so a word matching several themes appears once
#         per matching theme. Empty list if the homonym is unknown.
sub get_context_words {
    my ($self, $homonym, @words) = @_;
    my @matched;

    my $themes = $self->words->{$homonym};
    return @matched unless $themes;

    my @normalized = map { word2snorm($_) } @words;
    for my $theme (sort keys %$themes) {
        my $context = $themes->{$theme};
        push @matched, grep { $context->{$_} } @normalized;
    }

    return @matched;
}

# Determines the meaning of a homonym.
# Input:  the homonym, followed by the context (a list of words).
# Output: the list of meanings (themes), in sorted order, for which at least
#         one of the given words (normalized with word2snorm) is a registered
#         context word. Empty list if the homonym is unknown.
sub get_context {
    my ($self, $homonym, @words) = @_;
    my @themes;

    my $meanings = $self->words->{$homonym};
    return @themes unless $meanings;

    my @normalized = map { word2snorm($_) } @words;
    for my $theme (sort keys %$meanings) {
        my $context = $meanings->{$theme};
        # grep in boolean context: true when any context word is present
        push @themes, $theme if grep { $context->{$_} } @normalized;
    }

    return @themes;
}

1;

