package Cmds::DeprecatedWordsChecker;

use strict;
use utf8;
use open ':utf8';
use base qw(ObjLib::Obj);
use Utils::Common;
use Utils::Array;
use Cmds::PhraseBlock;
use Data::Dumper;

# Generate the "proj" accessor; every method of this class reaches the
# project object through $self->proj.
__PACKAGE__->mk_accessors('proj');

# Loads the deprecated-words dictionaries for the languages ru, en and tr
# into $self->{rules}{$lang}.
#
# The dictionary path for each language is taken from
# $Utils::Common::options->{deprecated_categs_words}{$lang}.  Every line that
# is not blank and does not start with '#' holds two tab-separated fields:
#   1. the deprecated words, or a name starting with '.' (resolved through
#      get_category_id and expanded into its phrases via category_interface);
#   2. an optional '/'-separated list of names resolved through
#      get_category_id.  The resolved ids become the keys of the rule's hash;
#      the checking methods of this class skip a rule when the checked
#      category is one of those ids or has one of them among its ancestors.
#
# Resulting structure: $self->{rules}{$lang}{$normalized_words}{$id} = 1.
#
# Dies when a dictionary path is missing or the file cannot be opened.
sub init {
    my ($self) = @_;

    my $proj = $self->proj;
    $self->{rules} = {};
    for my $lang (qw(ru en tr)) {
        my $rules = $self->{rules}{$lang} = {};
        my $dict_path = $Utils::Common::options->{deprecated_categs_words}{$lang};
        die("no deprecated words dictionary is configured for \"$lang\"")
            unless defined($dict_path) && length($dict_path);

        # Three-argument open with a lexical handle: the path can never be
        # interpreted as a mode or a pipe, and the handle is released even
        # when one of the calls below dies.
        open(my $dict_fh, '<', $dict_path)
            or die("can't read \"$dict_path\": $!");

        # A lexical line variable keeps the caller's $_ intact.
        while (my $line = <$dict_fh>) {
            chomp $line;
            next if $line =~ /^#/;   # skip comments
            next if $line !~ /\S/;   # skip blank lines

            my ($deprecated_words_str, $conditions_str) = split /\t/, $line;
            $deprecated_words_str = '' unless defined $deprecated_words_str;
            $conditions_str       = '' unless defined $conditions_str;

            # Names that do not resolve to a true id are dropped.
            my @conditions = grep {$_} map {$proj->get_category_id($_)} split /\//, $conditions_str;

            if ($deprecated_words_str !~ /^\./) {
                my $snorm_text = $proj->get_language($lang)->phrase($deprecated_words_str)->snorm_phr_uniq;
                # A rule without words would match every text, so it is ignored.
                next unless defined($snorm_text) && length($snorm_text);
                # A later line for the same words replaces the earlier rule.
                $rules->{$snorm_text} = {};
                $rules->{$snorm_text}{$_} = 1 for @conditions;
            } else {
                my $atom_id = $proj->get_category_id($deprecated_words_str);
                my @atom_phrases = @{$proj->category_interface->get_phrases($atom_id, $lang)};
                for my $atom_phrase (@atom_phrases) {
                    my $snorm_text = $proj->get_language($lang)->phrase($atom_phrase)->snorm_phr_uniq;
                    next unless defined($snorm_text) && length($snorm_text);
                    # Conditions are merged into any existing rule here.
                    # NOTE(review): with an empty condition list no rule is
                    # created for the phrase at all (only for the '.'-name
                    # below) -- confirm this is intended.
                    $rules->{$snorm_text}{$_} = 1 for @conditions;
                }
                # The '.'-name itself is stored as well, so that it can be
                # looked up directly by name.
                $rules->{$deprecated_words_str} = {};
                $rules->{$deprecated_words_str}{$_} = 1 for @conditions;
            }
        }
        close $dict_fh;
    }
}

# Tells whether $cat_id, or any of its ancestors reached through
# proj->get_parent_id, is a key with a true value in $categories_hash.
#
# Returns 1 when such a category is found; returns 0 otherwise, including
# when $categories_hash is false or $cat_id is false.
sub is_subcategory {
    my ($self, $cat_id, $categories_hash) = @_;

    return 0 unless $categories_hash;

    # Climb from the category itself towards the root; the walk ends as soon
    # as get_parent_id yields a false id.
    for (my $ancestor_id = $cat_id;
         $ancestor_id;
         $ancestor_id = $self->proj->get_parent_id($ancestor_id)) {
        return 1 if $categories_hash->{$ancestor_id};
    }

    return 0;
}

# Checks whether $text contains a deprecated word combination for $lang that
# is not permitted for category $cat_id.
#
# Spans wrapped in <...>, "..." or {...} are removed first.  Each span is
# removed on its own (the same pattern _find uses), so the text between two
# separate spans is still checked.  The atoms reported by
# proj->retrieve_atoms_from_text are then taken out of the text and their
# {atom} values are added to the set of normalized words of the remainder.
#
# A rule matches when all of its space-separated words are in that set and
# is_subcategory() does not report $cat_id as covered by the rule's
# categories.
#
# Returns the key of the first matching rule (the iteration order over the
# rules is unspecified), or 0 when nothing matches.
sub contains_deprecated_words {
    my ($self, $text, $cat_id, $lang) = @_;

    my $proj = $self->proj;

    $text =~ s/<[^>]*>|"[^"]*"|\{[^}]*\}//g;
    my $atoms = $proj->retrieve_atoms_from_text($text);
    $text = $proj->remove_atoms_from_text($text);

    my %snorm_words_hash = map {$_ => 1} $proj->get_language($lang)->phrase($text)->uniqsnormwords;
    $snorm_words_hash{$_->{atom}} = 1 for @$atoms;

    for my $deprecated_word (keys %{$self->{rules}{$lang}}) {
        my @words = split / /, $deprecated_word;
        # A rule without words would match vacuously and be returned as a
        # false value, hiding real matches that come later.
        next unless @words;
        next if grep {!$snorm_words_hash{$_}} @words;
        return $deprecated_word
            unless $self->is_subcategory($cat_id, $self->{rules}{$lang}{$deprecated_word});
    }

    return 0;
}

# Collects every deprecated entry found in $text for language $lang that is
# not permitted for category $cat_id.
#
# Steps:
#   * spans wrapped in <...>, "..." or {...} are removed from the text;
#   * atoms are retrieved from the text; those starting with '.' are treated
#     as named atoms, all other non-empty ones are appended to the text as
#     ordinary words after the atoms have been erased from it;
#   * a rule is reported when all of its space-separated words are among the
#     normalized words of the resulting text;
#   * a named atom is reported when a rule is stored under its own name.
# In both cases the entry is dropped if is_subcategory() says that $cat_id is
# covered by the rule's categories.
#
# Returns an array reference: matched rule keys first (in unspecified order),
# followed by the matched named atoms.
sub _find {
    my ($self, $text, $cat_id, $lang) = @_;

    my $proj = $self->proj;
    my $language = $proj->get_language($lang);

    $text =~ s/<[^>]*>|"[^"]*"|{[^}]*}//g;

    # Split the retrieved atoms into named ('.'-prefixed) and anonymous ones.
    my @atoms_info = @{$language->phrase($text)->retrieve_atoms};
    my (@named_atoms, @anonymous_atoms);
    for my $atom_group (@atoms_info) {
        push @named_atoms,     grep {/^\./} @$atom_group;
        push @anonymous_atoms, grep {/^[^\.]/} @$atom_group;
    }

    # Anonymous atoms take part in the word matching like ordinary words.
    $text = $language->phrase($text)->erase_atoms;
    for my $anonymous_atom (@anonymous_atoms) {
        $text .= " $anonymous_atom";
    }

    my %is_snorm_word;
    $is_snorm_word{$_} = 1 for $language->phrase($text)->uniqsnormwords;

    my @found = ();
    for my $rule_key (keys %{$self->{rules}{$lang}}) {
        my @rule_words = split / /, $rule_key;
        my $present = grep {$is_snorm_word{$_}} @rule_words;
        next if $present != @rule_words;
        next if $self->is_subcategory($cat_id, $self->{rules}{$lang}{$rule_key});
        push @found, $rule_key;
    }

    for my $named_atom (@named_atoms) {
        next unless $self->{rules}{$lang}{$named_atom};
        next if $self->is_subcategory($cat_id, $self->{rules}{$lang}{$named_atom});
        push @found, $named_atom;
    }

    return \@found;
}

# Returns an array reference describing the deprecated entries that _find
# reports for $text, category $cat_id and language $lang.
#
# Entries starting with '.' are returned unchanged.  Every other entry is
# rendered word by word as "normalized[orig1,orig2,...]", where the bracketed
# list holds the words of $text whose snorm_phr equals that normalized word.
# The list is sorted so that the output does not depend on hash iteration
# order; it is empty ("normalized[]") when no word of $text maps to the
# normalized word.
sub find_deprecated_words {
    my ($self, $text, $cat_id, $lang) = @_;

    my $language = $self->proj->get_language($lang);
    my @deprecated_words = @{$self->_find($text, $cat_id, $lang)};

    # normalized word => { original word from the text => 1 }
    my %orig_forms_of;
    for my $word ($language->phrase($text)->words) {
        $orig_forms_of{$language->phrase($word)->snorm_phr}{$word} = 1;
    }

    my @result;
    for my $deprecated_word (@deprecated_words) {
        if ($deprecated_word =~ /^\./) {
            push @result, $deprecated_word;
            next;
        }

        my @annotated;
        for my $snorm_word (split / /, $deprecated_word) {
            # Fall back to an empty set instead of autovivifying an entry.
            my $forms = $orig_forms_of{$snorm_word} || {};
            push @annotated, "$snorm_word\[" . join(',', sort keys %$forms) . "]";
        }
        push @result, join(' ', @annotated);
    }

    return \@result;
}

# Returns the normalized words of $text with the deprecated ones taken out,
# sorted and joined with single spaces.
#
# The entries reported by _find are processed in order; an entry is removed
# only if every one of its space-separated words is still present, so an
# entry that shares a word with an already removed one is left alone.
sub remove_deprecated_words {
    my ($self, $text, $cat_id, $lang) = @_;

    my $language = $self->proj->get_language($lang);
    my @found = @{$self->_find($text, $cat_id, $lang)};

    # Set of normalized words that have not been removed yet.
    my %remaining;
    $remaining{$_} = 1 for $language->phrase($text)->uniqsnormwords;

    for my $entry (@found) {
        my @parts = split / /, $entry;
        next if grep {!$remaining{$_}} @parts;
        delete @remaining{@parts};
    }

    return join(' ', sort keys %remaining);
}

1;
