package BM::Language;
use strict;

use utf8;
use open ':utf8';

use std;
use base qw(ObjLib::ProjPart BM::MCached);

use Scalar::Util qw(weaken);

use BM::Categories::Atom;
use Utils::Common;
use Utils::Words;
use BM::Phrase;
use BM::PhraseCategs;
use BM::Prefilter;

# Generate accessor methods for the per-language fields listed below.
#   layer_*                 -- init() stores the created category layers in the
#                              matching "layer_<name>" slots of the object
#   atom_codes, code_atoms  -- reset to empty hashes in init()
# NOTE(review): mk_accessors is not defined in this file (presumably inherited
# from a base class) -- confirm how it treats "prefilter", which is also
# defined as an explicit sub further down in this package.
__PACKAGE__->mk_accessors(qw(
    prefilter
    name
    is_default
    is_loaded

    layer_csnorms
    layer_atoms
    layer_atoms_named
    layer_categs
    layer_catalysts
    layer_catalysts2
    layer_dim2
    layer_uncertain
    layer_multiwords

    atom_codes
    code_atoms
));

# Create the category layers configured for the project and reset every
# per-language lookup table to an empty hash.
sub init {
    my ($self) = @_;

    # category layers: one per entry of the "categs_layers" option; layers of
    # a non-default language get a "_<language name>" suffix
    for my $layer_name (keys %{$Utils::Common::options->{categs_layers}}) {
        my $proj       = $self->proj;
        my $suffix     = $self->is_default ? "" : "_" . $self->name;
        my %layer_opts = %{$Utils::Common::options->{categs_layers}{$layer_name}};

        $self->{"layer_$layer_name"} = $proj->create_layer(
            name => $layer_name . $suffix,
            %layer_opts
        );
    }

    my @containers = (
        'snorm2minicategs',         # category words
        'minicategs_antiwords',     # category anti-words
        'minicateg2antiwords',
        'minicateg2antiwordsphl',
        'id2atom',                  # atoms
        'name2atom',
        'minicateg2phrases',        # category phrases
        'categ2deleted',
        'ru2categ',                 # category names
        'categ2ru',
        'virtual2phrases',          # virtual categories
        'ru2virtual',
        'atom_codes',               # atom codes
        'code_atoms',
        'wide',                     # wide words and phrases
        'wph',
    );
    $self->{$_} = {} for @containers;

    # hyphenated words
    return $self->{complex_words} = {};
}

# Load the dictionaries of this language. The normalizer must already be
# loaded when this is called.
#
# Depending on which settings are present on the object this reads:
#   * wide words        ($self->{wide_words}, array of file names) into $self->{wide}
#   * wide phrases      ($self->{widephrs})                        into $self->{wph}
#   * hyphenated words  ($self->{dict_complex_words})              into $self->{complex_words}
#   * atoms             ($self->{atoms_source}), only when the project has one
#                       of the load_atoms / load_minicategs (or *_light) flags
# The project's current language is switched to this language for the duration
# of the load and restored at the end.
#
# Every file is opened with a lexical handle and three-argument open, is read
# into a named variable (so the caller's $_ is left alone), and is never read
# from when the open failed.
sub load_dicts {
    my ($self) = @_;

    my $prevlang = $self->proj->current_lang;
    $self->proj->current_lang($self->name);

    # prefilters' dictionary
    $self->prefilter;

    # wide words
    if ($self->{wide_words}) {
        $self->log("wide ".$self->name);
        for my $fn (@{$self->{wide_words}}) {
            # a file that cannot be opened is skipped silently (best effort)
            open(my $fh, '<', $fn) or next;
            while (defined(my $line = <$fh>)) {
                my @words = map { word2snorm($_, $self->name) } text2words($line, $self->name);
                $self->{wide}{$_}++ for @words;
            }
            close $fh;
        }
        $self->log("/wide ".$self->name);
    }

    # wide phrases that do not consist of wide words
    if ($self->{widephrs}) {
        $self->log("wide_phrases ".$self->name);
        if (open(my $fh, '<', $self->{widephrs})) {
            while (defined(my $line = <$fh>)) {
                my $key = join " ", $self->phrase($line)->uniqsnormwords;
                $self->{wph}{$key}++;
            }
            close $fh;
        }
        else {
            $self->log("ERROR: $!");
        }
        $self->log("/ wide_phrases ".$self->name);
    }

    # compound words written with a hyphen: first part -> { second part => count }
    if ($self->{dict_complex_words}) {
        $self->log("complex words");
        if (open(my $fh, '<', $self->{dict_complex_words})) {
            while (defined(my $line = <$fh>)) {
                chomp $line;
                my @parts = split /-/, $line;
                my @norms = map { $self->proj->phrase($_)->norm_phr } @parts;
                next if !$norms[0] || !$norms[1];
                ($self->{complex_words}{$norms[0]} ||= {})->{$norms[1]}++;
            }
            close $fh;
        }
        else {
            $self->log("ERROR: $!");
        }
        $self->log("/ complex words");
    }

    # atoms
    my $load_atoms = $self->proj->{load_atoms} || $self->proj->{load_minicategs};
    my $load_atoms_light = $self->proj->{load_atoms_light} || $self->proj->{load_minicategs_light};
    if (($load_atoms || $load_atoms_light) && $self->{atoms_source}) {
        $self->log("load_atoms " . $self->name);

        if (open(my $fh, '<', $self->{atoms_source})) {
            while (defined(my $line = <$fh>)) {
                chomp $line;
                my @a = split /\t/, $line;

                # the line format differs between Russian (4 columns) and the
                # other languages (5 columns, with a separate Russian name)
                my ($id, $pid, $ru_name, $name, $phrases);
                if (@a == 4) {
                    ($id, $pid, $name, $phrases) = @a;
                    $ru_name = $name;
                }
                elsif (@a == 5) {
                    ($id, $pid, $ru_name, $name, $phrases) = @a;
                }
                else {
                    $self->log("WARNING: bad line in atoms: $line");
                    next;
                }

                # phrase list of the atom; left empty in "light only" mode
                my $phl;
                if ($load_atoms_light && !$load_atoms) {
                    $phl = $self->proj->phrase_list();
                }
                else {
                    $phl = $self->proj->phrase_list({ phrases_arr => [ $self->parse_phrases($phrases) ] });
                }

                my $atom = $self->create_atom({
                    id              => $id,
                    name            => $name,
                    ru_name         => $ru_name,
                    parent_id       => $pid,
                    phrase_list     => $phl
                });

                $self->log("WARNING: duplicate atom '$name'") if !$atom;
            }
            close $fh;
        }
        else {
            $self->log("ERROR: can't open ".$self->{atoms_source}." for reading");
        }

        if ($self->is_default) {
            # generate atoms from homonyms; keys are sorted so that the
            # generated "__homonym_N" ids do not depend on hash ordering
            my $homs = $self->proj->homonyms->words;
            my $id_count = 0;
            for my $word (sort keys %$homs) {
                for my $name (sort keys %{$homs->{$word}}) {
                    my $title = ".$name";
                    my $id = "__homonym_".$id_count++;

                    # NOTE(review): the light check here is just $load_atoms_light,
                    # while regular atoms above use ($load_atoms_light && !$load_atoms)
                    # -- confirm the difference is intended.
                    $self->create_atom({
                        id          => $id,
                        name        => $title,
                        ru_name     => $title,
                        parent_id   => "ahmn",
                        phrase_list => $load_atoms_light ?
                            $self->proj->phrase_list :
                            $self->proj->phrase_list({ phrases_arr => [map{"$word $_"} keys %{$homs->{$word}{$name}}] })
                    });
                }
            }
        }

        $self->log("/ load_atoms " . $self->name);
    }

    $self->proj->current_lang($prevlang);
}

# Open the StaticMap stored in the file named by $self->{widephr_statinf}.
# Dies (after printing the same message to STDERR) when that file is missing.
# NOTE(review): the :CACHE attribute presumably memoises the result -- confirm
# in BM::MCached.
sub widephr_statinf_staticmap :CACHE {
    my ($self) = @_;

    my $path = $self->{widephr_statinf};
    if (!-f $path) {
        my $error = "Could not create lmr: file '$path' does not exist";
        print STDERR "ERROR: $error\n";
        die $error;
    }

    return scalar StaticMap->new($path);
}

# Look the phrase text up in the wide-phrase static map.
# Only the language named 'ru' has such a map; every other language gets 0.
sub widephr_statinf_check {
    my ($self, $ph) = @_;

    if ($self->name eq 'ru') {
        return $self->widephr_statinf_staticmap->Value("$ph");
    }

    return 0;
}

# Create an atom from the attribute hash $h and register it by name and id.
# Returns the new atom, or undef when an atom with the same name is already
# registered. The atom gets a weak back-reference to this language and a link
# to its parent atom (looked up by parent_id among the atoms registered so far).
sub create_atom {
    my ($self, $h) = @_;

    # explicit undef (not a bare return): list-context callers still get one element
    return undef if $self->{name2atom}{$h->{name}};

    my $atom = BM::Categories::Atom->new($h);

    # weak link, so the atom does not keep the language alive
    $atom->{language} = $self;
    weaken($atom->{language});

    # parent atom
    my $parent_id = $atom->{parent_id};
    my $parent    = $self->{id2atom}{$parent_id};
    $atom->{parent} = $parent;
    $self->log("WARNING: atom '".$atom->{name}."' has unknown parent '$parent_id'")
        if $parent_id && !$parent;

    $self->{name2atom}{$atom->{name}} = $atom;
    $self->{id2atom}{$atom->{id}} = $atom;

    return $atom;
}

# Fill the word layer from the snorm -> minicategories table and then call
# prepare() on every category layer.
sub prepare_layers {
    my ($self) = @_;

    # word layer: each word is stored with its sorted categories joined by "/"
    for my $snorm (keys %{$self->{snorm2minicategs}}) {
        my $categs = join "/", sort keys %{$self->{snorm2minicategs}{$snorm}};
        $self->layer_csnorms->add_text($snorm, $categs);
    }

    $_->prepare for $self->get_layers_list;
}

# Fetch the dictionary $name for this language from the project's dictionary
# manager.
sub get_dict {
    my ($self, $name) = @_;

    my $manager = $self->proj->dict_manager;
    return $manager->get_dict($name, $self->name);
}

# Is the normalized word "wide"?
# True when the word is in the wide-word table, contains no non-digit
# character, is exactly one character long, or looks like "digits.digits".
sub is_snorm_wide {
    my ($self, $w) = @_;

    my $listed = $self->{wide}{$w};
    return $listed if $listed;

    return 1 if $w !~ /\D/;
    return 1 if length($w) == 1;

    # last check is returned as is, in the caller's context
    return $w =~ /^\d+\.\d+$/;
}

# Is the phrase "wide"?
# Returns 1 when widephr_statinf_check() accepts the normalized phrase or when
# the phrase's unique snorm words (joined by a space) are in the wide-phrase
# table; 0 otherwise.
sub is_phrase_wide {
    my ($self, $phr) = @_;

    my $key = join " ", $phr->uniqsnormwords;

    if ($self->widephr_statinf_check($phr->norm_phr)) {
        return 1;
    }

    return 0 unless $self->{wph}{$key};
    return 1;
}

# Does the phrase contain a sub-phrase that is never wide?
# Returns the number of phrases from the "not_widephrs" dictionary of this
# language that are found as sub-phrases of $phr.
sub is_phrase_not_wide {
    my ($self, $phr) = @_;

    my $manager     = $self->proj->dict_manager;
    my $never_wide  = $manager->get_dict("not_widephrs", $self->name);
    my $dict_phrases = $never_wide->phrase_list;

    return $dict_phrases->search_subphrases_in_phrase($phr)->number_of_phrases;
}

# Is the normalized form of the phrase listed in the "wide_spam_phrases"
# dictionary of this language? Returns whatever phexists() returns.
sub is_phrase_wide_spam {
    my ($self, $phr) = @_;

    my $manager   = $self->proj->dict_manager;
    my $spam_dict = $manager->get_dict("wide_spam_phrases", $self->name);
    my $spam_list = $spam_dict->phrase_list;

    return $spam_list->phexists($self->proj->phrase($phr->norm_phr));
}

# List of all category layers of this language: the "layer_<name>" slot for
# every name configured in the "categs_layers" option.
sub get_layers_list {
    my ($self) = @_;

    my @layer_names = keys %{$Utils::Common::options->{categs_layers}};
    return map { $self->{'layer_' . $_} } @layer_names;
}

# Return the prefilter of this language for dictionary $file, creating it on
# first use. $file defaults to the "dict_prefilters" option; one prefilter is
# kept per file in the "prefilter_<file>" slot.
sub prefilter {
    my ($self, $file) = @_;

    $file = $Utils::Common::options->{dict_prefilters} unless $file;
    my $slot = "prefilter_$file";

    return $self->{$slot} ||= BM::Prefilter->new({
        language    => $self,
        file        => $file,
    });
}

# Create a phrase in this language.
# With the option snormed => 1 the project's snormed_phrase() constructor is
# used, otherwise the regular phrase(). The result is tagged with the language
# name ({lang}) and the language object ({language}).
sub phrase {
    my ($self, $text, %opts) = @_;

    my $constructor = $opts{snormed} ? 'snormed_phrase' : 'phrase';
    my $ph = $self->proj->$constructor($text);

    $ph->{lang} = $self->name;
    $ph->{language} = $self;

    return $ph;
}

# Create a phrase list through the project (all arguments are passed on) and
# tag the list and each of its phrases with this language.
sub phrase_list {
    my $self = shift;

    my $phl  = $self->proj->phrase_list(@_);
    my $lang = $self->name;

    $phl->{lang} = $lang;
    for my $phr ($phl->phrases) {
        @{$phr}{qw(lang language)} = ($self->name, $self);
    }

    return $phl;
}

# Bookkeeping of deleted phrases.
# Count one more deletion of phrase $text in category $categ; returns the
# counter value from before the increment.
sub set_deleted_phrase {
    my ($self, $categ, $text) = @_;

    my $deleted = $self->{categ2deleted}{$categ} ||= {};
    return $deleted->{$text}++;
}

# Deletion counter of phrase $text in category $categ (undef when the phrase
# or the category has no record). Does not create a record for the category.
sub is_phrase_deleted {
    my ($self, $categ, $text) = @_;

    my $deleted = $self->{categ2deleted}{$categ};
    return $deleted ? $deleted->{$text} : undef;
}

# Page categorizer: the project's entry for this language or, when there is
# none, the page categorizer of the project's default language.
# NOTE(review): if this object is itself the default language and has no
# entry, the fallback would call this same method again -- confirm the default
# language always has an entry.
sub pages_categories :CACHE {
    my ($self) = @_;

    my $own = $self->proj->{pages_categories}{$self->name};
    return $own if $own;

    return $self->proj->default_language->pages_categories;
}

# Category names.
# Register $local as the name of the Russian category $ru in this language
# (both directions). A warning is logged when $local was already registered
# for a different Russian name; the new pair still replaces the old one.
sub set_category_translation {
    my ($self, $ru, $local) = @_;

    my $known_ru = $self->{categ2ru}{$local};
    $self->log("WARNING: ambiguous translation '$local' for '$known_ru' and '$ru'")
        if $known_ru && $known_ru ne $ru;

    $self->{ru2categ}{$ru} = $local;
    $self->{categ2ru}{$local} = $ru;
}

# Register $local as the name of the Russian virtual category $ru in this
# language. Only the ru -> local direction is stored.
sub set_virtual_translation {
    my ($self, $ru, $local) = @_;

    return $self->{ru2virtual}{$ru} = $local;
}

# Phrases of a virtual category.
# Replace the phrase list of virtual category $name with the phrases parsed
# from $phrases_text; false entries (empty strings, "0") are left out.
sub set_virtual_phrases {
    my ($self, $name, $phrases_text) = @_;

    # reset first, then fill
    my $bucket = $self->{virtual2phrases}{$name} = [];
    push @$bucket, grep { $_ } $self->parse_phrases($phrases_text);
}

# Phrases stored for virtual category $name (empty list when there are none).
sub get_virtual_phrases {
    my ($self, $name) = @_;

    my $phrases = $self->{virtual2phrases}{$name} || [];
    return @$phrases;
}

# Is the (Russian-named) category defined for this language, i.e. does it
# have a registered translation?
sub has_category {
    my ($self, $categ) = @_;

    my $local_name = $self->{ru2categ}{$categ};
    return defined $local_name;
}

# Names of all categories known in this language (the keys of the
# local -> Russian name table), in no particular order.
sub get_categories_list {
    my ($self) = @_;

    return keys %{ $self->{categ2ru} };
}

# Translate a category name from Russian into this language; a name without
# a (true) translation is returned unchanged.
sub category_from_ru {
    my ($self, $categ) = @_;

    my $local_name = $self->{ru2categ}{$categ};
    return $local_name ? $local_name : $categ;
}

# Translate a virtual category name from Russian into this language; a name
# without a (true) translation is returned unchanged.
sub virtual_from_ru {
    my ($self, $categ) = @_;

    my $local_name = $self->{ru2virtual}{$categ};
    return $local_name ? $local_name : $categ;
}

# Translate a category name of this language into Russian; a name without a
# (true) translation is returned unchanged.
sub category_to_ru {
    my ($self, $categ) = @_;

    my $ru_name = $self->{categ2ru}{$categ};
    return $ru_name ? $ru_name : $categ;
}

# Build the text "Language <name>", store a reference to it in the first
# argument slot and return the whole (modified) argument list.
# Presumably a hook used by the project's object dumper -- TODO confirm.
# NOTE(review): $_[0] aliases the caller's invocant, so when this is called as
# $var->dumper_text(...) the assignment below also overwrites $var in the
# caller -- confirm the dumper framework expects or tolerates that.
sub dumper_text {
    my ($self) = @_;
    my $t = "Language ".$self->name;
    $_[0] = \$t;
    return @_; 
}

# Same output as dumper_text: builds the text "Language <name>", stores a
# reference to it in the first argument slot and returns the whole (modified)
# argument list.
# Presumably the "lite" variant of the project's dumper hook -- TODO confirm.
# NOTE(review): $_[0] aliases the caller's invocant, so when this is called as
# $var->dumper_text_lite(...) the assignment below also overwrites $var in the
# caller -- confirm the dumper framework expects or tolerates that.
sub dumper_text_lite {
    my ($self) = @_;
    my $t = "Language ".$self->name;
    $_[0] = \$t;
    return @_; 
}

# Turn a textual phrase specification into a list of phrase strings.
#
# $phrases is one of:
#   * undef                        -> empty list;
#   * "$file[:flag=a,b[:...]]"     -> phrases come from the external dictionary
#                                     "<dicts dir>/file". A line "phrase" gives
#                                     the phrase itself; a line "phrase:f1,f2"
#                                     gives "phrase X" for every X listed in
#                                     the specification for the flags f1, f2;
#   * any other true value         -> split by
#                                     BM::PhraseCategs::split_phrases_text.
# Undefined entries are dropped and the remaining strings are trimmed.
# An external dictionary that cannot be opened is logged and yields no phrases.
#
# The dictionary is opened with a lexical handle and three-argument open, so
# special characters in the file name are never treated as an open mode or a
# pipe, and lines are read into a named variable so the caller's $_ is left
# untouched.
sub parse_phrases {
    my ($self, $phrases) = @_;
    my @result = ();

    return () if !defined($phrases);

    if ($phrases =~ /^\$/) {
        my ($file_spec, @flag_specs) = split /:/, $phrases;
        my $filename = substr $file_spec, 1;
        my $path = $Utils::Common::options->{dirs}{dicts}."/$filename";

        # phrases from an external dictionary
        if (open(my $fh, '<', $path)) {
            # flag name -> list of words appended to phrases carrying that flag
            my %flags;
            for my $spec (@flag_specs) {
                my ($flag_name, $flag_phrases) = split /=/, $spec;
                $flags{$flag_name} = [ defined $flag_phrases ? split(/,/, $flag_phrases) : () ];
            }

            while (defined(my $line = <$fh>)) {
                chomp $line;
                my ($ph, $line_flags) = split /:/, $line;

                if ($line_flags) {
                    for my $flag (split /,/, $line_flags) {
                        push @result, map { "$ph $_" } @{ $flags{$flag} || [] };
                    }
                }
                else {
                    push @result, $ph;
                }
            }
            close $fh;
        }
        else {
            $self->log("ERROR: can't open $filename (see '$phrases')");
        }
    }
    elsif ($phrases) {
        @result = BM::PhraseCategs::split_phrases_text($phrases);
    }

    # drop undefined entries, trim leading and trailing whitespace
    @result = grep { defined } @result;
    s/^\s+|\s+$//g for @result;

    return @result;
}

# Phrases of the category exactly as they were written in the dictionary,
# collected from the categs, catalysts2 and atoms layers (in that order).
sub get_category_raw_phrases {
    my ($self, $categ) = @_;

    my @layers = ($self->layer_categs, $self->layer_catalysts2, $self->layer_atoms);
    return (map { $_->get_category_raw_phrases($categ) } @layers);
}

# All registered atoms (the values of the id -> atom table), in no particular
# order.
sub get_atoms_list {
    my ($self) = @_;

    return values %{ $self->{id2atom} };
}

# Sorted list of the anti-words registered for minicategory $categ (empty
# when the category has none).
sub get_minicateg_antiwords {
    my ($self, $categ) = @_;

    my $antiwords = $self->{minicateg2antiwords}{$categ} || {};
    return sort keys %$antiwords;
}

1;
