package BM::Categories;

use strict;

use utf8;
use open ':utf8';
no warnings 'utf8';
use base qw(ObjLib::ProjPart);

use Data::Dumper;
use Time::HiRes qw/gettimeofday tv_interval/;
use BM::Phrase;
use Utils::Common;
use URI::Escape;
use List::Util qw(sum);

########################################################
# Интерфейс
########################################################

__PACKAGE__->mk_accessors(qw(
));

########################################################
# Инициализация
########################################################

sub init {
    my ($self) = @_;

    $self->{flag2desc} = {};        # описание флага
    $self->{categ2id} = {};         # id категории
    $self->{id2categ} = {};         # категория с данным id
    $self->{id2parentid} = {};      # id родителя категории с данным id
    $self->{id2children} = {};      # дети категории с данным id
    $self->{categ2flags} = {};      # флаги категории
    $self->{phrase2orig} = {};      # текст фразы в словаре категорий
    $self->{categ2phrases} = {};    # фразы категории 
    $self->{phrase2categs} = {};    # категории фразы

    $self->log($self->{name});
    
    # флаги
    if($self->{file_flags}) {
        open F, $self->{file_flags};
        $self->{flag2desc}{$_->[0]} = $_->[1] for map{chomp $_; [split "\t"]} <F>; ##no critic
        close F;
    }

    # загружаем фразы
    my $files_phrases = join(" ", @{$self->{files_phrases}});
    my $phrases_to_expand = [];
    open F, "cat $files_phrases |" or $self->log("ERROR: can't cat files with phrases");
    while(my $str = <F>) {
        next if $str =~ /^\#/;
        chomp $str;
        next if not $str;
        my ($my_id, $info, $ct, $phrases) = split("\t", $str);
        
        # проверка на ошибки в синтаксисе
        if(!$my_id || $info eq "" || !$ct || !$phrases) {
            $self->log("WARNING: bad categ line $str");
            next;
        }
        
        my ($parent_id, $flags) = split ":", $info;
        $self->_add_categ($my_id, $parent_id, $ct);
       
        # флаги категории
        $self->{categ2flags}{$ct} = {};
        if ($flags) {
            $self->{categ2flags}{$ct}{$_}++ for split ",", $flags;
            $self->log("WARNING: unknown categ flag $_") 
                for grep{!$self->{flag2desc}{$_} && !/^\_/} keys %{$self->{categ2flags}{$ct}};
        }

        # фразы категории
        if ($phrases =~ /^\$/) {
            my @a = split ":", $phrases;
            my $filename = substr $a[0], 1;
            $self->_load_phrases_from_file($ct, $filename)
        } elsif ($phrases) {
            $self->_add_phrase($_, $ct, $phrases_to_expand) for split(",", $phrases);
        }
    }
    close F;

    # TODO: phrases_to_expand

    # генерация объекта PhraseList
    $self->{phl} = $self->proj->phrase_list({phrases_arr => [keys %{$self->{phrase2categs}}]});

    $self->log("/ ".$self->{name}." (".scalar(keys %{$self->{categ2id}}).")");
}

sub _add_categ {
    my ($self, $my_id, $parent_id, $ct) = @_;
    
    if($self->{categ2id}{$ct}) {
        $self->log("WARNING: duplicate categ $ct");
    }
    
    if($self->{id2categ}{$my_id}) {
        $self->log("WARNING: duplicate categ id $my_id");
    }
    
    if($parent_id eq $my_id) {
        $self->log("WARNING: categ $ct can't be its own parent");
        $parent_id = "0";
    }

    $self->{categ2id}{$ct} = $my_id;
    $self->{id2categ}{$my_id} = $ct;
    $self->{id2parentid}{$my_id} = $parent_id;
    push @{$self->{id2children}{$parent_id} ||= []}, $ct;
}

sub _add_phrase {
    my ($self, $phrase, $categ, $phrases_to_expand) = @_;
  
    if(!$categ) {
        $self->log("WARNING: add_phrase $phrase, categ is empty");
        return;
    }

    return if !$phrase;

    # апаем при наличии соответствующего флага
    if($self->check_categ_flag($categ, "up")) {
        my $parent_categ = $self->categ_parent($categ);
        if(!defined($parent_categ)) {
            $self->log("WARNING: $categ must be defined after its parent");
            return;
        }
        $categ = $parent_categ;
    } 
    
    if ($phrase =~ /\[/) {
        # фраза содержит атомы -- нужно дополнительное разваливание
        push @$phrases_to_expand, [$phrase, $categ];
    } else {
        my $pobj = $self->proj->phrase($phrase);
        my $snorm = join " ", sort $pobj->uniqsnormwords;

        return if !$snorm;

        $self->{phrase2orig}{$snorm} = $phrase;
        $self->{phrase2categs}{$snorm} .= $categ . "/";
        $self->{categ2phrases}{$categ} .= $snorm . ",";
    }   
}

sub _load_phrases_from_file {
    my ($self, $ct, $filename) = @_;

    # TODO!!!
}

sub root_categs {
    my ($self) = @_;

    return @{$self->{id2children}{"0"} ||= []};
}

sub categ_children {
    my ($self, $categ) = @_;
    my $id = $self->{categ2id}{$categ};
    
    return () if !$id;
    return @{$self->{id2children}{$id} ||= []};
}

sub categ_parent {
    my ($self, $categ) = @_;
    my $id = $self->{categ2id}{$categ};
    my $pid = $self->{id2parentid}{$id};

    return $self->{id2categ}{$pid};
}

sub check_categ_flag {
    my ($self, $categ, $flag) = @_;

    return $self->{categ2flags}{$categ}{$flag} || 0;
}

sub get_subphrases_list {
    my ($self, $ph) = @_;
    my $final_phrase = $ph;

    # ищем подфразы от категорий
    my $phcats = $self->{phl}->search_subphrases_in_phrase($final_phrase);

    return $phcats;
}

sub get_subphrases_hash {
    my ($self, $ph) = @_;
    my $phl = $self->get_subphrases_list($ph);
    
    return {map{$_->snorm_phr => $self->{phrase2categs}{$_->snorm_phr}} $phl->phrases};
}

sub categorize_phrase {
    my ($self, $ph) = @_;
    my $h = $self->get_subphrases_hash($ph);
    my %cth;

    for my $subph (keys %$h) {
        $cth{$_}++ for grep{$_} split "/", $h->{$subph};
    }

    return sort keys %cth;
}

1;
