package BM::BMClient::SimpgraphCdict;

use strict;

use utf8;
use open ':utf8';
no warnings 'utf8';
use base qw(BM::BMClient::CdictClient);

use IO::Select;
use Data::Dumper;
use Time::HiRes qw/gettimeofday tv_interval/;
use BM::Phrase;
use Utils::Common;
use Socket;

########################################################
# Interface
########################################################

########################################################
# Initialization
########################################################

# This class declares no accessors of its own: the list below is empty.
# NOTE(review): mk_accessors is not defined in this file; presumably it is
# inherited through the BM::BMClient::CdictClient base class - confirm.
__PACKAGE__->mk_accessors(qw(
));

# Append one simpgraph source to the combined dump behind the package-level
# filehandle G (the caller must have opened G for writing beforehand).
#
# Parameters:
#   $file_name - where to read from: a plain file path, or a shell command
#                followed by "|" (e.g. "zcat file.gz | ") whose output is read.
#   $dict_name - dictionary name written as the first column of every line.
#   %prm       - options:
#                  filter_porno => true: skip lines whose key is a porno
#                  phrase, drop porno associations, and skip lines that are
#                  left without associations.
#
# Input lines look like "key<TAB>assoc:weight,assoc:weight,...".  Before
# printing, every "," in the line becomes " , " and every ":" becomes " "
# (in the key part as well), and "$dict_name<TAB>" is prepended.
#
# Dies if the source is not defined, cannot be opened or read to the end
# (including a reader command that exits with an error), or if writing to G
# fails.  Returns the result of the final $self->log call.
sub _write_simpgraph {
    my ($self, $file_name, $dict_name, %prm) = @_;

    # A three-argument open with an undefined name would quietly create an
    # empty anonymous temp file, so a missing source is refused up front.
    die("simpgraph source for '$dict_name' is not defined")
        unless defined $file_name && length $file_name;

    my $lines   = 0;    # input line number of the last line written to G
    my $line_no = 0;    # own counter: $. follows the last-read handle and
                        # could be clobbered by file reads in called code

    $self->log("load '$dict_name' <- '$file_name'");

    # Keep the convention of the former two-argument open (surrounding
    # whitespace is ignored, a trailing "|" means "read this command's
    # output"), but pass an explicit mode so no other open magic applies.
    (my $source = $file_name) =~ s/\A\s+|\s+\z//g;
    my $is_pipe = ($source =~ s/\s*\|\z//) ? 1 : 0;

    open(my $in, ($is_pipe ? '-|' : '<'), $source)
        or die("can't read '$file_name' for '$dict_name': $!");

    while (my $line = <$in>) {
        $line_no++;
        chomp $line;
        if ($prm{filter_porno}) {
            my ($key, $assocs_line) = split /\t/, $line;
            next if $self->proj->phrase( $key )->is_porno_phrase;

            # each association is "phrase:weight"; keep the non-porno ones
            my @assocs_wghts = grep {
                not $self->proj->phrase( $_->[0] )->is_porno_phrase     # TODO: other languages
            } map { [ split /:/, $_ ] } split /,/, $assocs_line;
            next unless @assocs_wghts;

            $line = $key . "\t" . join(",", map { join(":", @$_) } @assocs_wghts);
        }
        $line =~ s/,/ , /g;
        $line =~ s/:/ /g;
        print G "$dict_name\t$line\n"
            or die("can't write '$dict_name' data: $!");
        $lines = $line_no;
    }

    # For a command, close() is the only place where its failure (for example
    # zcat on a missing or corrupted archive) becomes visible.
    if (!close($in)) {
        my $reason = $! ? "$!" : "wait status $?";
        die("error while reading '$file_name' for '$dict_name': $reason");
    }

    $self->log("/ load '$dict_name' <- '$file_name', $lines lines");
}

# Build the combined simpgraph dump and pack it.
#
# Steps:
#   1. Collect the source locations from $Utils::Common::options and check
#      that every one of them is set.
#   2. Write all sources, in a fixed order, into "<temp_dir>/simpgraph" via
#      _write_simpgraph (with porno filtering enabled), one dictionary name
#      per source.
#   3. Hand the dump to prepare_data_from_file and log the size of the
#      resulting single file.
#
# Takes no arguments besides $self.  Dies if an option is missing, or if the
# dump file cannot be opened or completely written.
sub prepare_data {
    my ($self) = @_;
    my $fn_temp = $self->temp_dir . "/simpgraph";
    my $options = $Utils::Common::options;

    # [ dictionary name, options section, option key, source is gzipped ]
    my @sources = (
        [ 'sguca', 'RUserClicks_params',             'result_nonsyn_gz1',            1 ],
        [ 'sgucb', 'RUserClicks_params',             'result_nonsyn_gz2',            1 ],
        [ 'sgucc', 'RUserClicks_params',             'result_nonsyn_gz3',            1 ],
        [ 'sgucg', 'RUserClicks_params',             'result_nonsyn_ucb_gzf',        1 ],
        [ 'sg',    'BroadmatchDirectPhrases_params', 'united_simpgraph_extended',    0 ],
        [ 'sgcon', 'BroadmatchDirectPhrases_params', 'united_simpgraph_context_syn', 0 ],
        [ 'sgwin', 'BroadmatchDirectPhrases_params', 'united_simpgraph_winter',      0 ],
    );

    # Resolve every source before touching the dump file: an unset path would
    # otherwise become "zcat  | ", which makes zcat read from STDIN.
    my @inputs;
    for my $source (@sources) {
        my ($dict_name, $section, $key, $gzipped) = @$source;
        my $path = $options->{$section}{$key};
        die("option $section.$key (simpgraph '$dict_name') is not set")
            unless defined $path && length $path;
        # gzipped sources are read through a "zcat ... |" command
        push @inputs, [ ($gzipped ? "zcat " . $path . " | " : $path), $dict_name ];
    }

    $self->log("preprocess");

    # Simpgraph preprocessing.  G stays a package-level handle because
    # _write_simpgraph prints to it.
    open(G, '>', $fn_temp) or die("can't open '$fn_temp' for writing: $!");
    for my $input (@inputs) {
        my ($file_name, $dict_name) = @$input;
        $self->_write_simpgraph($file_name, $dict_name, filter_porno => 1 );
    }
    # buffered write errors (e.g. a full disk) only show up here
    close(G) or die("can't finish writing '$fn_temp': $!");
    $self->log("/ preprocess");

    $self->log("pack");
    $self->prepare_data_from_file($fn_temp, prepare_single_file => 1);

    # -s yields undef for a missing file; report it as zero size
    my $size_bytes = ( -s $self->single_file ) || 0;
    $self->log("result: " . $self->single_file . ", size:" . sprintf("%.2f", ( $size_bytes / 1024 / 1024 / 1024 )) . "G");

    $self->log("/ pack");
}

1;

