#!/usr/bin/perl
use strict;

use utf8;
use open ":utf8";

use Getopt::Long;
use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../wlib";
use Utils::Common;
use Utils::Sys qw/
    get_file_lock
    release_file_lock
    handle_errors
    dir_files
    modtime
    read_ts_file
/;
use CatalogiaMediaProject;
use BM::Phrase;
use Data::Dumper;

# Set up error handling (see Utils::Sys::handle_errors) before doing any work.
handle_errors();

# Refuse to start when the file lock cannot be taken, i.e. another copy of
# this script is presumably still running.
unless (get_file_lock()) {
    die "infuse_web_categs.pl is already runned!";
}

# Project object, created with the no_auth and no_form options set.
my $proj = CatalogiaMediaProject->new({
    no_auth => 1,
    no_form => 1,
});

# Command line: --test=FILE makes the script read the phrases from FILE
# instead of asking the project for the accepted user phrases.
my %args;
GetOptions(\%args, 'test=s');

# Dump the categories text fields into their dictionary file.
infuse_categories_text_fields();

# Remove the result file if one already exists.
{
    my $stale_result = $Utils::Common::options->{infuse_web_result_file};
    if (-e $stale_result) {
        $proj->do_sys_cmd("rm " . $stale_result);
    }
}

# Update the category groups.
for my $groups (qw/mediagroups videodirectgroups interests_mapping/) {
    $proj->$groups->save_data;
}

# Counters keyed by 'subphraser' / 'catmedia_db', bumped while the phrases are
# dispatched. At the end of the script every key present here is OR-ed,
# through $result_masks, into the bitmask written to the result file.
my $result_data = { };
my $result_masks = { subphraser => 1, catmedia_db => 2 };

# Phrases that have passed moderation (or, with --test, the rows of the
# given file).
my $phrases = exists $args{test}
    ? read_ts_file($args{test})
    : $proj->get_accepted_user_phrases;

# Accumulators filled by the dispatch loop over the phrases, one per action.
my $categs_add = {};                    # Language => CatID => [phrase text]   (Add)
my $categs_del = {};                    # Language => CatID => [phrase text]   (Delete)
my $categs_antiwords = {};              # Language => CatID => [phrase text]   (AddAntiword)
my $categs_flags = {};                  # CatID => [phrase text]               (AddFlag)
my $categs_parents = {};                # CatID => [InitialPhrase]             (ChangeParent)
my $deleted_categs = {};                # CatID => []                          (DeleteCategory)
my $renamed_categs = {};                # Language => CatID => new name        (RenameCategory)
my $syn_pairs = {};                     # Language => [InitialPhrase]          (AddSynPair)
my $csyn_pairs = {};                    # Language => [InitialPhrase]          (AddContextSyn)
my $lemmerfix_pairs = {};               # Language => [InitialPhrase]          (AddLemmerFix)
my $widespamphrases_pairs = {};         # Language => [InitialPhrase]          (AddWideSpamPhrase)
my $deleted_syn_pairs = {};             # Language => [phrase text]            (DeleteSynPair)
my $deleted_flags = {};                 # CatID => [phrase text]               (DeleteFlag)
my $new_categories = [];                # [id, parent id, InitialPhrase] rows  (AddCategory)
my @added_prefilters = ();              # "Language\tInitialPhrase" strings    (AddPrefilter)
my @deleted_prefilters = ();            # "Language\tInitialPhrase" strings    (DeletePrefilter)
my $lang_catid_vadd_phrases = {};       # Language => CatID => [phrase text]   (VAdd)
my $lang_catid_vdelete_phrases = {};    # Language => CatID => [phrase text]   (VDelete)
my $apply_virtual = {};                 # category name => virtual category name => "add" | "add_once"
my @goodphrases = ();                   # phrases whose action was recognised
my $category_added_nephews = {};        # domain category name => image category name => 1  (AddNephew)
my $category_deleted_nephews = {};      # domain category name => image category name => 1  (DeleteNephew)

# Walk the phrases in UpdateTime order (string comparison) and sort each one
# into the accumulator that matches its Action. Phrases with an unknown
# action are reported with warn() and left out of @goodphrases, so their
# status is not updated at the end of the script.
for my $phr (sort{$a->{UpdateTime} cmp $b->{UpdateTime}} @$phrases) {
    my $text = $phr->{InitialPhrase};
    # undo the HTML escaping of > < " and flatten line breaks to spaces
    $text =~ s/\&gt;/>/g;
    $text =~ s/\&lt;/</g;
    $text =~ s/\&quot;/"/g;
    $text =~ s/\n|\r/ /g;

    # forcibly remove tabs (the dictionaries written later are tab-separated)
    $text =~ s/\t/ /g;

    # Note: some branches store the cleaned-up $text, others the raw
    # $phr->{InitialPhrase}.
    if($phr->{Action} eq "Add") {
        push @{($categs_add->{$phr->{Language}} ||= {})->{$phr->{CatID}} ||= []}, $text;
    } elsif ($phr->{Action} eq "Delete") {
        push @{($categs_del->{$phr->{Language}} ||= {})->{$phr->{CatID}} ||= []}, $text;
    } elsif ($phr->{Action} eq "AddFlag") {
        push @{$categs_flags->{$phr->{CatID}} ||= []}, $text;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "DeleteFlag") {
        push @{$deleted_flags->{$phr->{CatID}} ||= []}, $text;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "DeleteCategory") {
        # only the id matters; the value is an empty phrase list
        $deleted_categs->{$phr->{CatID}} = [];
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "RenameCategory") {
        # the phrase text is the new category name
        ($renamed_categs->{$phr->{Language}} ||= {})->{$phr->{CatID}} = $text;
    } elsif ($phr->{Action} eq "AddSynPair") {
        push @{$syn_pairs->{$phr->{Language}} ||= []}, $phr->{InitialPhrase};
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "AddContextSyn") {
        push @{$csyn_pairs->{$phr->{Language}} ||= []}, $phr->{InitialPhrase};
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "AddLemmerFix") {
        push @{$lemmerfix_pairs->{$phr->{Language}} ||= []}, $phr->{InitialPhrase};
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "AddWideSpamPhrase") {
        push @{$widespamphrases_pairs->{$phr->{Language}} ||= []}, $phr->{InitialPhrase};
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "AddAntiword") {
        push @{($categs_antiwords->{$phr->{Language}} ||= {})->{$phr->{CatID}} ||= []}, $text;
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "DeleteSynPair") {
        push @{$deleted_syn_pairs->{$phr->{Language}} ||= []}, $text;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "AddCategory") {
        # CatID carries "id:parent_id" for a new category
        my ($id, $parent_id) = split ":", $phr->{CatID};
        push @$new_categories, [$id, $parent_id, $phr->{InitialPhrase}];
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif($phr->{Action} eq "ChangeParent") {
        # a later ChangeParent for the same category replaces an earlier one
        $categs_parents->{$phr->{CatID}} = [$phr->{InitialPhrase}];
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq 'AddPrefilter') {
        push @added_prefilters, join("\t", @{$phr}{qw/Language InitialPhrase/});
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq 'DeletePrefilter') {
        push @deleted_prefilters, join("\t", @{$phr}{qw/Language InitialPhrase/});
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq 'VAdd') {
        push @{$lang_catid_vadd_phrases->{$phr->{Language}}{$phr->{CatID}}}, $text;
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq 'VDelete') {
        push @{$lang_catid_vdelete_phrases->{$phr->{Language}}{$phr->{CatID}}}, $text;
        $result_data->{subphraser}++;
        $result_data->{catmedia_db}++;
    } elsif ($phr->{Action} eq "ApplyVirtualFull" || $phr->{Action} eq "ApplyVirtual") {
        # CatID names the category, InitialPhrase the virtual category; both
        # are looked up with the "ru" language
        my $categ = $proj->get_category_full($phr->{CatID}, "ru");
        my $virtual = $proj->get_virtual_category_full($phr->{InitialPhrase}, "ru");
    
        # recorded only when both lookups succeed; the phrase still counts as
        # handled (it reaches @goodphrases) when they do not
        if($categ && $virtual) {
            ($apply_virtual->{$categ->{CategoryName}} ||= {})->{$virtual->{CategoryName}} = $phr->{Action} eq "ApplyVirtualFull" ? "add" : "add_once";
            $result_data->{subphraser}++;
            $result_data->{catmedia_db}++;
        }
    } elsif ($phr->{Action} eq 'AddNephew') {
        # NOTE(review): unlike the ApplyVirtual branch, the looked-up names
        # are used without checking that the lookups returned anything --
        # confirm get_category_name cannot return undef here
        my $domain_category = $proj->get_category_name($phr->{CatID}, 'ru');
        my $image_category = $proj->get_category_name($phr->{InitialPhrase}, 'ru');
        $result_data->{catmedia_db}++;
        $category_added_nephews->{$domain_category}{$image_category} = 1;
    } elsif ($phr->{Action} eq 'DeleteNephew') {
        my $domain_category = $proj->get_category_name($phr->{CatID}, 'ru');
        my $image_category = $proj->get_category_name($phr->{InitialPhrase}, 'ru');
        $category_deleted_nephews->{$domain_category}{$image_category} = 1;
        $result_data->{catmedia_db}++;
    } else {
        # unknown actions are reported but do not abort the run
        #die("unknown action " . $phr->{Action});
        warn("unknown action " . $phr->{Action});
        next; 
    }
    push(@goodphrases, $phr);
}
# from here on only the phrases with a recognised action are considered
$phrases = \@goodphrases;

# Update the categories stored in the database.
# First collect, per language, every category id touched by an Add, Delete
# or RenameCategory action.
my $db_lang_categs = {};
for my $h ($categs_add, $categs_del, $renamed_categs) {
    for my $lang (sort keys %$h) {
        ($db_lang_categs->{$lang} ||= {})->{$_}++ for keys %{$h->{$lang}};
    }
}
# Then rewrite each of those category records: drop the deleted phrases,
# append the added ones, apply the new name if there is one, and store the
# record back with replace => 1.
for my $lang (sort keys %$db_lang_categs) {
    for my $categ_id (sort keys %{$db_lang_categs->{$lang}}) {
        my $categ = $proj->categories_dict->List({"Language" => $lang, "CatID" => $categ_id})->[0];

        if(!$categ) {
            $proj->log("WARNING: unknown categ '$categ_id'");
            next;
        }

        # add/remove phrases
        # A category that only has additions or a rename has no delete list;
        # fall back to an empty one, because dereferencing undef as an array
        # in rvalue context is fatal under "use strict".
        my %hdel = map{$_ => 1} @{($categs_del->{$lang} ||= {})->{$categ_id} || []};
        my @phrases = grep{!$hdel{$_}} split /\s*,\s*/, $categ->{CategoryPhrases};
        push @phrases, @{($categs_add->{$lang} ||= {})->{$categ_id} || []};
        $categ->{CategoryPhrases} = join(",", @phrases);

        # rename
        my $new_name = ($renamed_categs->{$lang} || {})->{$categ_id};
        $categ->{CategoryName} = $new_name if $new_name;

        $proj->categories_dict->Add( [$categ], { replace => 1 } );
    }
}

# New categories: append them to a temporary copy of the categs_from_web
# file, then move the copy over the original.
my $temp_categs_web = $proj->{temp_dir}."/categs_web";
$proj->do_sys_cmd("cp ".$Utils::Common::options->{categs_from_web}." $temp_categs_web");
# Three-argument open with a lexical handle; the default :utf8 layer set by
# "use open" still applies.
open(my $new_categs_fh, '>>', $temp_categs_web)
    or die("can't append to '$temp_categs_web': $!");
for my $item (@$new_categories) {
    # columns: id, parent id, name, followed by a literal "---"
    print {$new_categs_fh} join("\t", @$item, "---")."\n";
}
# Buffered write errors only show up at close; do not install an
# incomplete copy over the real file.
close($new_categs_fh)
    or die("can't close '$temp_categs_web': $!");
$proj->do_sys_cmd("mv $temp_categs_web ".$Utils::Common::options->{categs_from_web});

# Edits to the main per-language category dictionaries: apply the new names
# collected from RenameCategory actions.
for my $lang (sort keys %{$Utils::Common::options->{categs_languages}}) {
    my $h = $renamed_categs->{$lang};   # CatID => new name for this language
    next if !$h;

    my $dict_file = $Utils::Common::options->{categs_languages}{$lang};
    my $categs_data = {};               # CatID => [the four columns after the id]

    $proj->log("updating $dict_file");

    # current data
    open(my $dict_in, '<', $dict_file) or die("can't read '$dict_file': $!");
    while (my $line = <$dict_in>) {
        chomp $line;
        my @a = split "\t", $line;
        # A blank line (or a line without an id) would otherwise be stored
        # under the empty key and written back as a row of empty columns.
        next if !defined $a[0] || $a[0] eq '';
        $categs_data->{$a[0]} = [$a[1], $a[2], $a[3], $a[4] || ""];
    }
    close $dict_in;

    # apply the renames
    for my $cid (sort keys %$h) {
        if(!$categs_data->{$cid}) {
            # not in the dictionary yet: build the row from the "ru" record
            my $categ = $proj->get_category_full($cid, "ru");
            $categs_data->{$cid} = [$categ->{ParentID}, $categ->{CategoryName}, $h->{$cid}, ""];
        } else {
            # the third column after the id holds the name being replaced
            $categs_data->{$cid}[2] = $h->{$cid};
        }
    }

    # save the result to a temporary file, then move it into place
    my $temp_file = $proj->{temp_dir} . "/categs_web";
    open(my $dict_out, '>', $temp_file) or die("can't write to '$temp_file': $!");
    for my $cid (sort keys %$categs_data) {
        print {$dict_out} join("\t", $cid, @{$categs_data->{$cid}})."\n";
    }
    # a failed flush must not end up replacing the dictionary
    close($dict_out) or die("can't close '$temp_file': $!");

    $proj->do_sys_cmd("mv $temp_file $dict_file");
}

# Synonyms: append the new pairs to the per-language web synonyms
# dictionary, which must already exist.
foreach my $lang (sort keys %$syn_pairs) {
    my $dict_file = $Utils::Common::options->{DictNorm}{"synonyms_web_$lang"};

    $proj->log("updating $dict_file");

    die unless -e $dict_file;
    open F, ">> $dict_file" or die($!);
    foreach my $pair (@{$syn_pairs->{$lang}}) {
        # the first ":" becomes ", " in the dictionary line
        $pair =~ s/\:/, /;
        print F $pair, "\n";
    }
    close F;

    $proj->log("done");
}

# Deleted synonym pairs go to the "bad pairs" dictionary; the "ru" one has
# no language suffix in its option key.
foreach my $lang (sort keys %$deleted_syn_pairs) {
    my $suffix = "";
    $suffix = "_$lang" unless $lang eq "ru";
    my $dict_file = $Utils::Common::options->{DictNorm}{"synonyms_badpairs" . $suffix};

    $proj->log("updating $dict_file");

    open F, ">> $dict_file" or die($!);
    foreach my $pair (@{$deleted_syn_pairs->{$lang}}) {
        # the first ":" becomes a tab in the dictionary line
        $pair =~ s/\:/\t/;
        print F $pair, "\n";
    }
    close F;

    $proj->log("done");
}

# Applying virtual categories: merge the (category, virtual category,
# command) rows already in the categs_virtual_gen dictionary with the ones
# requested in this run and write the dictionary back, sorted.
my $dict_virtual_gen = $Utils::Common::options->{categs_virtual_gen};
open(my $virtual_gen_in, '<', $dict_virtual_gen)
    or die("can't read '$dict_virtual_gen': $!");
while (my $line = <$virtual_gen_in>) {
    chomp $line;
    # a blank line would otherwise come back as a row of empty columns
    next if $line eq '';
    my ($categ, $virtual, $cmd) = split "\t", $line;
    # NOTE(review): a row already present in the file overwrites the command
    # requested in this run for the same pair -- confirm this precedence is
    # intended.
    ($apply_virtual->{$categ} ||= {})->{$virtual} = $cmd;
}
close $virtual_gen_in;

# Write to a temporary file and move it into place, as the other
# dictionaries in this script do, so that a failure in the middle of the
# write cannot leave the dictionary truncated.
my $virtual_gen_temp = $proj->{temp_dir} . "/categs_virtual_gen";
open(my $virtual_gen_out, '>', $virtual_gen_temp)
    or die("can't write to '$virtual_gen_temp': $!");
for my $categ (sort keys %$apply_virtual) {
    for my $virtual (sort keys %{$apply_virtual->{$categ}}) {
        print {$virtual_gen_out} join("\t", $categ, $virtual, $apply_virtual->{$categ}{$virtual}) . "\n";
    }
}
close($virtual_gen_out)
    or die("can't close '$virtual_gen_temp': $!");
$proj->do_sys_cmd("mv $virtual_gen_temp $dict_virtual_gen");

# Dictionaries that simply get the new lines appended, handled in this
# order: context synonyms, lemmer fixes, wide spam phrases.
my @append_only_dicts = (
    # [ collected lines per language, options section, dictionary key prefix ]
    [ $csyn_pairs,            'ContextSyns_params', 'context_synonyms_web_' ],
    [ $lemmerfix_pairs,       'DictNorm',           'lemmer_fix_web_'       ],
    [ $widespamphrases_pairs, 'SpamPhrases_params', 'spam_phrases_wide_'    ],
);
for my $spec (@append_only_dicts) {
    my ($lines_by_lang, $section, $key_prefix) = @$spec;
    for my $lang (sort keys %$lines_by_lang) {
        my $dict_file = $Utils::Common::options->{$section}{$key_prefix . $lang};
        update_dict_file( $proj, $dict_file, $lines_by_lang->{$lang} );
    }
}

# Nephews: read the current dictionary (one "domain category<TAB>image
# categories joined with /" row per line), apply the additions and the
# deletions, and write it back through a temporary file.
$proj->log("updating nephews");
my $nephews_file = $Utils::Common::options->{dict_categs_nephews};
my $category_nephews = {};      # domain category => image category => 1 (kept) / 0 (dropped)
open (F, $nephews_file) or die ("can't read '$nephews_file'");
while (my $row = <F>) {
    chomp $row;
    next if $row =~ /^#/;       # comment lines are skipped (and so not written back)
    my ($domain, $images) = split /\t/, $row;
    for my $image (split /\//, $images) {
        $category_nephews->{$domain}{$image} = 1;
    }
}
close F;

# Additions are applied first, deletions second, so a deletion wins.
for my $change ([$category_added_nephews, 1], [$category_deleted_nephews, 0]) {
    my ($nephews_of, $state) = @$change;
    for my $domain (sort keys %$nephews_of) {
        for my $image (keys %{$nephews_of->{$domain}}) {
            $category_nephews->{$domain}{$image} = $state;
        }
    }
}

my $temp_file = $proj->{temp_dir} . "/nephews";
open (F, "> $temp_file") or die ("cant't write to '$temp_file'");
for my $domain (sort keys %{$category_nephews}) {
    my $state_of = $category_nephews->{$domain};
    my @kept = grep { $state_of->{$_} } sort keys %$state_of;
    # a domain category left without image categories is not written at all
    next unless @kept;
    print F join("\t", $domain, join('/', @kept)), "\n";
}
close F;
$proj->do_sys_cmd("mv $temp_file $nephews_file") or die("mv $temp_file $nephews_file failed");
$proj->log("/ updating nephews");

# Prefilters: every line of the dictionary is one prefilter; user additions
# switch a line on, user deletions switch it off, and the lines still on
# are written back sorted.
$proj->log("update prefilters");
my $prefilters_dict = $Utils::Common::options->{dict_prefilters};

# lines currently in the dictionary
open (my $current_fh, '<', $prefilters_dict) or die($!);
chomp(my @current_prefilters = <$current_fh>);
close $current_fh or die($!);
my %is_active = map { $_ => 1 } @current_prefilters;

# user changes: additions first, then deletions
@is_active{@added_prefilters}   = (1) x @added_prefilters;
@is_active{@deleted_prefilters} = (0) x @deleted_prefilters;

# write the surviving prefilters to a temporary file and move it into place
$temp_file = $Utils::Common::options->{dirs}{temp} . '/dict_prefilters';
open (my $new_fh, '>', $temp_file) or die($!);
for my $prefilter (sort keys %is_active) {
    next unless $is_active{$prefilter};
    print {$new_fh} "$prefilter\n";
}
close $new_fh or die($!);
Utils::Sys::do_sys_cmd("mv $temp_file " . $prefilters_dict);
$proj->log("/ update prefilters");

# Domain flags: dump the Domain/Flags pairs listed by the project into the
# dict_domain_flags dictionary, skipping domains whose flags value is false.
$proj->log("update domain flags");
my $df_temp_file = $Utils::Common::options->{dirs}{temp} . '/dict_domain_flags';
open (my $df_temp_fh, '>', $df_temp_file) or die($!);

my %domain_flags;
for my $row (@{$proj->domain_flags->List}) {
    $domain_flags{ $row->{Domain} } = $row->{Flags};
}

for my $domain (sort keys %domain_flags) {
    my $flags = $domain_flags{$domain};
    next unless $flags;
    print {$df_temp_fh} $domain, "\t", $flags, "\n";
}
close $df_temp_fh or die($!);
Utils::Sys::do_sys_cmd("mv $df_temp_file " . $Utils::Common::options->{dict_domain_flags});
$proj->log("/ update domain flags");


# Virtual categories: for every language that has VAdd/VDelete phrases,
# rewrite the phrase column of each row of its virtual categories file.
for my $lang (sort keys %{$Utils::Common::options->{categs_virtual_languages}}) {
    next unless $lang_catid_vadd_phrases->{$lang} || $lang_catid_vdelete_phrases->{$lang};
    $proj->log("updating $lang virtual categories");
    my $virtual_categories_file = $Utils::Common::options->{categs_virtual_languages}{$lang};
    open (F, $virtual_categories_file) or die("can't read '$virtual_categories_file'");
    my $temp_virtual_file = $proj->{temp_dir} . "/virtual_categories_$lang";
    open G, "> $temp_virtual_file" or die("can't write to '$temp_virtual_file'");
    my $is_ru = $lang eq 'ru';
    while (my $row = <F>) {
        chomp $row;
        # "ru" rows have three columns; other languages carry the ru
        # category name as an extra second column
        my ($cat_id_colon_flags_str, $ru_cat_name, $cat_name, $phrases_str);
        if ($is_ru) {
            ($cat_id_colon_flags_str, $cat_name, $phrases_str) = split /\t/, $row;
        }
        else {
            ($cat_id_colon_flags_str, $ru_cat_name, $cat_name, $phrases_str) = split /\t/, $row;
        }
        # the first column is "id:flags"; only the id is needed here
        my ($cat_id) = split /:/, $cat_id_colon_flags_str;

        # phrase => 1 (keep) / 0 (drop): file phrases, then VAdd, then VDelete
        my %keep;
        for my $phrase (split /,/, $phrases_str) {
            $keep{$phrase} = 1;
        }
        for my $phrase (@{$lang_catid_vadd_phrases->{$lang}{$cat_id}}) {
            $keep{$phrase} = 1;
        }
        for my $phrase (@{$lang_catid_vdelete_phrases->{$lang}{$cat_id}}) {
            $keep{$phrase} = 0;
        }
        my $new_phrases_str = join ',', sort grep { $keep{$_} } keys %keep;

        my @out_columns = $is_ru
            ? ($cat_id_colon_flags_str, $cat_name, $new_phrases_str)
            : ($cat_id_colon_flags_str, $ru_cat_name, $cat_name, $new_phrases_str);
        print G join("\t", @out_columns), "\n";
    }
    close G;
    close F;
    $proj->do_sys_cmd("mv $temp_virtual_file $virtual_categories_file") or die("mv $temp_virtual_file $virtual_categories_file failed");
    $proj->log("/ updating $lang virtual categories");
}

# Collected data and the dictionaries they go to. Each entry is
#   [ CatID => [phrases], dictionary file, options... ]
# with the options "removed" (CatID => [phrases] to take out of the
# dictionary) and "replace" (new value replaces the old one instead of being
# appended).
my @updates = (
    [ $deleted_categs, $Utils::Common::options->{categs_deleted} ],
    [ $categs_flags,   $Utils::Common::options->{categs_add_flags},     removed => $deleted_flags ],
    [ $deleted_flags,  $Utils::Common::options->{categs_deleted_flags}, removed => $categs_flags ],
    [ $categs_parents, $Utils::Common::options->{categs_parents},       replace => 1 ],
);
for my $lang (sort keys %{$Utils::Common::options->{categs_web}}) {
    if ($categs_antiwords->{$lang}) {
        push @updates, [ $categs_antiwords->{$lang}, $Utils::Common::options->{categs_antiwords}{$lang} ];
    }
    if ($categs_add->{$lang}) {
        push @updates, [ $categs_add->{$lang}, $Utils::Common::options->{categs_web}{$lang} ];
    }
    # deleted phrases are recorded, and phrases added again are taken out of
    # the "deleted" dictionary
    if ($categs_del->{$lang} || $categs_add->{$lang}) {
        push @updates, [
            $categs_del->{$lang} || {},
            $Utils::Common::options->{categs_web_deleted}{$lang},
            removed => $categs_add->{$lang},
        ];
    }
}

# Save into the auxiliary dictionaries. Each dictionary line is
# "ru category name<TAB>comma-separated values".
for my $update (@updates) {
    my ($categs, $dict_file, %opts) = @$update;
    my $removed = $opts{removed} || {};

    # nothing to add and nothing to take out
    next unless %$categs || %$removed;

    # the dictionaries are keyed by the "ru" category name, not by id
    my %removed_by_name;
    for my $cid (keys %$removed) {
        my $categ = $proj->get_category($cid, "ru");
        next unless $categ;
        $removed_by_name{ $categ->{CategoryName} } = $removed->{$cid};
    }

    $proj->log("updating $dict_file");

    # current data, minus the values that have to be removed
    my %values_of;
    open(F, $dict_file) or die( "can't read '$dict_file'");
    while (my $row = <F>) {
        chomp $row;
        my ($name, $values) = split "\t", $row;
        next unless $name;

        if (my $to_remove = $removed_by_name{$name}) {
            my %drop = map { $_ => 1 } @$to_remove;
            $values = join ",", grep { !$drop{$_} } split(",", $values);
        }

        $values_of{$name} = $values || "";
    }
    close F;

    # new phrases
    for my $cid (sort keys %$categs) {
        my $categ = $proj->get_category($cid, "ru");

        unless ($categ) {
            $proj->log("WARNING: no category with id $cid");
            next;
        }

        my $ru_name = $categ->{CategoryName};
        my $added   = join(",", sort @{$categs->{$cid}});
        my $current = $values_of{$ru_name};
        if ($current && !$opts{replace}) {
            # append to what is already there
            $values_of{$ru_name} = $current . "," . $added;
        }
        else {
            $values_of{$ru_name} = $added;
        }
    }

    # save into a temporary file
    my $temp_file = $proj->{temp_dir} . "/caddphr_web";
    open(F, "> $temp_file") or die("can't write to '$temp_file'");
    for my $name (sort keys %values_of) {
        print F $name, "\t", $values_of{$name}, "\n";
    }
    close F;

    # move it over the dictionary
    $proj->do_sys_cmd("mv $temp_file $dict_file") or die("mv failed");
}

# Save the result: one line holding the OR of the masks of every key that
# was counted in $result_data.
open F, "> " . $Utils::Common::options->{infuse_web_result_file} or die($!);
my $result_mask = 0;
for my $touched (keys %$result_data) {
    $result_mask |= $result_masks->{$touched};
}
print F $result_mask, "\n";
close F;

# Update the statuses: outside test mode every handled phrase is marked
# "Done".
unless (exists $args{test}) {
    my $handled = scalar @$phrases;
    $proj->log("set_user_phrase_status_by_id ... (" . $handled . " phrases)");
    for my $phr (@$phrases) {
        $proj->set_user_phrase_done_status_by_id(
            @{$phr}{qw/CatID InitialPhraseID Action Language/},
            "Done"
        );
    }
    $proj->log("set_user_phrase_status_by_id done");
}

$proj->log("done");

# Dumps the rows returned by $proj->categories_text_fields->List into the
# dict_categories_text_fields dictionary, one JSON object per line with the
# keys CatID, Language, Key, Value and UpdateTime.
#
# Takes no arguments; uses the file-level $proj. The data is written to a
# temporary file first and then moved over the dictionary. Dies when the
# temporary file cannot be opened or closed.
#
# NOTE(review): JSON::to_json is called although this file does not load
# JSON itself -- presumably one of the project modules does; confirm.
sub infuse_categories_text_fields {
    $proj->log("infusing CategoriesTextFields");
    my $temp_file = $Utils::Common::options->{dirs}{temp} . '/dict_categories_text_fields.tmp';
    my $dict_file = $Utils::Common::options->{dirs}{dicts} . '/dict_categories_text_fields';
    open (my $temp_filehandle, '>', $temp_file)
        or die("can't write to '$temp_file': $!");
    my @fields = qw/CatID Language Key Value UpdateTime/;
    for my $element (@{$proj->categories_text_fields->List}) {
        my $result = { map {$_ => $element->{$_}} @fields };
        # canonical => 1 sorts the keys, so the same data always produces
        # the same dictionary line
        print {$temp_filehandle} JSON::to_json($result, { canonical => 1 }) . "\n";
    }
    # buffered write errors surface at close; do not install a partial file
    close($temp_filehandle)
        or die("can't close '$temp_file': $!");
    $proj->do_sys_cmd("mv $temp_file $dict_file");
    $proj->log("/ infusing CategoriesTextFields");

}

# Appends lines to an existing dictionary file.
#
# Parameters:
#   $proj      - object with a log($message) method
#   $dict_file - path of the dictionary; the file must already exist
#   $pairs     - array reference of lines to append (written without changes,
#                each followed by a newline)
#
# Dies, naming the file, when the path is undefined, the file does not
# exist, or it cannot be opened for appending or closed.
sub update_dict_file {
    my ($proj, $dict_file, $pairs) = @_;

    # an option key missing for a language ends up here as undef
    die "update_dict_file: dictionary file name is not defined"
        if !defined $dict_file;

    $proj->log("updating $dict_file");

    die "update_dict_file: dictionary file '$dict_file' does not exist"
        if !-e $dict_file;
    open(my $dict_fh, '>>', $dict_file)
        or die("can't append to '$dict_file': $!");
    for my $pair (@{$pairs}) {
        print {$dict_fh} "$pair\n";
    }
    # buffered write errors only show up at close
    close($dict_fh)
        or die("can't close '$dict_file': $!");

    $proj->log("done");
}

