#!/usr/bin/perl -w

use strict;
use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../wlib";
use lib "$FindBin::Bin/../cpan";
use Utils::Common;
use Project;

use IO::Socket qw/:DEFAULT :crlf/;
use IO::Poll;
use IO::Handle;
use Fcntl ':flock';
use File::Basename;
use Getopt::Long;
use JSON qw(to_json from_json);

use Thread 'async';

use JSON;

use POSIX ":sys_wait_h";
use POSIX;
use Data::Dumper;
use File::Temp qw/ :mktemp  /;
use File::Basename;
use Time::HiRes qw/gettimeofday tv_interval usleep/;
use Digest::MD5;
use Encode;
use LWP::UserAgent;
use File::Copy;
use Parallel::ForkManager;
use Storable qw(dclone);

use Getopt::Long;
use CatalogiaMediaProject;

use Utils::Sys qw(
    md5int
    mem_usage
    get_file_lock
    release_file_lock
    print_err
    handle_errors
    get_children_pids
    log_time_fmt
);
use Utils::Urls;
use Utils::Hosts qw( get_curr_host get_host_info get_host_role );

use BM::BMClient::FCGI_HTTP_Server qw(
    get_fcgi_parent_pid
    set_fcgi_parent_pid
    set_process_state
    get_fcgi_processes_count
);
use BM::SolomonClient;

use utf8;
use open ':utf8';
no warnings 'utf8';

# Limit passed to Parallel::ForkManager when the fork manager is created below.
use constant MAX_FORKS => 60;

# handle_errors() is imported from Utils::Sys.
# NOTE(review): presumably installs the project-wide die/warn handlers - confirm.
handle_errors();

# Enable autoflush on STDERR and STDOUT; STDOUT is left as the selected handle.
select STDERR; $| = 1;
select STDOUT; $| = 1;

# Marker substrings looked for in die messages by the request loop:
# the first one maps to HTTP 400, the second one to HTTP 503 (anything else is 500).
my $INCORRECT_INPUT_DATA_MSG = "Incorrect_input_data";
my $SERVICE_UNAVAILABLE_MSG  = "Service_unavailable";

# The first command-line argument selects the action: 'stop', 'restart' or 'start'.
my $COMMAND = $ARGV[0] || die ("no command!");
print_err("COMMAND: $COMMAND");
if ( $COMMAND eq 'stop' ) {
    # Stop the running server and exit without starting a new one.
    DoStop() && print_err("server stopped on port");
    exit(0);
} elsif ( $COMMAND eq 'restart' ) {
    # Stop the previous server, then fall through to the start-up code below.
    DoStop() && print_err("previous server stopped on port");
    print_err("now starting new server");
} elsif ( $COMMAND eq 'start' ) {
    # Refuse to start when the recorded parent pid still belongs to a live process.
    if ( get_fcgi_parent_pid() && IsProcessExists( get_fcgi_parent_pid() ) ) {
        print_err("do not start server, old server exists!");
        exit(0);
    }
    print_err("start server...");
} else {
    die("UnknownCommand:$COMMAND");
}

# Parallel forks.
# NOTE(review): $fork_manager is not referenced anywhere else in the visible part of this file.
my $fork_manager = new Parallel::ForkManager(MAX_FORKS);

# Set the environment variables: the port to listen on and the length of the listen queue.
require CGI::Fast;
$ENV{FCGI_SOCKET_PATH} = ":9091";
$ENV{FCGI_LISTEN_QUEUE} = 200;
# Workaround for redirecting STDERR (copy-pasted from the BEGIN block of CGI::Fast).
# Needed because nginx answers 502 when a lot of messages are written to STDERR - https://st.yandex-team.ru/CATALOGIA-1145 https://st.yandex-team.ru/SUPBL-311/commits
# We manually create the request that CGI::Fast works with, passing an empty stream in place of STDERR.
$CGI::Fast::Ext_Request = FCGI::Request( \*STDIN, \*STDOUT, IO::Handle->new(),
                                \%ENV, FCGI::OpenSocket( $ENV{FCGI_SOCKET_PATH}, $ENV{FCGI_LISTEN_QUEUE} ), 1 );

require FCGI::ProcManager;
require FCGI;
# /end of environment setup and socket binding

# Fork into the background; the child records its pid as the FCGI parent pid (see DoDaemonize).
DoDaemonize();

print "PID:" . $$ . "\n";
# The number of workers comes from the host info when it is defined there, otherwise from the common options.
my $n_processes = get_host_info()->{number_of_fcgi_processes} // $Utils::Common::options->{fcgi_http_server}{n_processes};
my $proc_manager = FCGI::ProcManager->new({ n_processes => $n_processes });
# NOTE(review): per the FCGI::ProcManager documentation pm_manage() forks the workers and returns
# only in them, so everything below would run once per worker - confirm.
$proc_manager->pm_manage();

# Initialize $proj.
my $proj = CatalogiaMediaProject->new({
    load_dicts => 1,
    load_minicategs_light => 1,
    load_languages => [qw(ru en tr)],
    use_comptrie_subphraser => 1,
    use_sandbox_categories_suppression_dict => 1,
    no_auth => 1,
    no_form => 1,
    nrmsrv => 0,                # do not use prefprojsrv
    memlog => 1,
});
$proj->log("CatalogiaMediaProject created");

# Load the required lazy dictionaries into memory.
# NOTE(review): nothing is loaded here any more, and $host_role is not used in the visible part of this file.
my $host_role = get_host_role();

# Publish this process as alive and ready to take requests.
set_process_state($proj, $$, 'alive,free');

# Number of requests handled by this process so far, and the current request object.
my $query_number = 0;
my $query;

# waiting for query!
# do fork and kill parent

# Both of these are used by send_fcgi_sensor(); $solomon_client is also passed to the feed object.
my $solomon_client = BM::SolomonClient->new();
my $current_host = get_curr_host();

print_err("SERVER READY!");
# Snapshot of the environment, merged back into %ENV at the start of every request.
my %saved_env = %ENV;

# Worker lifetime: 12 hours plus a random offset within +-2 hours, so the resulting
# timeout is somewhere between 10 and 14 hours and differs from worker to worker.
my $worker_life_base_part = 3600 * 12; # 12h
my $worker_life_rand_part = 3600 * 2;  # +-2h
my $worker_life_timeout = $worker_life_base_part + int( rand() * $worker_life_rand_part * 2) - $worker_life_rand_part;
my $worker_start_time = time();
# Main request loop: one iteration per accepted FastCGI request.
# The loop is left (and the process exits) when $last becomes true: after an unexpected die,
# after a failed test_encode() check, or when the memory/lifetime limits are exceeded.
while ( $query = CGI::Fast->new() ) {
    # Merge the start-up environment with the current one; current values win on conflicts.
    %ENV = (%saved_env, %ENV);
    $proc_manager->pm_pre_dispatch();
    $query_number++;

    set_process_state($proj, $$, 'alive,busy');

    my $start_time = [gettimeofday];

    print_err("query_number: $query_number Begin");
    send_fcgi_sensor({sensor => "begin_count"});

    # Serialized environment, appended to the START / ERROR / DONE log lines below.
    my $env_str = to_json(\%ENV, {canonical => 1});

    # Parse the request parameters and decode them from UTF-8.
    my $upload_hash = {};
    for my $param ( $query->param() ) {
        my $text1 = $query->param($param);
        $upload_hash->{$param} = $text1;
        $upload_hash->{$param} = Encode::decode('utf-8',$upload_hash->{$param});
    }
    # /end of parameter parsing
    my $cmd = $upload_hash->{cmd} || 'empty';
    my $act = $upload_hash->{act} || 'empty';
    my $request_uri = $ENV{REQUEST_URI} // '';

    if ($cmd eq 'empty' and $request_uri eq '/ping') {
        # Handle /ping - the health check made by the balancer.
        $cmd = 'ping';
    }

    $upload_hash->{cmd} = $cmd;
    $upload_hash->{act} = $act;
    my @upload_hash_output_keys = qw[ actparam format ordline region ]; # Only selected fields, so that whole phrase lists from the 'data' field etc. are not written to the log.
    my $upload_hash_output = { map { $_ => $upload_hash->{$_} } grep { defined $upload_hash->{$_} } @upload_hash_output_keys };
    my $upload_hash_output_str = to_json($upload_hash_output, {canonical => 1});
    $proj->log("START cmd: $cmd act: $act query_number: $query_number upload_hash_output: $upload_hash_output_str env: $env_str");
    send_fcgi_sensor({
        sensor => "cmds_begin",
        labels => {
            cmd => $cmd,
            act => $act,
        },
    });

    set_process_state($proj, $$, "alive,busy,processing:$cmd.$act");

    # $res_text is the 'result' label of the cmds_result sensor: 'end' on success, 'die' on failure.
    my $res_text = 'end';
    eval {
        # The response headers are printed only after the handler has succeeded,
        # so the error branch below is free to emit its own Status line.
        my $text_result = process_upload_hash($proj, $upload_hash);
        $proj->log("process_upload_hash done");
        print "Content-Type: text/plain; charset=\"utf-8\"\n\n";
        print $text_result;
    };
    my $die_msg;
    if ($@) {
        $res_text = 'die';
        $die_msg = $@;
        # Keep the message on a single line for the log and for the response.
        $die_msg =~ s/[\n\r]/  /g;
        $proj->log("ERROR in cmd: $cmd act: $act query_number: $query_number die_msg: $die_msg env: $env_str");
        # Default status is 500; the marker substrings downgrade it to 400 or 503.
        my $status_str = "Status: 500 Internal error\n";
        if ($die_msg =~ m/$INCORRECT_INPUT_DATA_MSG/){
            $status_str = "Status: 400 Bad Request\n";
            $res_text = 'end'; # client errors are reported to graphite as successfully processed
        }
        if ($die_msg =~ m/$SERVICE_UNAVAILABLE_MSG/){
            $status_str = "Status: 503 Service Unavailable\n";
            $res_text = 'end';
        }
        print $status_str;
        print "Content-Type: text/plain; charset=\"utf-8\"\n\n";
        print $die_msg . "\n";
        print $current_host . " " . log_time_fmt() . " [$$]\n";
    }
    send_fcgi_sensor({
        sensor => "cmds_result",
        labels => {
            cmd    => $cmd,
            act    => $act,
            result => $res_text,
        },
    });
    $proj->log("processed");

    # Self-check of JSON encoding after every request; a false or failed result stops the worker below.
    my $res_test_encode = eval { test_encode($proj) };
    if ($@) {
        $proj->log("ERROR: test_encode: $@");
    }

    # $last accumulates the reasons for leaving the loop; it stays empty when the worker may continue.
    my $last = '';
    if ($res_text eq 'die') {
        unless ($die_msg =~ m/$INCORRECT_INPUT_DATA_MSG/) {
            # After a die we do not go on processing requests, because the environment may have changed.
            # Except for $INCORRECT_INPUT_DATA_MSG (in that case the environment has not changed. We must not exit, so that incorrect requests do not cause a mass restart of the fcgi scripts)
            $last .= 'Died';
        }
    }
    unless ($res_test_encode) {
        # kostyl for https://st.yandex-team.ru/CATALOGIA-786
        $last .= 'Encode';
    }
    unless ($last) {
        # No failure so far: check the voluntary stop conditions (memory usage and worker lifetime).
        my $mem = int( mem_usage() / (1024 * 1024)); # Mb
        $proj->log("Memory: $mem");
        my $stop_reason;
        my $max_mem = 3.7 * 1024; # 3.7 Gb
        if ($mem > $max_mem) {
            $stop_reason = 'Memory';
        } elsif ($worker_start_time + $worker_life_timeout < time()) {
            $stop_reason = 'Timeout';
        }

        if ($stop_reason) {
            my $lock_name = "fcgi-http-server_exit";
            if (get_file_lock($lock_name)) { # Limit the number of processes that restart at the same time
                # Stop only while at least 70% of the configured workers are still reported alive.
                my $alive_processes_number = get_fcgi_processes_count($proj, 'alive');
                if ($alive_processes_number >= 0.7 * $n_processes) {
                    $proj->log("need stop, reason: $stop_reason, alive: $alive_processes_number, stop");
                    # The state is switched to 'exiting' while the lock is still held.
                    set_process_state($proj, $$, 'exiting');
                    release_file_lock($lock_name);
                    $last = $stop_reason;
                } else {
                    $proj->log("WARN: need stop, reason: $stop_reason, but there are only $alive_processes_number alive processes");
                    release_file_lock($lock_name);
                }
            } else {
                $proj->log("WARN: need stop, reason: $stop_reason, but could not get lock");
            }
        }
    }
    $last ||= 0;

    my $duration = tv_interval($start_time);
    send_fcgi_sensor({
        sensor => "cmds_timings",
        labels => {
            cmd    => $cmd,
            act    => $act,
        },
        value => $duration,
    });
    $proj->log("DONE  cmd: $cmd act: $act query_number: $query_number Duration: $duration is_last: $last env: $env_str");

    set_process_state($proj, $$, 'alive,free') unless $last;
    print_err("query_number: $query_number End");

    # Finish the current request on the manually created FCGI request when it is set.
    if ( $CGI::Fast::Ext_Request ) {
        $CGI::Fast::Ext_Request->Finish();
    }
    else {
        FCGI::finish();
    }
    $proc_manager->pm_post_dispatch();

    if ($last) {
        $proj->log("last: $last cmd: $cmd act: $act");
        last;
    }
}

# The loop is over: clear the published state of this process and exit.
set_process_state($proj, $$, '');

print_err("HERE WE EXIT.");
$proj->log("HERE WE EXIT.");

exit(0);


# Dispatch one decoded request to the matching handler and build the response body.
#
# Parameters:
#   $proj        - project object; the methods used here are current_lang, current_region,
#                  log, do_sys_cmd, page, page_list, phrase_list, feed and json_obj.
#   $upload_hash - hashref with the request parameters (cmd, act, data, actparam, region,
#                  ordline, url, feed_path, ...). 'act' and 'cmd' default to 'empty' and
#                  the defaults are written back into the hash.
#
# Returns the response body. It is encoded to UTF-8 here, except for the 'yml2directinf'
# and 'get_feed' commands, which return their text as is.
#
# Dies with a message starting with $INCORRECT_INPUT_DATA_MSG on invalid input;
# other exceptions from the handlers are propagated unchanged.
sub process_upload_hash {
    my ($proj, $upload_hash) = @_;

    $proj->current_lang($upload_hash->{lang} || 'ru');

    my $text_result = "";
    my $need_encode_utf8 = 1;

    $upload_hash->{act} //= 'empty';
    $upload_hash->{cmd} //= 'empty';

    if ( $upload_hash->{act} eq 'alive' ) {
        $text_result = 'ALIVE';
    } elsif ( $upload_hash->{cmd} eq 'ping' ) {
        # Handle /ping - the health check made by the balancer.
        $text_result = '';
    } elsif ($upload_hash->{cmd} eq 'test_die') {
        # For testing die in fcgi.
        die "TEST die";
    } elsif ($upload_hash->{cmd} eq 'test_sleep') {
        # For testing timeouts.
        # The duration comes from the request and ends up inside a command string,
        # so only a plain non-negative number is accepted.
        my $duration = $upload_hash->{duration} // 5;
        die "$INCORRECT_INPUT_DATA_MSG: duration must be a non-negative number"
            unless $duration =~ /\A\d+(?:\.\d+)?\z/;
        $proj->do_sys_cmd("sleep " . $duration);
    } elsif ($upload_hash->{cmd} eq 'test_warn') {
        # https://st.yandex-team.ru/CATALOGIA-1145
        my $cc = $upload_hash->{cc} || 0;
        warn "This is a test warning for CATALOGIA-1145 ($_)"  for (1 .. $cc);
    } elsif ($upload_hash->{cmd} eq 'test_host_info') {
        # For testing operation behind the balancer.
        $text_result = to_json(get_host_info()) . "\n";
    } elsif ($upload_hash->{cmd} eq 'test_fcgi_mem') {
        # For testing memory handling in fcgi.
        # wget -O- 'http://bmapi-test01i.yandex.ru/fcgi-bin/?data=8589934592%23END&cmd=test_fcgi_mem' 2>/dev/null ; echo
        my ($mem) = ($upload_hash->{data} // '') =~ m/^(\d+)/;
        $mem //= 0;
        my @a;
        $proj->log("test_fcgi_mem ($mem) ...");
        # Allocate until the process reaches the requested memory usage.
        while (mem_usage() < $mem) {
            push @a, [ 1 .. 1024*1024 ];
        }
        $proj->log("test_fcgi_mem ($mem) done");
        $text_result = "mem:$mem [$$] OK";
        $text_result .= "#END";
    } elsif ( $upload_hash->{cmd} eq 'page' ) {
        my $act = $upload_hash->{act};
        my $data = $upload_hash->{data} // '';

        if ($data !~ /#END$/) {
            $text_result .= "ERROR: data without #END mark. do exit";
        } else {
            $data =~ s/#END$//;

            my $page_list = $proj->page_list([ map{$proj->page("", "", $_)} split "\n", $data ]);
            # NOTE(review): unlike the phrase_list branch below, $act is not checked with
            # _disallow_dispatch() before being called as a method - confirm this is intended.
            my $phrase_list = $page_list->$act;

            $text_result .= join('', map {"$_\n"} @$phrase_list) . "#END";
        }
    } elsif ( $upload_hash->{act} ne 'empty' ) {
        my $act = $upload_hash->{act};
        my $data = $upload_hash->{data} // '';
        my $actparam = $upload_hash->{actparam};
        # Applies a chain of phrase-list methods, each one to the result of the previous one.
        my $do_list_acts = sub {
            my ($phl, $acts_list) = @_;
            $acts_list = [ split(',', $acts_list) ] unless ref($acts_list); # If the list was passed as a string, split it into an array
            for my $acts_list_item (@$acts_list) {
                my $disallow_msg = _disallow_dispatch($proj, $acts_list_item);
                die "$INCORRECT_INPUT_DATA_MSG: $disallow_msg" if defined $disallow_msg;
                if (defined $actparam) {
                    $phl = $phl->$acts_list_item($actparam);
                } else {
                    $phl = $phl->$acts_list_item;
                }
            }
            return $phl;
        };

        my $prev_region = $proj->current_region; # Save the original region settings and continue inside an eval block so that they are restored even on an exception

        eval {
            if ($upload_hash->{region}) { # Pick up the settings from the request
                my @regs = grep {$_} split /\D+/, $upload_hash->{region};
                $proj->current_region([@regs]);
            }

            if ( $data !~ /#END$/ ) {
                die "$INCORRECT_INPUT_DATA_MSG: Data without #END mark";
            } else {
                $data =~ s/#END$//;
                $data = [split(/\r?\n/, $data)];
                if ($upload_hash->{'ordline'}) {
                    # One output line per input line, results joined with commas.
                    for my $pht (@$data) {
                        my $phl = $do_list_acts->($proj->phrase_list([$pht]), $act);
                        $text_result .= join(',', @$phl)."\n";
                    }
                } else {
                    my $phl = $do_list_acts->($proj->phrase_list($data), $act);
                    $text_result .= join('', map {"$_\n"} @$phl);
                }
                $text_result .= "#END";
            }
        };

        # Restore the previous settings and rethrow the exception from the eval block, if there was one
        my $exception = $@;
        if ($upload_hash->{region}) { # Put the original region settings back
            $proj->current_region($prev_region);
        }
        die $exception if $exception;

    } elsif ( $upload_hash->{cmd} eq 'yml2directinf' ) {
        return '' unless $upload_hash->{url};
        $need_encode_utf8 = 0;
        my $datacamp_crawler_url = Utils::Urls::get_cgi_url_param($upload_hash->{url}, "site");
        if (Utils::Urls::is_datacamp_feed_url($upload_hash->{url}) and $datacamp_crawler_url) {
            # for smart by site, fake answer
            $text_result = BM::BannersMaker::Feed::fake_yml2directinf($proj, $datacamp_crawler_url);
        } else {
            my @arr_feed_params_names = qw{ url max_file_size_type max_file_size business_type is_new_feed};
            if ($upload_hash->{login}) {
                push @arr_feed_params_names, qw{ login pass };
            }

            my $start_time_feed = [gettimeofday];
            if (Utils::Urls::is_datacamp_feed_url($upload_hash->{url})) {
                # yaml2directinf not support download from datacamp => get client feedurl from datacamp feedurl
                $upload_hash->{url} = Utils::Urls::get_cgi_url_param($upload_hash->{url}, "url");
            }
            my $NEW_FEED_ID = 449517;
            my $NEW_FEED_EXCEPTIONS = {
                # SUPBL-2765
                434104 => 1,
            };
            # A request without feed_id is treated as feed_id 0, i.e. never a "new" feed.
            my $feed_id = $upload_hash->{feed_id} // 0;
            $upload_hash->{is_new_feed} = 1 if $feed_id >= $NEW_FEED_ID && !exists($NEW_FEED_EXCEPTIONS->{$feed_id});
            my $fd = $proj->feed({ ( download_timeout => 1200, solomon_client => $solomon_client), map { $_ => $upload_hash->{$_} } grep { $upload_hash->{$_} } @arr_feed_params_names });
            my $duration_feed = tv_interval($start_time_feed);
            send_fcgi_sensor({
                service => "yml2directinf",
                sensor  => "timings",
                labels  => {
                    metric => "feed_creation",
                },
                value   => $duration_feed,
            });

            $fd->fds->{business_type} = $upload_hash->{business_type} if $upload_hash->{business_type};

            my @params_to_pass = qw/debug gen_previews use_dse feed_id is_new_feed/;
            my %inf_params = map { $_ => $upload_hash->{$_} // '' } @params_to_pass;
            eval {
                # see https://st.yandex-team.ru/DYNSMART-119
                $inf_params{gen_previews} = 1 if ($inf_params{gen_previews} eq '');
                # $inf_params{save_error_feeds} = 1; # collect feeds with errors
                $text_result .= $fd->yml2directinf( \%inf_params );
                if ( $fd->fds->{_offers_tskv_light_file} ) {
                    unlink $fd->fds->{_offers_tskv_light_file};
                }
            };
            # Copy $@ at once: it is a global and must not be used as scratch storage.
            my $feed_error = $@;
            if ($feed_error) {
                # The real error text is exposed only in debug mode.
                $feed_error = 'Can\'t download feed or it is in incorrect format' unless ($upload_hash->{debug});
                my $prserr = $proj->json_obj->utf8->encode({
                    errors => [{
                        code => 1812,
                        message => 'Fatal error.',
                        message_ru => 'Фатальная ошибка.',
                        desc => "Parse error: $feed_error\n",
                    }],
                });
                $text_result .= $prserr;
            }
        }
    } elsif ( $upload_hash->{cmd} eq 'get_feed' && $upload_hash->{feed_path}) {
        # For testing: upload of a feed.
        $need_encode_utf8 = 0;
        my $feed_file = $upload_hash->{feed_path};

        # feed_path comes from the request and is appended to the test feeds directory,
        # so '..' components (and NUL bytes) must not be allowed to escape from it.
        my @feed_path_parts = split m{[/\\]+}, $feed_file;
        if ($feed_file =~ /\0/ or grep { $_ eq '..' } @feed_path_parts) {
            die "$INCORRECT_INPUT_DATA_MSG: feed_path must not contain '..' components";
        }
        $feed_file =~ s/^[\.\/]//g;

        # Extract the thematics (topic) name.
        my $thematics = $feed_file;
        $thematics =~ s/^\/*//;
        $thematics =~ s/(\.|\\).*//;
        my $all_thematics = $Utils::Common::options->{tests_banners_generation}{thematics};
        my $is_known_thematics = grep { $_ eq $thematics } @{ $all_thematics // [] };
        unless ($is_known_thematics) {
            # Not a known name: drop the part after the last underscore.
            $thematics =~ s/(.*)\_.*$/$1/;
        }

        my $tests_feed_files_dir = $Utils::Common::options->{dirs}{tests_feed_files};
        my $path = '';
        # If the source feed is requested, it is either xml or csv.
        if ( ($upload_hash->{source_feed} || '') eq '1' ) {
            $path = $tests_feed_files_dir . "/" . $thematics . "/" . $feed_file;
            $path .= (-f ($path.'.xml')) ? '.xml' : '.csv';
        } else {
            # the received feed_file = "thematics.format"
            $path = $tests_feed_files_dir . "/" . $thematics . "/" . $feed_file;
            if (not -f $path) {
                # the received feed_file = "thematics/thematics.format"
                $path = $tests_feed_files_dir . "/" . $feed_file;
            }
        }

        if ( not -f $path) {
            $text_result .= "can't find file '$path'";
        } elsif (open my $feed_fh, '<', $path) {
            $text_result .= join('', <$feed_fh>);
            close $feed_fh;
        } else {
            # The file exists but cannot be read: report it instead of silently returning nothing.
            $text_result .= "can't open file '$path': $!";
        }
    } elsif ( $upload_hash->{cmd} eq 'empty' and  $upload_hash->{act} eq 'empty' and  ($upload_hash->{keywords} // '') eq 'ata' ) {
        # Workaround for unrecognized requests   {"keywords":"ata","cmd":"empty","act":"empty"}
        $proj->log("WARN: Unknown data: " . to_json($upload_hash));
    } else {
        die "$INCORRECT_INPUT_DATA_MSG: " . to_json($upload_hash);
    }

    # kostyl for https://st.yandex-team.ru/CATALOGIA-786
    if ($upload_hash->{act} eq 'pages2pages_categs') {
        $proj->log("pages2pages_categs result: " . join(" // ", split /\n/, $text_result));
    }

    if ($need_encode_utf8) {
        $proj->log("Encode UTF-8");
        $text_result = Encode::encode('UTF-8', $text_result);
    }
    return $text_result;
}

# Decide whether a phrase-list method may be called on behalf of an external request.
#
# Parameters:
#   $proj - project object providing phrase_list()
#   $act  - name of the requested phrase-list method
#
# Returns undef when the method exists and is allowed for "bmapi",
# otherwise a human-readable reason for the refusal.
sub _disallow_dispatch {
    my ($proj, $act) = @_;

    # Both checks are always made, each on a phrase list obtained from the project.
    my $is_known   = $proj->phrase_list->can($act);
    my $is_allowed = $proj->phrase_list->can_be_externally_used($act, "bmapi");

    unless ($is_known) {
        return "Unknown command: (" . $act . ")";
    }
    unless ($is_allowed) {
        return "Disallowed command: (" . $act . "), contact developers if you want it allowed";
    }
    return undef;
}

# Detach the server from the launching process.
#
# Forks; the parent exits at once, the child redirects STDIN to /dev/null,
# starts a new session and records its own pid as the FCGI parent pid.
# Dies when fork, the STDIN redirect or setsid fails.
sub DoDaemonize {
    print_err("daemonizing, see output in logs.");

    my $child_pid = fork();
    if ($child_pid) {
        # Parent: its job is done.
        exit;
    }
    unless (defined $child_pid) {
        die "Cannot fork to daemonize: $!\n";
    }

    # Child from here on.
    open( STDIN, "+>/dev/null" ) or die "Could not redirect STDIN to /dev/null";

    POSIX::setsid() or die("Cannot start a new session!\n");

    set_fcgi_parent_pid($$);
}

# Stop a running server.
#
# Steps:
#   1. kill the process recorded as the FCGI parent pid, if there is one;
#   2. for up to 60 attempts, kill whatever process still listens on port 9091;
#   3. remove the leftover process_state_* files from the process state directory.
#
# Always returns 1. Note that KillProcess() exits the program when a process
# does not terminate in time.
sub DoStop {
    my $old_process_pid = get_fcgi_parent_pid();
    if ( $old_process_pid ) {
        print_err("killing process $old_process_pid...");
        KillProcess( $old_process_pid );
        print_err("killed successfully process $old_process_pid...");
    }

    # kill all bad things which are listening to 9091:
    for ( my $att = 0; $att < 60; $att++ ) {
        my $command = 'netstat -nlp | grep \':9091\' | grep LISTEN | awk {\'print $7\'} | tail -n1';
        my $process_result = `$command`;
        print_err("COMMAND:$command");
        print_err("RESULT:$process_result");
        # Only the leading digits of the pipeline output are taken as the pid.
        my $process2kill = 0;
        if ( $process_result =~ /^(\d+)/ ) {
            $process2kill = $1;
        }
        if ( $process2kill > 0 ) {
            print_err("also we want to kill some bad: $process2kill");
            KillProcess($process2kill);
        }
        else {
            last;
        }
        sleep(1);
    }

    # Remove the state files left by the stopped workers. This is done without a shell,
    # so an empty directory is not an error and every failed removal gets logged.
    my $alive_processes_dir = $BM::BMClient::FCGI_HTTP_Server::fcgi_processes_dir;
    if (defined $alive_processes_dir && -d $alive_processes_dir) {
        if (opendir(my $dir_handle, $alive_processes_dir)) {
            my @state_files =
                grep { -l $_ || !-d $_ }
                map  { "$alive_processes_dir/$_" }
                grep { /^process_state_/ } readdir $dir_handle;
            closedir $dir_handle;
            for my $state_file (@state_files) {
                unlink $state_file
                    or print_err("cannot remove $state_file: $!");
            }
        } else {
            print_err("cannot open directory $alive_processes_dir: $!");
        }
    }

    return 1;
}

# Check whether a process with the given pid exists.
#
# Parameters:
#   $pid - process id; surrounding whitespace is ignored.
#
# Returns a true value when the process exists and an empty string otherwise,
# including the case when $pid is undefined, zero or not a plain number.
#
# The check uses signal 0 instead of a ps|awk|grep shell pipeline, so the pid
# is never interpolated into a shell command.
sub IsProcessExists {
    my ( $pid ) = @_;

    return '' unless defined $pid;
    my ($numeric_pid) = $pid =~ /\A\s*(\d+)\s*\z/;
    # Pid 0 would address the whole process group, so it is rejected as well.
    return '' unless $numeric_pid;

    # Signal 0 delivers nothing and only reports whether the target can be signalled.
    return 1 if kill(0, $numeric_pid);

    # EPERM means the process exists but belongs to another user.
    return $!{EPERM} ? 1 : '';
}

# Terminate a process and wait for it to disappear.
#
# Parameters:
#   $pid - id of the process to terminate.
#
# Sends SIGTERM and polls once a second, for up to two minutes, until the process is gone.
# Returns 1 when the process does not exist (any more).
# When the process is still alive after the timeout, logs the failure and exits the program.
sub KillProcess {
    my ( $pid ) = @_;

    return 1 unless IsProcessExists($pid);

    # Only a plain positive number may be signalled: the builtin kill would treat
    # anything else as pid 0, which addresses the whole process group.
    my ($numeric_pid) = $pid =~ /\A\s*(\d+)\s*\z/;
    return 1 unless $numeric_pid;

    # The signal is sent with the builtin, so the pid never reaches a shell.
    kill('TERM', $numeric_pid)
        or print_err("cannot send TERM to process:$pid: $!");

    my $wait_start = time;
    while ( time - $wait_start < 120 ) { # at most 2 minutes for everything to die
        sleep 1;
        return 1 unless IsProcessExists($pid);
    }

    if ( IsProcessExists($pid) ) {
        # NOTE(review): the exit status is 0 although the stop has failed; kept as is
        # because callers of the script may rely on it - confirm.
        print_err("cannot kill process:$pid, exiting");
        exit(0);
    }
    return 1;
}

# https://st.yandex-team.ru/CATALOGIA-786
# Self-check of the project's JSON encoder.
#
# Encodes a one-element array holding a mixed Latin/Cyrillic sample string and
# compares the result with the expected literal text. The attempt is always logged;
# a mismatch is logged once more as an error.
#
# Returns 1 when the encoder output matches, 0 otherwise.
sub test_encode {
    my ($proj) = @_;

    my $sample  = "TestТест";
    my $encoded = $proj->json_obj->encode( [ $sample ] );
    $proj->log("test_encode: encode( [ '$sample' ] ) = '$encoded'");

    return 1 if $encoded eq qq{["$sample"]};

    $proj->log("ERROR: encode( [ '$sample' ] ) = '$encoded'");
    return 0;
}

# Push a single sensor value to Solomon.
#
# Parameters:
#   $sensor_params - hashref describing the sensor (sensor, labels, value, service, ...).
#                    It is not modified, and neither is its nested 'labels' hash.
#
# Defaults applied to the pushed copy: cluster is always "host_info", service defaults
# to "fcgi_server", value to 1 and the 'host' label to the current host.
#
# Returns whatever push_single_sensor() returns.
sub send_fcgi_sensor {
    my ( $sensor_params ) = @_;

    my $sensor = {
        %$sensor_params,
        cluster => "host_info",
        # Copy the labels as well: a shallow copy would share the caller's hash
        # and the 'host' default below would leak into it.
        labels  => { %{ $sensor_params->{labels} // {} } },
    };
    $sensor->{service} //= "fcgi_server";
    $sensor->{value} //= 1;
    $sensor->{labels}->{host} //= $current_host;

    $solomon_client->push_single_sensor($sensor);
}
