#!/usr/bin/perl

use my_inc '../..';
use Direct::Modern;
use Settings;

use Direct::Storage;
use ScriptHelper;
use Yandex::Retry qw/relaxed_guard retry/;
use PrimitivesIds qw/get_clientid get_key2clientid/;
use Reports::Queue;
use ShardingTools qw/ppc_shards/;
use Yandex::DBTools;
use JSON;
use Reports::Queue;
use Yandex::Compress;
use Path::Tiny;

=head2 DESCRIPTION

Скрипт для переноса файлов из MongoDB (GridFS) (и из mysql ppcfiles тоже) в MDS

Параметры:
    --collection -n    Имя коллекции в mongo. Имя в mds то же самое, но точки заменяются на подчёркивания
    --copy -c          Выполнить копирование файлов
    --rename -r        Переименовать коллекции в mongo, добавить .to_delete
    --undo -u          Переименовать коллекции обратно, удалить .to_delete
    --sleep-coef N     Пауза между итерациями
    --pre-deploy       Для xls_reports: копировать только завершённые отчёты (status = 'ready')

Для проверки что --rename / --undo успешно отработал:

    mongo -u rouser -p doo2Aesa ppcfiles01e.yandex.ru:27017/ppcfiles
    > show collections

Должны быть две коллекции: $collections.files и $collections.chunks
После --rename -- $collections.files.to_delete, $collections.chunks.to_delete
При последующих попытках записи в несуществующие коллекции они пересоздадутся,
прежде чем окончательно удалять коллекции (to_delete), нужно убедиться что
они не пересоздались.

=cut

# Command-line switches; filled in by extract_script_params() below.
my $COPY = 0;           # --copy / -c
my $RENAME = 0;         # --rename / -r
my $UNDO_RENAME = 0;    # --undo / -u
my $COLLECTION;         # --collection / -n (mandatory)
my $SLEEP_COEF = 0;     # --sleep-coef, passed to relaxed_guard(times => ...) in the copy loops
my $PRE_DEPLOY = 0;     # --pre-deploy, read by copy_xls_reports
extract_script_params(
    "copy|c" => \$COPY,
    "rename|r" => \$RENAME,
    "undo|u" => \$UNDO_RENAME,
    "collection|n=s" => \$COLLECTION,
    "sleep-coef=s" => \$SLEEP_COEF,
    "pre-deploy" => \$PRE_DEPLOY,
);

unless ($COLLECTION) {
    $log->die("--collection (-n) required");
}

# Collection name on the MDS side: the Mongo name with every dot replaced by an underscore.
my $COLLECTION_MDS = $COLLECTION =~ s!\.!_!gr;

unless ($COPY || $RENAME || $UNDO_RENAME) {
    $log->die("--copy (-c) or --rename (-r) or --undo (-u) required");
}

$log->out("START");

my $storage = Direct::Storage->new();

#my $mongo = Yandex::MongoDB->new(db => $Settings::mongodb_name, query_timeout => 1_200_000);
#my $mongo = Yandex::MongoDB->new(db => "ppcfiles", query_timeout => 1_200_000);
#
#my $mdb = $mongo->{_database};
#my $coll = $mdb->get_collection("$COLLECTION.files");
#my $count = $coll->count();
# The Mongo server is already switched off, but the script is still used for copying
# from ppcfiles, which does not need a Mongo connection.
my $mongo = undef;

my $mdb = undef;
my $coll = undef;
my $count = undef;

# Collections that have a dedicated copy routine, keyed by MDS collection name.
my %SPECIAL = (
    certification_request => \&copy_certification_request,
    api_forecast => \&copy_api_forecast,
    xls_history => \&copy_xls_history,
    offline_stat_reports => \&copy_offline_stat_reports,
    report_dynamic => \&copy_report_dynamic,
    mod_licenses => \&copy_mod_licenses,
    report_performance => \&copy_report_performance,
    xls_reports => \&copy_xls_reports,
    sale_reports => \&copy_sale_reports,
);

if ($COPY) {
    if (my $handler = $SPECIAL{$COLLECTION_MDS}) {
        $handler->();
    }
    else {
        # Generic path: walk "$COLLECTION.files" in GridFS and store every file in MDS.
        # Without live Mongo handles this used to die with "Can't call method ... on an
        # undefined value"; fail with an explicit message instead.
        $log->die("generic copy of '$COLLECTION' needs a MongoDB connection, which is disabled in this script")
            unless $mongo && $coll;

        my $files_cur = $coll->find();
        my $n = 0;
        while (my $file = $files_cur->next) {
            my $rg = relaxed_guard times => $SLEEP_COEF;
            $n++;
            my $name = $file->{filename};
            $log->out("[$n/$count] $name");
            my $data = $mongo->gridfs_get_data($COLLECTION, { filename => $name })->{content};
            $storage->save($COLLECTION_MDS, $data, filename => $name);
        }
    }
}

if ($RENAME) {
    $log->die("--rename needs a MongoDB connection, which is disabled in this script") unless $mdb;
    rename_collection("$COLLECTION.files" => "$COLLECTION.files.to_delete");
    rename_collection("$COLLECTION.chunks" => "$COLLECTION.chunks.to_delete");
}

if ($UNDO_RENAME) {
    $log->die("--undo needs a MongoDB connection, which is disabled in this script") unless $mdb;
    rename_collection("$COLLECTION.files.to_delete" => "$COLLECTION.files");
    rename_collection("$COLLECTION.chunks.to_delete" => "$COLLECTION.chunks");
}

$log->out("END");

# Move xls_reports from the ppcfiles database to ppcdict + MDS.
#
# Only rows with ready_time later than the newest ready_time already present in
# ppcdict.xls_reports are selected; with --pre-deploy the selection is further
# limited to status = 'ready'. Every selected row (minus its xls_data column) is
# inserted into ppcdict.xls_reports with ignore => 1; the xls_data payload, when
# present and the uid maps to a ClientID, is saved to the 'xls_reports' MDS collection.
sub copy_xls_reports
{
    # ~ 1GB of data
    my $newest_copied = get_one_field_sql(PPCDICT, "select max(ready_time) from xls_reports");
    my %filter = (
        ready_time__gt => $newest_copied // '0000-00-00',
        # before a deploy only finished files are copied
        ($PRE_DEPLOY ? (status => 'ready') : ()),
    );
    $log->out(\%filter);

    my $reports = get_all_sql(PPCFILES, [ "select * from xls_reports", where => \%filter ]);
    my $uid2clientid = get_key2clientid(uid => [ map { $_->{uid} } @$reports ]);
    my $mds = Direct::Storage->new();
    my $total = get_one_field_sql(PPCFILES, ["select count(*) from xls_reports", where => \%filter ]);

    my $idx = 0;
    foreach my $report (@$reports) {
        $idx++;
        # throttling guard, see --sleep-coef
        my $rg = relaxed_guard times => $SLEEP_COEF;

        # the payload goes to MDS, everything else goes to the ppcdict table
        my $content = delete $report->{xls_data};
        do_insert_into_table(PPCDICT, 'xls_reports', $report, ignore => 1);

        if (!$content) {
            # may be an unfinished task still in the queue: keep the table row, skip MDS
            $log->out("[WARN] empty data", $report);
            next;
        }

        $log->out("[$idx/$total] $report->{report_name}");

        my $client_id = $uid2clientid->{$report->{uid}};
        if (!$client_id) {
            $log->out("[$idx/$total] ERROR: invalid uid $report->{uid}");
            next;
        }

        $mds->save('xls_reports', \$content, filename => $report->{report_name}, ClientID => $client_id);
    }
    # to cross-check afterwards:
    # m pr:ppcfiles 'select id, status from xls_reports' > old
    # m pr:ppcdict  'select id, status from xls_reports' > new
    # diff old new
}

# Copy every row of ppcfiles.sale_reports into the 'sale_reports' MDS collection.
#
# Rows are fetched one at a time by report_id; data_compressed is passed through
# inflate() and stored under the row's filename. Each save is wrapped in retry().
sub copy_sale_reports
{
    my $report_ids = get_one_column_sql(PPCFILES, "select report_id from sale_reports");
    my $total = get_one_field_sql(PPCFILES, "select count(*) from sale_reports");

    foreach my $pos (1 .. scalar @$report_ids) {
        my $report_id = $report_ids->[$pos - 1];
        my $report = get_one_line_sql(PPCFILES, "select * from sale_reports where report_id = ?", $report_id);

        retry(tries => 5, pauses => [0.1,1,5], sub {
            $storage->save('sale_reports', inflate($report->{data_compressed}), filename => $report->{filename});
        });

        $log->out("[$pos/$total] $report->{filename}");
    }
}

# Copy every non-empty file of ppcfiles.mod_licenses into MDS ($COLLECTION_MDS).
#
# The table is walked in id windows of $chunk rows, from min(id) to max(id)
# inclusive. For every stored file a "<id>\t<mds filename>" line is written to
# $Settings::LOG_ROOT/mongo_to_mds_mod_licenses.data. Rows with empty file_data,
# and rows whose save still fails after the retries, are logged and skipped.
sub copy_mod_licenses
{
    my $count = get_one_field_sql(PPCFILES, "select count(*) from mod_licenses");
    my ($min, $max) = get_one_line_array_sql(PPCFILES, "select min(id), max(id) from mod_licenses");
    my $chunk = 10;
    my $n = 0;
    my $storage = Direct::Storage->new();
    my $data_path = "$Settings::LOG_ROOT/mongo_to_mds_mod_licenses.data";
    my $fh = path($data_path)->openw();

    # min(id)/max(id) come back undefined for an empty table: nothing to walk then.
    if (defined $min && defined $max) {
        # "<=" (not "<"): the window that starts exactly at max(id) must be processed too,
        # otherwise the last row is lost whenever max(id) - min(id) is a multiple of $chunk
        # (including a table with a single row).
        while ($min <= $max) {
            my $rows = get_all_sql(PPCFILES, ["select * from mod_licenses", where => { id__between => [ $min, $min + $chunk - 1 ] }]);
            for my $row (@$rows) {
                $n++;
                # throttling guard, see --sleep-coef
                my $rg = relaxed_guard times => $SLEEP_COEF;
                if (length($row->{file_data}//'') == 0) {
                    $log->out("[$n/$count] ERROR: empty data");
                    next;
                }
                my $file;
                # Success is reported through the eval's own return value, so an empty
                # or clobbered $@ cannot make a failed save look like a success.
                my $saved = eval {
                    $file = retry tries => 5, pauses => [0.1,1,5], sub {
                        $storage->save($COLLECTION_MDS, $row->{file_data}, uid => $row->{uid});
                    };
                    $log->out("[$n/$count] ID: $row->{id} => @{[$file->filename]}");
                    1;
                };
                unless ($saved) {
                    # e.g. invalid uid
                    $log->out($@);
                    next;
                }
                print {$fh} "$row->{id}\t@{[$file->filename]}\n";
            }
            $min += $chunk;
        }
    }

    # Buffered write errors only surface on close, so check it.
    close($fh) or $log->die("can't close $data_path: $!");
}

# Copy the files listed in xls_history (every PPC shard) from GridFS to MDS.
#
# Per shard, xls_history is walked in id windows of $chunk rows, from min(id) to
# max(id) inclusive. Each filename is read from the $COLLECTION GridFS collection
# and saved to $COLLECTION_MDS under the owning ClientID; both steps go through
# retry(). Files with no content in GridFS are logged and skipped.
#
# NOTE: relies on the file-level $mongo handle being a live connection.
sub copy_xls_history
{
    for my $shard (ppc_shards()) {
        my $lg = $log->msg_prefix_guard("[shard $shard]");
        $log->out("selecting names");
        my ($min, $max) = get_one_line_array_sql(PPC(shard => $shard), "select min(id), max(id) from xls_history");
        my $count = get_one_field_sql(PPC(shard => $shard), "select count(*) from xls_history");
        my $chunk = 5000;
        my $n = 0;

        # min(id)/max(id) come back undefined when the shard has no rows.
        next unless defined $min && defined $max;

        # "<=" (not "<"): the window that starts exactly at max(id) must be processed too,
        # otherwise the last row is lost whenever max(id) - min(id) is a multiple of $chunk
        # (including a shard with a single row).
        while ($min <= $max) {
            my $rows = get_all_sql(PPC(shard => $shard), [
                "select SQL_CALC_FOUND_ROWS u.ClientID, xh.filename
                from xls_history xh
                join campaigns c using(cid)
                join users u using(uid)",
                where => {
                    id__between => [ $min, $min + $chunk - 1],
                }
            ]);
            $log->out("selected ".(scalar @$rows)." rows");
            for my $row (@$rows) {
                $n++;
                # throttling guard, see --sleep-coef
                my $rg = relaxed_guard times => $SLEEP_COEF;
                my $name = $row->{filename};
                my $client_id = $row->{ClientID};
                my $data = retry tries => 5, pauses => [0.1,1,5], sub {
                    $mongo->gridfs_get_data($COLLECTION, { filename => $name })->{content};
                };

                unless (defined $data) {
                    $log->out("[$n/$count] ERROR: no data for $name");
                    next;
                }
                retry tries => 5, pauses => [0.1,1,5], sub {
                    $storage->save($COLLECTION_MDS, $data, ClientID => $client_id, filename => $name);
                };
                $log->out("[$n/$count] $name");
            }
            $min += $chunk;
        }
    }
}

# Copy the attachments of API application certification requests from GridFS to MDS.
#
# For every row of ppcdict.api_app_certification_request the request_info JSON is
# decoded and each entry of its specifications_and_screenshots list is read from
# the $COLLECTION GridFS collection by name and saved to $COLLECTION_MDS under the
# ClientID of the row's uid. Rows without a ClientID and files without content
# are logged and skipped.
#
# NOTE: relies on the file-level $mongo handle being a live connection.
sub copy_certification_request
{
    my %client_id_of;
    my $requests = get_all_sql(PPCDICT, "select SQL_CALC_FOUND_ROWS uid, request_info from api_app_certification_request");
    my $total = get_one_field_sql(PPCDICT, "SELECT found_rows()");
    my $pos = 0;

    REQUEST:
    foreach my $request (@$requests) {
        $pos++;
        # throttling guard, see --sleep-coef
        my $rg = relaxed_guard times => $SLEEP_COEF;

        # ClientID lookups are cached per uid
        my $uid = $request->{uid};
        $client_id_of{$uid} //= get_clientid(uid => $uid);
        my $client_id = $client_id_of{$uid};
        if (!$client_id) {
            $log->out("ERROR: no client_id for uid $uid");
            next REQUEST;
        }

        my $info = from_json($request->{request_info});
        my $attachments = $info->{specifications_and_screenshots}
            or next REQUEST;

        foreach my $attachment (@$attachments) {
            my $name = $attachment->{name};
            my $data = $mongo->gridfs_get_data($COLLECTION, { filename => $name })->{content};
            if (defined $data) {
                $log->out("[$pos/$total] $name");
                $storage->save($COLLECTION_MDS, $data, filename => $name, ClientID => $client_id);
            }
            else {
                $log->out("[$pos/$total] ERROR: no data for $name");
            }
        }
    }
}

# Copy API forecast and wordstat results from GridFS to MDS.
#
# For each of the two queue tables (api_queue_forecast, api_queue_wordstat) every
# row yields a file named "<type>_<id>", read from the "api.<type>" GridFS
# collection and saved to the "api_<type>" MDS collection under the ClientID of
# the row's uid. Reads and writes go through retry(). Rows without a ClientID or
# without data are logged and skipped.
#
# NOTE: relies on the file-level $mongo handle being a live connection.
sub copy_api_forecast
{
    # api_forecast + api_wordstat
    my %client_id_of;
    foreach my $kind (qw /forecast wordstat/) {
        my $tasks = get_all_sql(PPCDICT, "select SQL_CALC_FOUND_ROWS * from api_queue_$kind");
        my $total = get_one_field_sql(PPCDICT, "select found_rows()");
        my $pos = 0;
        foreach my $task (@$tasks) {
            $pos++;
            # throttling guard, see --sleep-coef
            my $rg = relaxed_guard times => $SLEEP_COEF;

            # ClientID lookups are cached per uid across both queue types
            my $uid = $task->{uid};
            $client_id_of{$uid} //= get_clientid(uid => $uid);
            my $client_id = $client_id_of{$uid};
            if (!$client_id) {
                $log->out("[$pos/$total] ERROR: missing client_id for uid $uid");
                next;
            }

            my $name = "${kind}_$task->{id}";
            my $data = retry(tries => 5, pauses => [0.1,1,5], sub {
                $mongo->gridfs_get_data("api.$kind", { filename => $name })->{content};
            });
            if (!$data) {
                $log->out("[$pos/$total] ERROR: missing data for $name");
                next;
            }

            retry(tries => 5, pauses => [0.1,1,5], sub {
                $storage->save("api_$kind", $data, filename => $name, ClientID => $client_id);
            });
            $log->out("[$pos/$total] $name");
        }
    }
}

# Copy offline statistics report files from GridFS to MDS.
#
# Every stat_reports row (all shards) has report_data_parts_qty parts; the file
# name of each part comes from Reports::Queue->get_report_filename($row, $part_id).
# Each part is read from the $COLLECTION GridFS collection and saved to
# $COLLECTION_MDS under the report owner's ClientID. Parts without data are
# logged and skipped.
#
# NOTE: relies on the file-level $mongo handle being a live connection.
sub copy_offline_stat_reports
{
    my $rows = get_all_sql(PPC(shard => 'all'), [
        "select id, u.ClientID, report_data_parts_qty, report_data_format
        from stat_reports sr
        join users u using(uid)",
    ]);

    # The progress counter below advances once per report *part*, so the total must
    # be the number of parts too (it used to be the number of reports, which made
    # "[n/total]" misleading for multi-part reports).
    my $count = 0;
    $count += ($_->{report_data_parts_qty} // 0) for @$rows;

    my $n = 0;
    for my $row (@$rows) {
        for my $part_id (1 .. $row->{report_data_parts_qty}) {
            $n++;
            # throttling guard, see --sleep-coef
            my $rg = relaxed_guard times => $SLEEP_COEF;
            my $name = Reports::Queue->get_report_filename($row, $part_id);
            $log->out("[$n/$count] $name");
            my $data = $mongo->gridfs_get_data($COLLECTION, { filename => $name })->{content};
            unless ($data) {
                $log->out("[$n/$count] ERROR: no data for $name");
                next;
            }
            $storage->save($COLLECTION_MDS, $data, filename => $name, ClientID => $row->{ClientID});
        }
    }
}

# Copy the files referenced by api_queue_search_query tasks from GridFS to MDS.
#
# The options JSON of every task is decoded; each value stored under a key
# ending in "_filename" is treated as a file name, read from the $COLLECTION
# GridFS collection and saved to $COLLECTION_MDS under the ClientID of the
# task's uid. Files without data are logged and skipped.
#
# NOTE: relies on the file-level $mongo handle being a live connection.
sub copy_report_dynamic
{
    my $tasks = get_all_sql(PPCDICT, "select SQL_CALC_FOUND_ROWS uid, options from api_queue_search_query");
    my $total = get_one_field_sql(PPCDICT, "SELECT found_rows()");
    my $client_id_of = get_key2clientid(uid => [ map { $_->{uid} } @$tasks ]);

    my $pos = 0;
    foreach my $task (@$tasks) {
        $pos++;
        # throttling guard, see --sleep-coef
        my $rg = relaxed_guard times => $SLEEP_COEF;

        my $options = from_json($task->{options});
        my @filename_keys = grep { /_filename$/ } keys %$options;

        foreach my $key (@filename_keys) {
            my $name = $options->{$key};
            my $data = $mongo->gridfs_get_data($COLLECTION, { filename => $name })->{content};

            my $size_mb = length($data // '') / 1024 / 1024;
            $log->out(sprintf "[$pos/$total] $name (%.2f Mb)", $size_mb);

            if ($data) {
                $storage->save($COLLECTION_MDS, $data, filename => $name, ClientID => $client_id_of->{$task->{uid}});
            }
            else {
                $log->out("[$pos/$total] ERROR: no data for $name");
            }
        }
    }
}

# Copy the files referenced by 'performance' tasks of api_queue_search_query
# from GridFS to MDS.
#
# Same procedure as for the other search-query reports, restricted to rows with
# type = 'performance': every "*_filename" value of the decoded options JSON is
# read from the $COLLECTION GridFS collection and saved to $COLLECTION_MDS under
# the ClientID of the task's uid. Files without data are logged and skipped.
#
# NOTE: relies on the file-level $mongo handle being a live connection.
sub copy_report_performance
{
    my $tasks = get_all_sql(PPCDICT, "select SQL_CALC_FOUND_ROWS uid, options from api_queue_search_query where type = 'performance'");
    my $total = get_one_field_sql(PPCDICT, "SELECT found_rows()");
    my $uid2clientid = get_key2clientid(uid => [ map { $_->{uid} } @$tasks ]);

    for my $i (0 .. $#$tasks) {
        my $task = $tasks->[$i];
        my $ordinal = $i + 1;
        # throttling guard, see --sleep-coef
        my $rg = relaxed_guard times => $SLEEP_COEF;

        my $options = from_json($task->{options});
        for my $key (keys %$options) {
            next unless $key =~ /_filename$/;

            my $name = $options->{$key};
            my $content = $mongo->gridfs_get_data($COLLECTION, { filename => $name })->{content};
            $log->out(sprintf "[$ordinal/$total] $name (%.2f Mb)", length($content//'')/1024/1024);

            if (!$content) {
                $log->out("[$ordinal/$total] ERROR: no data for $name");
                next;
            }
            $storage->save($COLLECTION_MDS, $content, filename => $name, ClientID => $uid2clientid->{$task->{uid}});
        }
    }
}

# Rename a MongoDB collection.
#
#   $from - current collection name
#   $to   - new collection name
#
# Logs the operation, then looks the collection up through the file-level $mdb
# database handle and calls rename() on it; the result of rename() is returned.
# Dies through $log when $mdb is not set, instead of failing later with
# "Can't call method "get_collection" on an undefined value".
sub rename_collection
{
    my ($from, $to) = @_;
    $log->out("rename $from => $to");
    $log->die("can't rename $from: no MongoDB connection is configured in this script") unless $mdb;
    my $coll = $mdb->get_collection($from);
    return $coll->rename($to);
}

