#!/usr/bin/env perl

use my_inc "..";

=head1 METADATA

<crontab>
    time: */5 * * * *
    package: scripts-switchman
    sharded: 1
    <switchman>
        group: scripts-other
    </switchman>
</crontab>
<juggler>
    host:   checks_auto.direct.yandex.ru
    sharded:        1
    ttl:            15m
    tag: direct_group_internal_systems
</juggler>

<crontab>
    time: * * * * *
    sharded: 1
    only_shards: 1
    package: scripts-sandbox
</crontab>
<juggler>
    host:   checks_auto.direct.yandex.ru
    name:           scripts.apiReportsClean.working.sandbox
    raw_host:       CGROUP%direct_sandbox
    raw_events:     scripts.apiReportsClean.working.sandbox.shard_$shard
    vars:           shard=1
    tag: direct_group_internal_systems
</juggler>

<crontab>
    time: */3 * * * *
    sharded: 1
    flock: 1
    <switchman>
        group: scripts-test
    </switchman>
    package: conf-test-scripts
</crontab>

=cut

=head1 NAME

apiReportsClean.pl

=head1 DESCRIPTION

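Removes old API statistics report jobs and their report files, and deletes
rows of api_reports_offline whose dbqueue_job_id no longer matches any job
in dbqueue_jobs or dbqueue_job_archive.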

=cut

use Direct::Modern;

use Yandex::DBQueue;

use Yandex::DBTools;

use Direct::Storage;
use EnvTools;
use Settings;
use ScriptHelper sharded => 1, 'Yandex::Log' => 'messages';

use JSON qw/to_json/;

use API::Reports::OfflineReportTask;

# Report lifetime, in seconds (5 hours).
my $REPORT_LIFE_TIME = 5 * 60 * 60;

# Report file lifetime, in seconds (10 hours).
my $REPORT_FILE_LIFE_TIME = 2 * 5 * 60 * 60;

# How many records may be deleted in one iteration.
my $DELETION_LIMIT = 5_000;

# How many files may be deleted in one iteration.
my $FILE_DELETION_LIMIT = 5_000;

# How many rows of the mapping table may be deleted in one iteration.
my $MAPPING_DELETION_LIMIT = 5_000;

# Write one record to the script log.
#
# The record is a hash with two keys: `shard` (the current $SHARD) and
# `message` (the caller's argument, passed through unchanged - callers in
# this script pass either a plain string or a hash reference).
# Returns whatever $log->out returns.
sub _log {
    my $message = shift;

    my %record = (
        shard   => $SHARD,
        message => $message,
    );

    return $log->out( \%record );
}

# Callback handed to $queue->delete_old_jobs (as `callback => ...`).
#
# Takes a reference to an array of job objects and writes one log record per
# job under the key `deleted_dbqueue_job`, containing the job's id, status,
# priority, ClientID, uid, serialized arguments, creation time, try count
# and, when the job has one, its result.
# NOTE(review): presumably invoked for the batch of jobs being deleted;
# the exact moment of the call is decided by Yandex::DBQueue - confirm there.
my $log_deletion_callback = sub {
    my $removed_jobs = shift;

    foreach my $job ( @{$removed_jobs} ) {

        # The full report request is not logged - requests can be very long,
        # so the canonical JSON is cut to its first 5000 characters.
        my $args_json = substr( to_json( $job->args, { canonical => 1 } ), 0, 5000 );

        _log( {
            deleted_dbqueue_job => {
                job_id      => $job->job_id,
                status      => $job->status,
                priority    => $job->priority,
                ClientID    => $job->ClientID,
                uid         => $job->uid,
                args        => $args_json,
                create_time => $job->create_time,
                trycount    => $job->trycount,
                result      => $job->has_result ? $job->result : undef,
            },
        } );
    }

};

_log('START');

# Step 1: ask the DBQueue of this shard to delete report jobs, passing
# $REPORT_LIFE_TIME as the age argument and capping the run at
# $DELETION_LIMIT; every job handed to the callback is written to the log.
_log('removing old jobs');
my $report_queue = Yandex::DBQueue->new(
    PPC( shard => $SHARD ),
    API::Reports::OfflineReportTask::DBQUEUE_JOB_TYPE
);
my $job_deletion_result = $report_queue->delete_old_jobs(
    $REPORT_LIFE_TIME,
    callback => $log_deletion_callback,
    limit    => $DELETION_LIMIT
);
_log( { deleted_jobs => $job_deletion_result } );

# Step 2: ask the storage to remove report files of this shard, passing
# $REPORT_FILE_LIFE_TIME as the age argument and capping the run at
# $FILE_DELETION_LIMIT. Each element of the returned list is logged.
_log('removing old files');
my $file_storage = Direct::Storage->new();
my $removed_files = $file_storage->remove_old_files(
    $SHARD,
    API::Reports::OfflineReportTask::STORAGE_FILE_TYPE,
    $REPORT_FILE_LIFE_TIME,
    limit => $FILE_DELETION_LIMIT
);

foreach my $removed_file ( @{$removed_files} ) {
    _log( { deleted_file => $removed_file } );
}

# Step 3: delete api_reports_offline rows whose dbqueue_job_id matches no row
# in dbqueue_jobs and no row in dbqueue_job_archive (both LEFT JOINs come back
# NULL). At most $MAPPING_DELETION_LIMIT rows are handled per run.
_log('removing orphaned mappings from ReportName to dbqueue_job_id');

my @orphan_query = (
    'SELECT api_reports_offline.taskid, api_reports_offline.ClientID, api_reports_offline.ReportName, api_reports_offline.dbqueue_job_id',
    'FROM api_reports_offline',
    'LEFT JOIN dbqueue_jobs ON api_reports_offline.dbqueue_job_id = dbqueue_jobs.job_id',
    'LEFT JOIN dbqueue_job_archive ON api_reports_offline.dbqueue_job_id = dbqueue_job_archive.job_id',
    WHERE => { 'dbqueue_jobs.job_id__is_null' => 1, 'dbqueue_job_archive.job_id__is_null' => 1 },
    LIMIT => $MAPPING_DELETION_LIMIT,
);
my $orphan_rows = get_all_sql( PPC( shard => $SHARD ), \@orphan_query );

_log( { found_mapping_rows => scalar @{$orphan_rows} } );

# Log every row in full before it is deleted.
foreach my $orphan_row ( @{$orphan_rows} ) {
    _log( { deleting_mapping_row => $orphan_row } );
}

# Skip the DELETE entirely when nothing was found.
my @orphan_taskids = map { $_->{taskid} } @{$orphan_rows};
if (@orphan_taskids) {
    my $affected = do_sql(
        PPC( shard => $SHARD ),
        [ 'DELETE FROM api_reports_offline', WHERE => { taskid => \@orphan_taskids } ]
    );
    _log( { affected_mapping_rows => int $affected } );
}

# Final step: report success to juggler. In the sandbox environment the
# service suffix is 'sandbox'; otherwise it is passed as undef.
_log('Sending OK to juggler');

my $juggler_suffix = EnvTools::is_sandbox() ? 'sandbox' : undef;
juggler_ok( service_suffix => $juggler_suffix );

_log('FINISH');
