#!/usr/bin/perl

=head2 NAME
    
    DIRECT-31082 - подсчет статистики по использованию целей метрики

=head2 DESCRIPTION

    Скрипт подсчитывает количество различных целей, используемых:
        * в условиях ретаргетинга
        * в кампаниях для оптимизаций
        * всего для ретаргетинга и кампаний
        * одновременно для ретаргетинга и кампаний
    А также считает количество клиентов, использующих хотя бы одну цель для ретаргетинга и кампании одновременно.

    В качестве "используемых" целей используется пересечение (для отсеивания ошибок и устаревших данных) таблиц:
        * для ретаргетинга - retargeting_conditions и retargeting_goals
        * для кампаний - campaigns и camp_metrika_goals

    На чтение ходит в реплику HEAVY (т.к. запросы небольшие, но требуют полного сканирования таблиц)

=cut

use warnings;
use strict;

use my_inc '../..';

use List::MoreUtils qw/uniq/;
use JSON;

use Yandex::DBTools;

use lib::abs '..';
use Settings;
use ScriptHelper;

$log->out('START');

# Index of the retargeting_goals table across all shards:
# goal_id -> ret_cond_id -> number of rows seen for that pair.
my %retargeting_goals;
$log->out('Fetching data from retargeting_goals');
my $sth = exec_sql(PPC(shard => 'all'), 'SELECT ret_cond_id, goal_id FROM retargeting_goals');
while (my @row = $sth->fetchrow_array) {
    my ($cond_id, $gid) = @row;
    # The inner hash is autovivified on first use.
    $retargeting_goals{$gid}{$cond_id} += 1;
}
$sth->finish();
$log->out('Fetching data from retargeting_goals ... done');


# Goals that are really used by live retargeting conditions.
# A goal counts as "used" by a client when it is listed in the condition_json of
# a non-deleted condition AND the same (ret_cond_id, goal_id) pair is present in
# retargeting_goals (the intersection filters out stale or broken data).
my %goals_used_in_retargeting;    # goal_id -> ClientID -> undef
$log->out('Fetching data from retargeting_conditions');
$sth = exec_sql(PPC(shard => 'all'), 'SELECT ret_cond_id, clientid, is_deleted, condition_json FROM retargeting_conditions');
while (my ($ret_cond_id, $clientid, $is_deleted, $condition_json) = $sth->fetchrow_array) {
    # Deleted conditions never contribute, so do not spend time parsing their JSON.
    next if $is_deleted;

    # A single malformed (or NULL) condition must not abort the whole scan.
    my $condition = eval { from_json($condition_json) };
    if (ref($condition) ne 'ARRAY') {
        $log->out("Skipping retargeting condition $ret_cond_id: cannot parse condition_json");
        next;
    }

    foreach my $rule (@$condition) {
        next unless ref($rule) eq 'HASH' && ref($rule->{goals}) eq 'ARRAY';
        foreach my $goal (@{ $rule->{goals} }) {
            next unless ref($goal) eq 'HASH';
            my $goal_id = $goal->{goal_id};
            next unless $goal_id;
            # Require the pair (goal, this very condition) in retargeting_goals;
            # checking the goal alone would accept a goal that is only confirmed
            # for some other condition. The two-step test avoids autovivifying
            # an entry in %retargeting_goals.
            next unless exists $retargeting_goals{$goal_id}
                     && exists $retargeting_goals{$goal_id}{$ret_cond_id};
            $goals_used_in_retargeting{$goal_id}{$clientid} = undef;
        }
    }
}
$sth->finish();
$log->out('Fetching data from retargeting_conditions ... done');
# The raw index is no longer needed; release its memory before the next scans.
%retargeting_goals = ();


# Index of the camp_metrika_goals table across all shards:
# cid -> goal_id -> undef (the inner hash is used as a set).
my %camp_metrika_goals_by_cid;
$log->out('Fetching data from camp_metrika_goals');
$sth = exec_sql(PPC(shard => 'all'), 'SELECT cid, goal_id FROM camp_metrika_goals');
while (my @row = $sth->fetchrow_array) {
    my ($campaign_id, $metrika_goal_id) = @row;
    # The per-campaign set is autovivified on first use.
    $camp_metrika_goals_by_cid{$campaign_id}{$metrika_goal_id} = undef;
}
$sth->finish();
$log->out('Fetching data from camp_metrika_goals ... done');


# Goals used by autobudget campaigns, restricted to goals that are also present
# in camp_metrika_goals for the same campaign.
my %goals_used_in_campaign;     # goal_id -> cid -> ClientID
$log->out('Fetching data from campaigns');
$sth = exec_sql(PPC(shard => 'all'), 'SELECT STRAIGHT_JOIN c.cid, c.autobudget_goal_id, u.ClientID FROM campaigns c JOIN users u USING (uid) WHERE autobudget_goal_id IS NOT NULL AND autobudget = "Yes"');
while (my @row = $sth->fetchrow_array) {
    my ($cid, $autobudget_goal_id, $ClientID) = @row;

    # Plain rvalue lookup: undef when the campaign has no rows in
    # camp_metrika_goals, otherwise the hashref holding its set of goals.
    my $known_goals = $camp_metrika_goals_by_cid{$cid};

    my @used_goal_ids;
    if ($autobudget_goal_id && $known_goals && exists $known_goals->{$autobudget_goal_id}) {
        # A specific goal is configured and confirmed for this campaign.
        @used_goal_ids = ($autobudget_goal_id);
    }
    elsif ($autobudget_goal_id == 0 && $known_goals) {
        # Goal id 0: every goal recorded for the campaign is counted
        # (presumably "optimize by all goals" - TODO confirm).
        @used_goal_ids = keys %$known_goals;
    }

    $goals_used_in_campaign{$_}{$cid} = $ClientID for @used_goal_ids;
}
$sth->finish();
$log->out('Fetching data from campaigns ... done');

# Intersection of the two goal sets:
#   $cnt_both          - number of goals present in both campaigns and retargeting;
#   %clients_used_both - set of clients that use at least one goal in both ways.
my $cnt_both = 0;
my %clients_used_both;
$log->out('Search intersection between campaigns and retargetings goals');
foreach my $goal_id (keys %goals_used_in_campaign) {
    next unless exists $goals_used_in_retargeting{$goal_id};
    $cnt_both += 1;

    my $retargeting_clients = $goals_used_in_retargeting{$goal_id};
    # Owners of campaigns using this goal who also use it in retargeting.
    my @shared_clients = grep { exists $retargeting_clients->{$_} }
                         values %{ $goals_used_in_campaign{$goal_id} };
    @clients_used_both{@shared_clients} = ();
}
$log->out('Search intersection between campaigns and retargetings goals ... done');

# Final report: sizes of each goal set, of their union and intersection,
# and the number of clients found in the intersection.
my $retargeting_goal_cnt = scalar keys %goals_used_in_retargeting;
my $campaign_goal_cnt    = scalar keys %goals_used_in_campaign;

# Union of both goal sets, built as a hash used as a set.
my %any_goal;
@any_goal{ keys(%goals_used_in_retargeting), keys(%goals_used_in_campaign) } = ();
my $any_goal_cnt = scalar keys %any_goal;

my $both_client_cnt = scalar keys %clients_used_both;

$log->out("Different goals used in retargeting: " . $retargeting_goal_cnt);
$log->out("Different goals used in campaigns: " . $campaign_goal_cnt);
$log->out("Different goals used in campaigns and retargetings: " . $any_goal_cnt);
$log->out("Different goals used both in campaigns and retargetings: $cnt_both");
$log->out("Different clients used goals both in campaigns and retargetings: " . $both_client_cnt);

$log->out('FINISH');
