#!/usr/bin/perl

=head1 NAME

    get_duplicate_phrases_ids.t

=head1 DESCRIPTION

    Юнит-тест, проверяющий функцию Mediaplan::get_duplicate_phrases_ids().
    Данная функция ходит в БК, поэтому иметь такой тест для ежекоммитного запуска не очень хорошо, 
    к тому же тест закладывается на ctr, который возвращает БК, а он со временем может поменяться.
    Непосредственное вычисление дублей происходит в функции PhraseDoubles::get_all_doubles(), на которую есть полноценный 
    юнит-тест (PhraseDoubles/get_all_doubles.t).
    Для выбора того, кто из всех дублей является главным, используется функция Mediaplan::is_first_phrase_winner_by_ctr(),
    на которую тоже есть юнит-тест (Mediaplan/phrase_winner_by_ctr.t).

=cut

use warnings;
use strict;
use Test::More;

use Test::Deep;

use Mediaplan qw/get_duplicate_phrases_ids/;

use Settings;
use Yandex::DBUnitTest qw/init_test_dataset/;
use List::MoreUtils qw/uniq/;
use utf8;
use open ':std' => ':utf8';

# Shared fixture values referenced by the test table further down.

# Forecast show counts assigned to phrases: $shows_g is the larger value and
# $shows_l the smaller one (presumably "greater"/"less" -- naming unconfirmed).
my ($shows_g, $shows_l) = (500, 105);

# Banner domains: the one used by default and an alternative one.
my ($d, $d2) = ('ya.ru', 'ya.com');

# Banner geo values (kept as strings): the default one and an alternative one.
my ($g, $g2) = ('255', '253');

# Builds one banner fixture row.
#
# Arguments (positional): cid, mbid, domain, geo, and an array ref of phrase
# specs. Each phrase spec is itself an array ref whose elements are passed to
# ph() after cid and mbid.
#
# Returns a hash ref with the keys cid, mbid, domain, geo, Phrases (array ref
# of the hashes produced by ph()) and statusShowsForecast, which is always
# 'Processed'.
sub banner {
    my ($cid, $mbid, $domain, $geo, $phrase_specs) = @_;

    my @phrase_rows = map { ph($cid, $mbid, @{$_}) } @{$phrase_specs};

    return {
        cid                 => $cid,
        mbid                => $mbid,
        domain              => $domain,
        geo                 => $geo,
        Phrases             => \@phrase_rows,
        statusShowsForecast => 'Processed',
    };
}

# Builds one phrase fixture row.
#
# Arguments (positional): cid, mbid, id, phrase, showsForecast.
# Returns a hash ref with exactly those keys plus numword, which is always 1.
sub ph {
    my ($cid, $mbid, $id, $phrase, $shows_forecast) = @_;

    return {
        cid           => $cid,
        mbid          => $mbid,
        id            => $id,
        phrase        => $phrase,
        showsForecast => $shows_forecast,
        numword       => 1,
    };
}
# Fixture description handed to init_test_dataset(): every table starts with
# no rows. All tables are taken from PPC except forecast_ctr, which is taken
# from PPCDICT. Rows for campaigns, mediaplan_banners and mediaplan_bids are
# appended further down in this file, before the dataset is initialised.
#
# (The former file-scoped @phrases_fields list was never read and is gone.)
my %db;
for my $table (qw/
    campaigns
    mediaplan_banners
    mediaplan_bids
    vcards
    minus_words
    maps
    addresses
    camp_options
/) {
    $db{$table} = {
        original_db => PPC,
        rows => [],
    };
}
$db{forecast_ctr} = {
    original_db => PPCDICT,
    rows => [],
};

# Test table. Each entry is a three-element array ref:
#
#   [ { cid => CID, banners => [ BANNER, ... ] }, [ EXPECTED_IDS ], DESCRIPTION ]
#
# where BANNER is [ mbid, domain, geo, [ PHRASE, ... ] ] and PHRASE is
# [ id, phrase, showsForecast ]. EXPECTED_IDS are the phrase ids that
# get_duplicate_phrases_ids() is expected to return for the campaign; their
# order is irrelevant because both sides are sorted before comparison.
#
# Descriptions are kept unique so that a failing TAP line identifies exactly
# one entry.
my @tests = (
    [
        {cid => 101, banners => [
            [101, $d, $g, [[1011, "белый", $shows_g]]],
            [102, $d, $g, [[1021, "белое", $shows_l]]],
        ]},
        [1021], "Same formas, diff lemmas 1",
    ],

    [
        {cid => 102, banners => [
            [103, $d, $g, [[1031, "белый", $shows_g]]],
            [104, $d, $g, [[1041, "белая", $shows_l]]],
        ]},
        [1041], "Same lemmas, diff formas: adjective",
    ],

    [
        {cid => 103, banners => [
            [105, $d, $g, [[1051, "купи", $shows_g]]],
            [106, $d, $g, [[1061, "купить", $shows_l]]],
        ]},
        [1061], "Same lemmas, diff formas: verb",
    ],

    [
        {cid => 104, banners => [
            [107, $d, $g, [[1071, "купи", $shows_g]]],
            [108, $d, $g, [[1081, "покупать", $shows_l]]],
        ]},
        [], "Diff words",
    ],

    [
        {cid => 105, banners => [
            [109, $d, $g, [[1091, "пластиковые окна", $shows_g]]],
            [110, $d, $g, [[1101, "пластиковое окно", $shows_l]]],
        ]},
        [1101], "Several words",
    ],

    [
        {cid => 106, banners => [
            [111, $d, $g, [[1111, "кошка 2 года", $shows_g]]],
            [112, $d, $g, [[1121, "кошка года", $shows_l]]],
        ]},
        [], "Words with numbers",
    ],

    [
        {cid => 107, banners => [
            [113, $d, $g, [[1131, "рукавные разветвления", $shows_g]]],
            [114, $d, $g, [[1141, "разветвления рукавные", $shows_l]]],
        ]},
        [1141], "Diff word order",
    ],

    # Same word order in both phrases; every word differs only in its form.
    [
        {cid => 108, banners => [
            [115, $d, $g, [[1151, "белый купи", $shows_g]]],
            [116, $d, $g, [[1161, "белая купить", $shows_l]]],
        ]},
        [1161], "Several words, each in a diff forma",
    ],

    [
        {cid => 109, banners => [
            [117, $d, $g, [[1171, "подъемный стол", $shows_l]]],
            [118, $d, $g, [[1181, "подъемные столы", $shows_g]]],
        ]},
        [1171], "Diff orig formas 1",
    ],

    [
        {cid => 110, banners => [
            [119, $d2, $g, [[1191, "белый", $shows_l]]],
            [120, $d,  $g, [[1201, "белая", $shows_g]]],
        ]},
        [], "Diff domains",
    ],

    # NOTE(review): both banners use the identical domain 'www.abcdefg.ru', so
    # the "with www / without www" situation named in the description is not
    # actually exercised. Presumably one of them was meant to be 'abcdefg.ru'
    # -- confirm the intended behaviour before changing the data.
    [
        {cid => 111, banners => [
            [121, 'www.abcdefg.ru', $g, [[1211, "белый", $shows_l]]],
            [122, 'www.abcdefg.ru', $g, [[1221, "белая", $shows_g]]],
        ]},
        [1211], "Diff domain: with www and without www",
    ],

    [
        {cid => 112, banners => [
            [123, $d, $g2, [[1231, "белый", $shows_l]]],
            [124, $d, $g,  [[1241, "белая", $shows_g]]],
        ]},
        [], "Diff geo",
    ],

    [
        {cid => 113, banners => [
            [125, $d, $g, [[1251, "белый", $shows_l], [1252, "купи", $shows_g]]],
            [126, $d, $g, [[1261, "белая", $shows_g], [1262, "купи", $shows_l]]],
        ]},
        [1251, 1262], "Several phrases",
    ],

    [
        {cid => 114, banners => [
            [127, $d, $g, [[1271, "белый", $shows_l]]],
            [128, $d, $g, [[1281, "белое", $shows_g]]],
            [129, $d, $g, [[1291, "белое", $shows_g+300]]],
        ]},
        [1271, 1291], "Several banners",
    ],

    [
        {cid => 115, banners => [
            [130, $d, $g, [[1301, "белый", $shows_l],     [1302, "купи", $shows_g]]],
            [131, $d, $g, [[1311, "белое", $shows_g],     [1312, "купи", $shows_g+300]]],
            [132, $d, $g, [[1321, "белое", $shows_g+300], [1322, "купи", $shows_l]]],
        ]},
        [1301, 1321, 1312, 1322], "Several banners and phrases",
    ],

    [
        {cid => 116, banners => [
            [133, $d,  $g, [[1331, "белый", $shows_l],     [1332, "купи", $shows_g]]],
            [134, $d,  $g, [[1341, "белое", $shows_g],     [1342, "купи", $shows_g+300]]],
            [135, $d2, $g, [[1351, "белый", $shows_g],     [1352, "купи", $shows_g+300]]],
            [136, $d,  $g, [[1361, "белое", $shows_g+300], [1362, "купить", $shows_l]]],
        ]},
        [1361, 1331, 1362, 1342], "Several banners and phrases with diff domain",
    ],

    # Three distinct hyphenated codes; none of them is expected to be a duplicate.
    [
        {cid => 117, banners => [
            [137, $d, $g, [[1371, "ШПО-102", $shows_l], [1372, "ШПО-103", $shows_g]]],
            [138, $d, $g, [[1381, "ШПО-105", $shows_l]]],
        ]},
        [], "Diff hyphenated codes",
    ],

    [
        {cid => 118, banners => [
            [139, $d, $g, [[1391, '"слон белый"', $shows_g]]],
            [140, $d, $g, [[1401, 'слон белый', $shows_l]]],
        ]},
        [], "Words with quotes",
    ],
);

# Fill the campaigns table: one row per distinct cid mentioned in @tests.
for my $cid (uniq map { $_->[0]{cid} } @tests) {
    push @{ $db{campaigns}{rows} }, { cid => $cid };
}

# Fill the mediaplan_banners and mediaplan_bids tables.
for my $test (@tests) {
    my $cid = $test->[0]{cid};

    for my $banner_spec (@{ $test->[0]{banners} }) {
        my $banner = banner($cid, @{$banner_spec});

        # Phrases are stored in their own table, so they are detached from the
        # banner row before that row is queued.
        my $phrases = delete $banner->{Phrases};

        push @{ $db{mediaplan_banners}{rows} }, $banner;
        push @{ $db{mediaplan_bids}{rows} }, @{$phrases};
    }
}

init_test_dataset(\%db);

# Run get_duplicate_phrases_ids() once per test entry and compare the returned
# ids with the expected ones. The order of the ids does not matter, so both
# lists are sorted first; ids are numbers, hence the numeric comparator.
foreach my $test (@tests) {
    my ($input, $expected_ids, $description) = @{$test};

    my $doubles = get_duplicate_phrases_ids([$input->{cid}]);

    is_deeply(
        [sort { $a <=> $b } @{$doubles}],
        [sort { $a <=> $b } @{$expected_ids}],
        $description,
    );
}

# Exactly one assertion is made per test entry.
done_testing(scalar @tests);

1;
