-- Query parameters supplied by the caller.
-- NOTE(review): these look like positional placeholders, presumably bound in
-- order of appearance - keep the three lines in this order; confirm with the launcher.
-- $dir    - prefix of the source log table path
-- $day    - appended to $dir and to the per-day output path; also stored as the day column of the summaries
-- $output - prefix of every output table path
$dir = ?;
$day = ?;
$output = ?;

-- Source table and the three output tables derived from the parameters.
$src_table = $dir || "/" || $day;
$insert_detailed_table = $output || "/byDays/" || $day;
$insert_title_table = $output || "/summaryTitle";
$insert_snippet_table = $output || "/summarySnippet";
-- NOTE(review): $parse and $format are not referenced anywhere else in the visible script.
$parse = DateTime::Parse("%Y-%m-%d");
$format = DateTime::Format("%Y-%m-%d");

-- Options passed to Yson::ConvertTo when the payload is converted to a typed struct.
$options = Yson::Options(true as Strict, true as AutoConvert);
-- Log messages of interest start with this prefix; the JSON payload follows it.
$MESSAGE_PREFIX = "Apply UC text generation: ";

-- Stage 1: select the "save results" records written by AdGenerationController
-- and parse the JSON payload that follows $MESSAGE_PREFIX in the message.
$data = (
    SELECT
        log_time,
        trace_id AS log_req_id,
        Yson::ParseJson(Substring(message, length($MESSAGE_PREFIX))) AS data
    FROM $src_table
    WHERE service = 'direct.web'
        AND class_name = 'ru.yandex.direct.web.entity.adgeneration.controller.AdGenerationController'
        AND method = 'ad_generation.ad_generation.save_results'
        AND String::StartsWith(message, $MESSAGE_PREFIX)
        -- rows whose payload does not cast to Json are dropped
        AND CAST(Substring(message, length($MESSAGE_PREFIX)) AS Json) IS NOT NULL
);

-- True when at least one of the three lists has items, i.e. the record carries
-- list-valued saved titles, saved snippets or multi-suggestion responses.
$is_multiple = ($savedTitles, $savedSnippets, $suggestionsGenerationResponses) -> {
    $has_titles = ListHasItems($savedTitles);
    $has_snippets = ListHasItems($savedSnippets);
    $has_suggestions = ListHasItems($suggestionsGenerationResponses);
    RETURN $has_titles OR $has_snippets OR $has_suggestions;
};

-- Reshapes a single-suggestion response (one title, one snippet) into the
-- multi-suggestion layout: the title and the snippet each become a one-element list.
$toMultipleSuggestions = ($generationResponse) -> {
    RETURN <|
        reqId: $generationResponse.reqId,
        additionalInfo: $generationResponse.additionalInfo,
        titles: AsList($generationResponse.title),
        bodies: AsList($generationResponse.snippet)
    |>;
};

-- Stage 2: convert the parsed Yson payload into a typed struct using $options
-- (Strict and AutoConvert are both enabled there).
-- The payload can describe the saved texts either as scalars (savedTitle,
-- savedSnippet) or as lists (savedTitles, savedSnippets), and the generated
-- suggestions either as generationResponses (one title and one snippet per
-- response) or as suggestionsGenerationResponses (lists of titles and bodies
-- per response). Both response kinds share the same additionalInfo layout.
$data = (
    select
        log_time,
        log_req_id,
        Yson::ConvertTo(data, Struct<
            campaignId: uint64,
            newCampaign: bool,
            textSingle: bool,
            savedTitle: String,
            savedTitles: List<String>,
            savedSnippet: String,
            savedSnippets: List<String>,
            generationResponses: List<Struct<
                reqId: Uint64,
                additionalInfo: Struct<
                    Url: String,
                    warnings: List<String>,
                    algVersion: String
                >,
                title: String,
                snippet: String
            >>,
            suggestionsGenerationResponses: List<Struct<
                reqId: Uint64,
                additionalInfo: Struct<
                    Url: String,
                    warnings: List<String>,
                    algVersion: String
                >,
                titles: List<String>,
                bodies: List<String>
            >>
        >, $options) as data
    from $data
);

-- Stage 3: bring both payload layouts to one shape.
--  * textSingle is true when the payload says so or when none of the
--    list-valued fields has items;
--  * saved texts are always lists (a scalar is wrapped into a one-element list);
--  * suggest is the LAST response: a single-suggestion response is preferred and
--    reshaped, otherwise the last multi-suggestion response is used, otherwise NULL.
$data = (
    SELECT
        log_time,
        log_req_id,
        data.campaignId AS cid,
        data.textSingle
            OR NOT $is_multiple(data.savedTitles, data.savedSnippets, data.suggestionsGenerationResponses)
            AS textSingle,
        data.newCampaign AS newCampaign,
        CASE
            WHEN ListHasItems(data.savedTitles) THEN data.savedTitles
            ELSE AsList(data.savedTitle)
        END AS savedTitles,
        CASE
            WHEN ListHasItems(data.savedSnippets) THEN data.savedSnippets
            ELSE AsList(data.savedSnippet)
        END AS savedSnippets,
        CASE
            WHEN ListHasItems(data.generationResponses)
                THEN $toMultipleSuggestions(data.generationResponses[ListLength(data.generationResponses)-1])
            WHEN ListHasItems(data.suggestionsGenerationResponses)
                THEN data.suggestionsGenerationResponses[ListLength(data.suggestionsGenerationResponses)-1]
            ELSE null
        END AS suggest
    FROM $data
);

-- Stage 4: lift the fields of the chosen suggestion to top-level columns
-- (bodies are called snippets from here on).
$data = (
    SELECT
        log_time,
        log_req_id,
        cid,
        newCampaign,
        textSingle,
        savedTitles,
        savedSnippets,
        suggest.titles AS titles,
        suggest.bodies AS snippets,
        suggest.additionalInfo AS additionalInfo
    FROM $data
);

-- START QUALITY FUNCTIONS

-- Length of $text in Unicode characters (the text is cast to Utf8 first).
$length = ($text) -> {
    $as_utf8 = CAST($text AS Utf8);
    RETURN Unicode::GetLength($as_utf8);
};

-- Word-similarity tuning used by $compare_word: the compared prefix is never
-- shorter than $MIN_SIMILAR_PREFIX_LENGTH characters, and otherwise it is the
-- length of the longer word minus $MAX_SIMILAR_SUFFIX_LENGTH characters.
$MIN_SIMILAR_PREFIX_LENGTH=3;
$MAX_SIMILAR_SUFFIX_LENGTH=3;

-- Returns the first $cut_length characters of $word, or the whole word when it
-- is shorter than that.
$cut_word = ($word, $cut_length) -> {
    $word_utf8 = CAST($word AS Utf8);
    $prefix_length = CAST(min_of($length($word), $cut_length) AS uint32);
    RETURN Unicode::Substring($word_utf8, 0, $prefix_length);
};

-- Two words are similar when they are equal, or when their prefixes of length
-- max($MIN_SIMILAR_PREFIX_LENGTH,
--     len(a) - $MAX_SIMILAR_SUFFIX_LENGTH,
--     len(s) - $MAX_SIMILAR_SUFFIX_LENGTH)
-- are equal.
$compare_word = ($a_word, $s_word) -> {
    $a_stem_length = $length($a_word) - $MAX_SIMILAR_SUFFIX_LENGTH;
    $s_stem_length = $length($s_word) - $MAX_SIMILAR_SUFFIX_LENGTH;
    $cut_length = CAST(max_of(
        $MIN_SIMILAR_PREFIX_LENGTH,
        $a_stem_length,
        $s_stem_length) AS Uint32);
    $same_word = $a_word == $s_word;
    $same_prefix = $cut_word($a_word, $cut_length) == $cut_word($s_word, $cut_length);
    RETURN $same_word OR $same_prefix;
};

-- Splits $text on single spaces and returns the distinct words that are longer
-- than 3 characters.
$uniqWords = ($text) -> {
    $words = String::SplitToList($text, " ");
    $long_words = ListFilter($words, ($word) -> {
        RETURN $length($word) > 3;
    });
    RETURN ListUniq($long_words);
};

-- Returns $value when it is not NULL and greater than zero, otherwise $default.
$positiveOrDefault = ($value, $default) -> {
    $is_positive = $value IS NOT NULL AND $value > 0;
    RETURN IF($is_positive, $value, $default);
};


-- Word-set overlap between a saved text and a suggested text.
-- Returns a struct with:
--   save         - number of distinct long words in the saved text (999 when there are none),
--   suggest      - number of distinct long words in the suggestion (999 when there are none),
--   intersection - number of saved words that are similar (see $compare_word)
--                  to at least one suggested word.
$countUsedWords = ($save, $suggest) -> {
    $saved_words = $uniqWords($save);
    $suggested_words = $uniqWords($suggest);
    $reused_words = ListFilter($saved_words, ($saved_word) -> {
        $similar = ListFilter($suggested_words, ($suggested_word) -> {
            RETURN $compare_word($suggested_word, $saved_word);
        });
        RETURN ListLength($similar) > 0;
    });
    RETURN AsStruct(
        $positiveOrDefault(ListLength($saved_words), 999) AS save,
        $positiveOrDefault(ListLength($suggested_words), 999) AS suggest,
        $positiveOrDefault(ListLength($reused_words), 0) AS intersection
    );
};

-- Longest run of consecutive saved words that occurs verbatim in the suggestion.
-- Returns a struct with:
--   save         - character length of the saved text (999 when it is zero or NULL),
--   suggest      - character length of the suggestion (999 when it is zero or NULL),
--   intersection - character length of the longest matching run, 0 when nothing matches.
$maxSubstring = ($save, $suggest) -> {
    $words = String::SplitToList($save, " ");
    -- every contiguous run of words: for each start index, every non-empty prefix of the tail
    $runs = ListFlatten(
        ListMap(
            ListFromRange(0, ListLength($words)),
            ($start) -> {
                $tail = ListSkip($words, $start);
                RETURN ListMap(
                    ListFromRange(0, ListLength($tail)),
                    ($extra) -> {
                        RETURN ListTake($tail, 1+$extra);
                    }
                );
            }
        )
    );
    $phrases = ListMap($runs, ($run) -> {
        RETURN nvl(ListConcat($run, " "), "  ");
    });
    $found = ListFilter($phrases, ($phrase) -> {
        RETURN String::Contains($suggest, $phrase);
    });
    -- longest match first
    $longest_first = ListSortDesc($found, ($phrase) -> {
        RETURN $length($phrase);
    });
    $best_length = IF(ListLength($longest_first) > 0, $length($longest_first[0]), 0);
    RETURN AsStruct(
        $positiveOrDefault($length($save), 999) AS save,
        $positiveOrDefault($length($suggest), 999) AS suggest,
        $positiveOrDefault($best_length, 0) AS intersection
    );
};

-- Normalizes a text before comparison: every run of whitespace, punctuation and
-- special symbols becomes a single space, surrounding spaces are removed and
-- the result is lower-cased (returned as Utf8).
-- The strip matters: without it, punctuation at the edges of a text left a
-- leading or trailing space, which inflated the character length used as the
-- substring-score denominator and produced empty "words", so texts that
-- differed only by edge punctuation could never reach a score of 1.0.
$formatText = ($text) -> {
    -- the question mark is concatenated as a separate single-quoted literal;
    -- presumably to keep it apart from placeholder substitution - confirm
    $replacePunctuation = Re2::Replace("[\\s\\[\\]\\-+,.\\\"!" || '?' || "\\\\()%$€;:\\/&'*_=#№«»–—− ™®©’°⁰¹²³⁴⁵⁶⁷⁸⁹]+");
    $spaced = $replacePunctuation($text, " ");
    $trimmed = String::Strip($spaced);
    RETURN Unicode::ToLower(cast($trimmed as Utf8));
};

-- Share of the saved text covered by the suggestion: intersection / save,
-- computed in floating point.
$result = ($score) -> {
    $numerator = 1.0 * $score.intersection;
    RETURN $numerator / $score.save;
};

-- Word-set score of one saved text against each suggestion; returns one score
-- per element of $multi_suggest, in the same order.
$multiSuggestResultByWordSet = ($save, $multi_suggest) -> {
    RETURN ListMap($multi_suggest, ($suggest) -> {
        $score = $countUsedWords($save, $suggest);
        RETURN $result($score);
    });
};

-- Longest-common-run score of one saved text against each suggestion; returns
-- one score per element of $multi_suggest, in the same order.
$multiSuggestResultBySubstring = ($save, $multi_suggest) -> {
    RETURN ListMap($multi_suggest, ($suggest) -> {
        $score = $maxSubstring($save, $suggest);
        RETURN $result($score);
    });
};

-- Scores a list of saved texts against a list of suggestions.
-- Returns a struct with:
--   save, suggest - the normalized ($formatText) saved texts and suggestions,
--   byWordSet     - average over saved texts of the best word-set score among the suggestions,
--   bySubstring   - average over saved texts of the best longest-run score among the suggestions,
--   len           - maximum character length of the raw suggestions; when
--                   $skipSmallSymbols is true the characters , . " ! ; : are not counted.
$score_multi_suggest = ($multi_save, $multi_suggest, $skipSmallSymbols) -> {
    $count_small_symbols = Re2::Count("[,.\"!;:]");
    -- lengths are measured on the raw suggestions, before normalization
    $raw_lengths = ListMap($multi_suggest, ($suggest) -> {
        RETURN $length($suggest) - IF($skipSmallSymbols, $count_small_symbols($suggest), 0);
    });
    $longest = ListMax($raw_lengths);
    $saves = ListMap($multi_save, ($save) -> {
        RETURN $formatText($save);
    });
    $suggests = ListMap($multi_suggest, ($suggest) -> {
        RETURN $formatText($suggest);
    });
    $word_set_scores = ListMap($saves, ($save) -> {
        RETURN ListMax($multiSuggestResultByWordSet($save, $suggests));
    });
    $substring_scores = ListMap($saves, ($save) -> {
        RETURN ListMax($multiSuggestResultBySubstring($save, $suggests));
    });
    RETURN AsStruct(
        $saves AS save,
        $suggests AS suggest,
        ListAvg($word_set_scores) AS byWordSet,
        ListAvg($substring_scores) AS bySubstring,
        $longest AS len
    );
};

-- FINISH QUALITY FUNCTIONS

-- Stage 5: score saved titles against suggested titles and saved snippets
-- against suggested snippets. Small punctuation is excluded from the length
-- only for snippets (third argument).
$data = (
    SELECT
        log_time,
        log_req_id,
        cid,
        textSingle,
        newCampaign,
        savedTitles,
        titles,
        $score_multi_suggest(savedTitles, titles, false) AS titleResult,
        savedSnippets,
        snippets,
        $score_multi_suggest(savedSnippets, snippets, true) AS snippetResult,
        additionalInfo
    FROM $data
);

-- Per-record report row.
-- titleType / snippetType classify the suggestion:
--   "NONE"  - the record has no suggestion at all (the list is NULL),
--   "EMPTY" - a suggestion exists but carries no text,
--   "OK"    - a suggestion with text exists.
-- The length is coalesced to 0 before the comparison: for an empty suggestion
-- list the maximum length is NULL, a NULL condition makes IF take its else
-- branch, and such rows used to be reported as "OK" instead of "EMPTY".
$detailed = (
    select
        log_time,
        log_req_id,
        cid,
        textSingle as isTextSingle,
        newCampaign as isNewCampaign,
        savedTitles as savedTitle,
        titles as suggestTitle,
        savedSnippets as savedSnippet,
        snippets as suggestSnippet,
        titleResult.len as titleLength,
        snippetResult.len as snippetLength,
        if(titles is null, "NONE", if(coalesce(titleResult.len, 0) = 0, "EMPTY", "OK")) as titleType,
        if(snippets is null, "NONE", if(coalesce(snippetResult.len, 0) = 0, "EMPTY", "OK")) as snippetType,
        titleResult.byWordSet as titleByWordSet,
        titleResult.bySubstring as titleBySubstring,
        snippetResult.byWordSet as snippetByWordSet,
        snippetResult.bySubstring as snippetBySubstring,
        additionalInfo.Url as url,
        additionalInfo.algVersion as algVersion
    from $data
);

-- Rewrite the per-day detailed table (existing content is truncated),
-- ordered by log time.
INSERT INTO $insert_detailed_table WITH TRUNCATE
SELECT
    log_time,
    log_req_id,
    cid,
    isTextSingle,
    isNewCampaign,
    titleType,
    snippetType,
    titleByWordSet,
    titleBySubstring,
    snippetByWordSet,
    snippetBySubstring,
    savedTitle,
    suggestTitle,
    savedSnippet,
    suggestSnippet,
    titleLength,
    snippetLength,
    url,
    algVersion
FROM $detailed
ORDER BY log_time;

-- One-row daily summary for titles.
--   cases    - all records,
--   requests - records that have a title suggestion (type is not "NONE"),
--   suggests - records whose title suggestion has text (type is "OK"),
--   additional - ";"-delimited counters:
--     GOOD      - word-set or substring score above 0.5,
--     PERFECT   - substring score equal to 1.0,
--     USELESS   - suggestion with text whose word-set score is 0.0,
--     overlimit - longest suggested title is longer than 56 characters.
$titleSummary = (
    SELECT
        $day AS day,
        COUNT(*) AS cases,
        COUNT_IF(titleType != "NONE") AS requests,
        COUNT_IF(titleType = "OK") AS suggests,
        ";" || ListConcat(
            AsList(
                "GOOD=" || CAST(COUNT_IF(titleByWordSet > 0.5 OR titleBySubstring > 0.5) AS String),
                "PERFECT=" || CAST(COUNT_IF(titleBySubstring = 1.0) AS String),
                "USELESS=" || CAST(COUNT_IF(titleType = "OK" AND titleByWordSet = 0.0) AS String),
                "overlimit=" || CAST(COUNT_IF(titleLength > 56) AS String)
            ),
            ";"
        ) || ";" AS additional
    FROM $detailed
);

-- One-row daily summary for snippets.
--   cases    - all records,
--   requests - records that have a snippet suggestion (type is not "NONE"),
--   suggests - records whose snippet suggestion has text (type is "OK"),
--   additional - ";"-delimited counters:
--     GOOD      - word-set score above 0.5,
--     PERFECT   - substring score equal to 1.0,
--     USELESS   - suggestion with text whose word-set score is 0.0,
--     overlimit - longest suggested snippet is longer than 81 counted characters.
-- NOTE(review): GOOD here looks only at the word-set score, while the title
-- summary also accepts a substring score above 0.5 - confirm this is intended.
$snippetSummary = (
    SELECT
        $day AS day,
        COUNT(*) AS cases,
        COUNT_IF(snippetType != "NONE") AS requests,
        COUNT_IF(snippetType = "OK") AS suggests,
        ";" || ListConcat(
            AsList(
                "GOOD=" || CAST(COUNT_IF(snippetByWordSet > 0.5) AS String),
                "PERFECT=" || CAST(COUNT_IF(snippetBySubstring = 1.0) AS String),
                "USELESS=" || CAST(COUNT_IF(snippetType = "OK" AND snippetByWordSet = 0.0) AS String),
                "overlimit=" || CAST(COUNT_IF(snippetLength > 81) AS String)
            ),
            ";"
        ) || ";" AS additional
    FROM $detailed
);

-- Return both freshly computed summaries as query results.
SELECT * FROM $titleSummary;
SELECT * FROM $snippetSummary;

-- Keep the title summary row only when the summary table has no row for $day yet,
-- so the append further down does not add the same day twice.
$titleSummary = (
    SELECT *
    FROM $titleSummary
    WHERE day NOT IN (
        SELECT day
        FROM $insert_title_table
        WHERE day = $day
    )
);

-- Keep the snippet summary row only when the summary table has no row for $day yet,
-- so the append further down does not add the same day twice.
$snippetSummary = (
    SELECT *
    FROM $snippetSummary
    WHERE day NOT IN (
        SELECT day
        FROM $insert_snippet_table
        WHERE day = $day
    )
);

-- Append the title summary row (if any is left) to the title summary table.
INSERT INTO $insert_title_table
SELECT
    day,
    cases,
    requests,
    suggests,
    additional
FROM $titleSummary;

-- Append the snippet summary row (if any is left) to the snippet summary table.
INSERT INTO $insert_snippet_table
SELECT
    day,
    cases,
    requests,
    suggests,
    additional
FROM $snippetSummary;
