-- Query parameters. Each `?` is a positional placeholder.
-- NOTE(review): presumably substituted by the caller in this order (dir, day, output) — confirm.
-- $dir: directory holding the source log tables (one table per day, see $src_table).
$dir = ?;
-- $day: name of the day's source table; also written as the `day` value of the summary row.
$day = ?;
-- $output: root path under which the detailed and summary tables are written.
$output = ?;

-- Source log table for the requested day.
$src_table = $dir || "/" || $day;
-- Per-day detailed output table (rewritten on every run, see the `with truncate` insert).
$insert_detailed_table = $output || "/byDays/" || $day;
-- Summary table shared by all days; rows are appended to it.
$insert_summary_table = $output || "/summary";
-- Date parser/formatter for "%Y-%m-%d"; not referenced in the visible part of this script.
$parse = DateTime::Parse("%Y-%m-%d");
$format = DateTime::Format("%Y-%m-%d");

-- Options passed to Yson::ConvertTo: both Strict and AutoConvert are enabled.
$options = Yson::Options(true as Strict, true as AutoConvert);
-- Log messages of interest start with this prefix; the JSON payload follows it.
$MESSAGE_PREFIX = "Apply UC keywords generation: ";

-- Step 1: pick the "save results" log records of the ad-generation controller
-- and parse the JSON payload that follows $MESSAGE_PREFIX in the message.
$data = (
    select
        log_time,
        trace_id as log_req_id,
        Yson::ParseJson(Substring(message, Length($MESSAGE_PREFIX))) as data
    from $src_table
    where
        service = 'direct.web'
        and class_name = 'ru.yandex.direct.web.entity.adgeneration.controller.AdGenerationController'
        and method = 'ad_generation.ad_generation.save_results'
        and String::StartsWith(message, $MESSAGE_PREFIX)
        -- keep only records whose payload is valid JSON
        and cast(Substring(message, Length($MESSAGE_PREFIX)) as Json) is not null
);

-- Step 2: convert the parsed payload into a typed structure.
-- The untyped payload is kept alongside it as `example`.
$data = (
    select
        log_time,
        log_req_id,
        Yson::ConvertTo(
            data,
            Struct<
                campaignId: Uint64,
                savedKeywords: List<String>,
                generationIterations: List<
                    Struct<
                        addKeywords: List<String>,
                        generationResponse: Struct<
                            reqId: Uint64,
                            keywords: List<String>,
                            additionalInfo: Struct<
                                warnings: List<String>,
                                BannerURL: String,
                                BannerTitle: String,
                                BannerText: String,
                                RegionIds: List<String>
                            >
                        >
                    >
                >
            >,
            $options
        ) as data,
        data as example
    from $data
);

-- Step 3: flatten the payload. `suggest` is the generation response of the
-- last iteration, or null when there were no iterations at all.
$data = (
    select
        example,
        log_time,
        log_req_id,
        data.campaignId as cid,
        data.savedKeywords as saved,
        ListLast(data.generationIterations).generationResponse as suggest
    from $data
);

-- START QUALITY FUNCTIONS

-- Lower bound for the prefix length compared in $compare_word.
$MIN_SIMILAR_PREFIX_LENGTH = 3;
-- Number of trailing bytes of a word that $compare_word allows to differ.
$MAX_SIMILAR_SUFFIX_LENGTH = 3;
-- Score $compare_word gives to two words that share the compared prefix.
$DIFF_FORM_SIMILARITY = 1;
-- Absolute minimum score for a suggested phrase to count as useful.
$SIMILAR_LIMIT = 0.1;
-- Fraction of the best score a suggested phrase must reach to count as useful.
$USEFULL_LIMIT = 0.9;

-- Returns the first $cut_length bytes of $word, or the whole word when it is shorter.
$cut_word = ($word, $cut_length) -> {
    $prefix_length = min_of(Length($word), $cut_length);
    RETURN Substring($word, 0, $prefix_length);
};

-- Scores how similar two words are:
--   1.0                   - the words are equal ignoring case;
--   $DIFF_FORM_SIMILARITY - the words share a common prefix (treated as different
--                           forms of the same word);
--   0.0                   - otherwise.
-- The compared prefix length is the longer word's length minus
-- $MAX_SIMILAR_SUFFIX_LENGTH, but never less than $MIN_SIMILAR_PREFIX_LENGTH.
-- Both branches work on the lower-cased words, so a difference in letter case
-- alone can no longer turn a prefix match into a mismatch.
-- NOTE(review): lengths are measured by Length(), i.e. in bytes — confirm this is
-- intended for non-ASCII keywords, and that String::ToLower covers them.
$compare_word = ($a_word, $s_word) -> {
    $a_lower = String::ToLower($a_word);
    $s_lower = String::ToLower($s_word);
    $cut_length = cast(max_of(
        $MIN_SIMILAR_PREFIX_LENGTH,
        Length($a_lower) - $MAX_SIMILAR_SUFFIX_LENGTH,
        Length($s_lower) - $MAX_SIMILAR_SUFFIX_LENGTH) as Uint32);
    RETURN if(
        $a_lower == $s_lower,
        1.0,
        if(
            $cut_word($a_lower, $cut_length) == $cut_word($s_lower, $cut_length),
            $DIFF_FORM_SIMILARITY,
            0.0
        )
    );
};

-- Splits a phrase into words on single spaces; empty parts are skipped.
$split_phrase = ($phrase) -> {
    $words = String::SplitToList($phrase, " ", true, true);
    RETURN $words;
};

-- Splits a comma-separated string into phrases; empty parts are skipped.
$split_phrases = ($phrases) -> {
    $items = String::SplitToList($phrases, ",", true, true);
    RETURN $items;
};

-- Best $compare_word score of $a_word against any word of $s_phrase.
$find_word_in_phrase = ($a_word, $s_phrase) -> {
    $scores = ListMap(
        $split_phrase($s_phrase),
        ($candidate) -> { RETURN $compare_word($a_word, $candidate); }
    );
    RETURN ListMax($scores);
};

-- Average, over the words of $a_phrase, of each word's best match inside $s_phrase.
$calc_phrase_usefull_for_phrase = ($a_phrase, $s_phrase) -> {
    $words = $split_phrase($a_phrase);
    $best_per_word = ListMap(
        $words,
        ($word) -> { RETURN $find_word_in_phrase($word, $s_phrase); }
    );
    RETURN ListSum($best_per_word) / ListLength($words);
};

-- Scores $a_phrase against every phrase of $s_phrases; the result list is
-- in the same order as $s_phrases.
$calc_phrases_usefull_for_phrase = ($a_phrase, $s_phrases) -> {
    RETURN ListMap(
        $s_phrases,
        ($candidate) -> { RETURN $calc_phrase_usefull_for_phrase($a_phrase, $candidate); }
    );
};

-- Best score of $a_phrase against any phrase of $s_phrases.
$calc_usefull_for_phrase = ($a_phrase, $s_phrases) -> {
    RETURN ListMax($calc_phrases_usefull_for_phrase($a_phrase, $s_phrases));
};

-- Average, over $a_phrases, of each phrase's best score against $s_phrases.
$calc_usefull = ($a_phrases, $s_phrases) -> {
    $best_per_phrase = ListMap(
        $a_phrases,
        ($phrase) -> { RETURN $calc_usefull_for_phrase($phrase, $s_phrases); }
    );
    RETURN ListSum($best_per_phrase) / ListLength($a_phrases);
};

-- Returns the positions in $s_phrases of the phrases that match $a_phrase well:
-- their score must reach both $SIMILAR_LIMIT and $USEFULL_LIMIT of the best score.
$find_usefull_for_phrase = ($a_phrase, $s_phrases) -> {
    $scores = $calc_phrases_usefull_for_phrase($a_phrase, $s_phrases);
    $threshold = max_of($SIMILAR_LIMIT, $USEFULL_LIMIT * ListMax($scores));
    -- ListEnumerate yields (index, score) pairs
    $hits = ListFilter(
        ListEnumerate($scores),
        ($pair) -> { RETURN $pair.1 >= $threshold; }
    );
    RETURN ListMap($hits, ($pair) -> { RETURN $pair.0; });
};

-- Share of $s_phrases that are not a good match (see $find_usefull_for_phrase)
-- for any phrase of $a_phrases.
$calc_useless = ($a_phrases, $s_phrases) -> {
    $hit_indexes = ListUniq(ListFlatten(ListMap(
        $a_phrases,
        ($phrase) -> { RETURN $find_usefull_for_phrase($phrase, $s_phrases); }
    )));
    RETURN 1 - 1.0 * ListLength($hit_indexes) / ListLength($s_phrases);
};

-- Strips every "+" character from a keyword.
$formatKeyword = ($keyword) -> {
    $without_plus = String::RemoveAll($keyword, "+");
    RETURN $without_plus;
};

-- Counts keywords after removing "+" from them:
--   suggestNum     - number of suggested keywords;
--   saveSuggestNum - number of saved keywords that are also among the suggested ones;
--   saveNum        - number of saved keywords.
$count_keywords = ($save, $suggest) -> {
    $saved_clean = ListMap($save, $formatKeyword);
    $suggested_clean = ListMap($suggest, $formatKeyword);
    $accepted = ListFilter(
        $saved_clean,
        ($keyword) -> { RETURN ListHas($suggested_clean, $keyword); }
    );
    RETURN AsStruct(
        ListLength($suggested_clean) as suggestNum,
        ListLength($accepted) as saveSuggestNum,
        ListLength($saved_clean) as saveNum
    );
};

-- FINISH QUALITY FUNCTIONS

-- Step 4: keep at most the first 10 suggested keywords and compute the
-- keyword counters; `num` is the size of the full suggested list.
$data = (
    select
        example,
        log_time,
        log_req_id,
        cid,
        saved,
        ListLength(keywords) as num,
        ListTake(keywords, 10) as suggest,
        $count_keywords(saved, ListTake(keywords, 10)) as nums,
        additionalInfo
    from (
        select
            example,
            log_time,
            log_req_id,
            cid,
            saved,
            suggest.keywords as keywords,
            suggest.additionalInfo as additionalInfo
        from $data
    )
);

-- Step 5: one row per log record with the counters, the record type
-- ("NONE" - no suggest, "EMPTY" - empty suggest, "OK" - otherwise),
-- the usefull/useless scores and a text dump of the banner parameters.
$detailed = (
    select
        example,
        log_time,
        log_req_id,
        cid,
        saved,
        suggest,
        num,
        nums.saveSuggestNum as saveSuggestNum,
        nums.suggestNum as suggestNum,
        nums.saveNum as saveNum,
        case
            when suggest is null then "NONE"
            when ListLength(suggest) = 0 then "EMPTY"
            else "OK"
        end as type,
        $calc_usefull(saved, suggest) as usefull,
        $calc_useless(saved, suggest) as useless,
        ListConcat([
            "regions=[",
            ListConcat(additionalInfo.RegionIds, ", "),
            "] ; url='",
            additionalInfo.BannerURL,
            "' ; title='",
            additionalInfo.BannerTitle,
            "' ; body='",
            additionalInfo.BannerText,
            "'"
        ]) as additional_params
    from $data
);

-- Rewrite the per-day detailed table, ordered by log time.
insert into $insert_detailed_table with truncate
select
    log_time,
    log_req_id,
    cid,
    type,
    saved,
    suggest,
    num,
    saveSuggestNum,
    suggestNum,
    saveNum,
    usefull,
    useless,
    additional_params
from $detailed
order by log_time;

-- Keyword totals over all detailed rows, rendered as "key=value" pairs joined by ";".
$keywordsNumStat = (
    select
        ListConcat(
            [
                "suggest_sum=" || cast(sum(suggestNum) as String),
                "save_suggest_sum=" || cast(sum(saveSuggestNum) as String),
                "save_sum_with_ok_suggest=" || cast(sum_if(saveNum, type = "OK") as String),
                "save_sum_with_empty_suggest=" || cast(sum_if(saveNum, type = "EMPTY") as String)
            ],
            ";"
        )
    from $detailed
);

-- Distribution of the suggested-list size: "suggest_num_<num>=<rows>" pairs joined by ";".
$numStat = (
    select
        ListConcat(
            AGGREGATE_LIST("suggest_num_" || cast(num as String) || "=" || cast(row_count as String)),
            ";"
        )
    from (
        select
            num,
            count(*) as row_count
        from $detailed
        group by num
    )
);

-- Distribution of the quality score (10 * usefull - 4 * useless) over rows with a
-- non-empty suggest: "quality_<bucket>=<rows>" pairs joined by ";".
-- Buckets are the exact value -4, unit-wide ranges from -4 up to 10, and the exact
-- value 10; any score that fits none of them lands in "FAIL".
$qualityStat = (
    select
        ListConcat(AGGREGATE_LIST("quality_" || quality || "=" || cast(row_count as String)), ";")
    from (
        select
            quality,
            count(*) as row_count
        from (
            select
                10*usefull-4*useless as score
            from $detailed
            where num > 0
        )
        group by
            case
                when score = -4 then "-4"
                when score < -3 then "-4_-3"
                when score < -2 then "-3_-2"
                when score < -1 then "-2_-1"
                when score < 0 then "-1_0"
                when score < 1 then "0_1"
                when score < 2 then "1_2"
                when score < 3 then "2_3"
                when score < 4 then "3_4"
                when score < 5 then "4_5"
                when score < 6 then "5_6"
                when score < 7 then "6_7"
                when score < 8 then "7_8"
                when score < 9 then "8_9"
                when score < 10 then "9_10"
                when score = 10 then "10"
                else "FAIL"
            end as quality
    )
);

-- One summary row for the processed day:
--   day        - the $day parameter;
--   cases      - number of detailed rows;
--   requests   - rows whose type is not "NONE";
--   suggests   - rows whose type is neither "NONE" nor "EMPTY";
--   additional - the keyword, quality and suggest-size statistics,
--                joined by ";" and wrapped in a leading and trailing ";".
-- This is the only definition of $summary built from $detailed: an earlier
-- definition that grouped by a `save` column (which $detailed does not have)
-- was never read before being rebound, so it has been removed.
$summary = (
    select
        $day as day,
        count(*) as cases,
        count_if(type != "NONE") as requests,
        count_if(type not in ("NONE", "EMPTY")) as suggests,
        ";" || ListConcat(AsList(
            $keywordsNumStat,
            $qualityStat,
            $numStat
        ), ";") || ";" as additional
    from $detailed
);

-- Return the freshly computed summary row as a query result. This happens before
-- the "day already recorded" filter below, so the row is returned even when
-- nothing will be appended to the summary table.
select * from $summary;

-- Drop the summary row when the summary table already has a row for this day,
-- so that a rerun does not append a duplicate.
$summary = (
    select
        day,
        cases,
        requests,
        suggests,
        additional
    from $summary
    where day not in (
        select day
        from $insert_summary_table
        where day = $day
    )
);

-- Append the (possibly filtered-out) summary row to the shared summary table.
insert into $insert_summary_table
select
    day,
    cases,
    requests,
    suggests,
    additional
from $summary;
