-- Job parameters. The `?` placeholders are not values by themselves;
-- presumably they are substituted by whatever launches this query (TODO confirm).
$src_dir = ?;
$day = ?;
$dst_dir = ?;

-- Table paths built from the parameters by string concatenation.
-- $messages is the log table read by the selects below; $keywords_table and
-- $statistic_table are the two tables written by the INSERT statements.
$messages = $src_dir || "/" || $day;
$keywords_table = $dst_dir || "/" || $day;
$statistic_table = $dst_dir || "/statistic/" || $day;

-- Options passed to every Yson conversion/lookup helper in this script.
$options = Yson::Options(true as Strict, true as AutoConvert);
-- Only log messages starting with this prefix are parsed; the JSON payload is
-- whatever follows the prefix.
$MESSAGE_PREFIX = "Apply keywords generation: ";
-- Maps a generation warning id to the result label reported for an iteration
-- that returned no keywords.
-- NOTE(review): the name looks like a typo for WARN_TO_RESULT; it is kept because
-- it is referenced elsewhere in the script.
$WARN_RO_RESULT = {
    "GenerationDefectIds.EMPTY_SEARCH_QUERY_API_RESPONSE": "EMPTY_RESULT",
    "GenerationDefectIds.CAMPAIGN_WITHOUT_HREF": "EMPTY_URL",
    "GenerationDefectIds.RICH_CONTENT_API_ERROR": "ERRORS",
    "GenerationDefectIds.SEARCH_QUERY_API_ERROR": "ERRORS"
};
-- Sort key used by $calcQuality to pick the result reported for a whole save
-- request (the entry with the smallest key comes first).
-- Labels produced elsewhere in the script but absent here ("UNKNOWN", "NOT_USED")
-- have no rank in this dictionary.
$RESULT_ORDER = {
    "OK": 1,
    "EMPTY_URL": 2,
    "EMPTY_RESULT": 3,
    "ERRORS": 4
};

-- Tuning constants for the word similarity check in $compare_word:
-- the compared prefix is never shorter than MIN_SIMILAR_PREFIX_LENGTH, and up to
-- MAX_SIMILAR_SUFFIX_LENGTH trailing symbols of the longer word are ignored.
$MIN_SIMILAR_PREFIX_LENGTH=3;
$MAX_SIMILAR_SUFFIX_LENGTH=3;
-- Score given to two words that differ but share the compared prefix.
$DIFF_FORM_SIMILARITY=1;
-- Thresholds used by $find_usefull_for_phrase: a suggested phrase counts as used
-- when its score is at least max(SIMILAR_LIMIT, USEFULL_LIMIT * best score).
$SIMILAR_LIMIT = 0.1;
$USEFULL_LIMIT = 0.9;

-- START SUGGEST FUNCTIONS

-- Converts a Yson list node into a list of strings, converting each element
-- with the shared $options.
$ysonList_to_ListOfStrings = ($yson) -> {
    $nodes = Yson::ConvertToList($yson, $options);
    $node_to_string = ($node) -> {
        RETURN Yson::ConvertToString($node, $options);
    };
    RETURN ListMap($nodes, $node_to_string);
};

-- Renders a Yson list node as a single string with the elements joined by ", ".
$ysonList_to_Strings = ($yson) -> {
    $strings = $ysonList_to_ListOfStrings($yson);
    RETURN ListConcat($strings, ", ");
};

-- Derives the result label of one generation iteration.
--   $ysonWarnings - Yson list of warning ids returned by the generation
--   $keywords     - Yson list of generated keywords
-- Returns "OK" when at least one keyword was generated; otherwise the label
-- mapped in $WARN_RO_RESULT for the first known warning, or "UNKNOWN" when none
-- of the warnings is known.
$ysonListOfWarnings_to_result = ($ysonWarnings, $keywords) -> {
    $is_known = ($warning) -> {
        RETURN DictContains($WARN_RO_RESULT, $warning);
    };
    $known = ListFilter($ysonList_to_ListOfStrings($ysonWarnings), $is_known);
    -- Label used only when no keywords were generated.
    $label_without_keywords = IF(
        ListLength($known) == 0,
        "UNKNOWN",
        $WARN_RO_RESULT[$known[0]]
    );
    RETURN IF(Yson::GetLength($keywords) > 0, "OK", $label_without_keywords);
};

-- Reads string field $field from a Yson dict node; yields "" when the field is
-- absent.
$yson_to_stringField_or_EmptyString = ($yson, $field) -> {
    $has_field = Yson::Contains($yson, $field);
    RETURN IF(
        $has_field,
        Yson::LookupString($yson, $field, $options),
        ""
    );
};

-- Formats the additionalInfo node of a generation response as one
-- human-readable string:
--   regions=[...] ; url='...' ; title='...' ; body='...'
-- Missing fields are rendered as empty strings.
$ysonAdditionalInfo_to_additionalParams = ($yson) -> {
    -- Shortcut: string field of $yson, or "" when the field is absent.
    $text_of = ($name) -> {
        RETURN $yson_to_stringField_or_EmptyString($yson, $name);
    };
    $regions = IF(
        Yson::Contains($yson, "RegionIds"),
        $ysonList_to_Strings($yson.RegionIds),
        ""
    );
    RETURN ListConcat([
        "regions=[", $regions,
        "] ; url='", $text_of("BannerURL"),
        "' ; title='", $text_of("BannerTitle"),
        "' ; body='", $text_of("BannerText"),
        "'"
    ]);
};

-- True when the title or the text of the generation request did not come from
-- the URL, i.e. when TitleSource or TextSource is anything other than "Url"
-- (a missing field is read as "" and therefore also counts).
$yson_to_useDescription = ($yson) -> {
    $title_source = $yson_to_stringField_or_EmptyString($yson, "TitleSource");
    $text_source = $yson_to_stringField_or_EmptyString($yson, "TextSource");
    RETURN $title_source != "Url" OR $text_source != "Url";
};

-- FINISH SUGGEST FUNCTIONS

-- START QUALITY FUNCTIONS

-- Returns the leading $cut_length symbols of $word, or the whole word when it
-- is shorter than that.
$cut_word = ($word, $cut_length) -> {
    $prefix_length = MIN_OF(Length($word), $cut_length);
    RETURN Substring($word, 0, $prefix_length);
};

-- Scores how similar an applied word ($a_word) is to a suggested word ($s_word).
-- Returns:
--   1.0                    - the words are equal ignoring case;
--   $DIFF_FORM_SIMILARITY  - the words differ but share the compared prefix
--                            (treated as different forms of the same word);
--   0.0                    - otherwise.
-- The compared prefix is the longer word without its last
-- $MAX_SIMILAR_SUFFIX_LENGTH symbols, but never shorter than
-- $MIN_SIMILAR_PREFIX_LENGTH.
-- Both checks use the lower-cased words: previously only the exact-match check
-- ignored case, so "Phone" vs "phones" scored 0.0 while "phone" vs "phones"
-- matched by prefix.
-- NOTE(review): Length/Substring presumably count bytes and String::ToLower
-- presumably changes ASCII letters only, so non-ASCII (e.g. Cyrillic) words may
-- be cut mid-character and compared case-sensitively - confirm.
$compare_word = ($a_word, $s_word) -> {
    $a_lower = String::ToLower($a_word);
    $s_lower = String::ToLower($s_word);
    $cut_length = cast(max_of(
        $MIN_SIMILAR_PREFIX_LENGTH,
        Length($a_word) - $MAX_SIMILAR_SUFFIX_LENGTH,
        Length($s_word) - $MAX_SIMILAR_SUFFIX_LENGTH) as Uint32);
    RETURN if(
        $a_lower == $s_lower,
        1.0,
        if(
            $cut_word($a_lower, $cut_length) == $cut_word($s_lower, $cut_length),
            $DIFF_FORM_SIMILARITY,
            0.0
        )
    );
};

-- Splits a phrase into words on single spaces, dropping empty pieces.
$split_phrase = ($phrase) -> {
    $word_delimiter = " ";
    RETURN String::SplitToList($phrase, $word_delimiter, true, true);
};

-- Splits a comma-separated string into phrases, dropping empty pieces.
-- Phrases are not trimmed, so a ", " separator leaves a leading space in them.
$split_phrases = ($phrases) -> {
    $phrase_delimiter = ",";
    RETURN String::SplitToList($phrases, $phrase_delimiter, true, true);
};

-- Best $compare_word score of $a_word against any word of $s_phrase.
-- ListMax yields NULL when the phrase contains no words.
$find_word_in_phrase = ($a_word, $s_phrase) -> {
    $score_against = ($s_word) -> {
        RETURN $compare_word($a_word, $s_word);
    };
    RETURN ListMax(ListMap($split_phrase($s_phrase), $score_against));
};

-- Average, over the words of the applied phrase $a_phrase, of each word's best
-- match score inside the suggested phrase $s_phrase.
$calc_phrase_usefull_for_phrase = ($a_phrase, $s_phrase) -> {
    $a_words = $split_phrase($a_phrase);
    $best_match = ($a_word) -> {
        RETURN $find_word_in_phrase($a_word, $s_phrase);
    };
    $total = ListSum(ListMap($a_words, $best_match));
    RETURN $total / ListLength($a_words);
};

-- Scores the applied phrase $a_phrase against every suggested phrase; the
-- result list is parallel to $s_phrases.
$calc_phrases_usefull_for_phrase = ($a_phrase, $s_phrases) -> {
    RETURN ListMap($s_phrases, ($s_phrase) -> {
        RETURN $calc_phrase_usefull_for_phrase($a_phrase, $s_phrase);
    });
};

-- Best score of the applied phrase $a_phrase over all suggested phrases packed
-- in the comma-separated string $s_data.
$calc_usefull_for_phrase = ($a_phrase, $s_data) -> {
    $scores = $calc_phrases_usefull_for_phrase($a_phrase, $split_phrases($s_data));
    RETURN ListMax($scores);
};

-- "Usefulness" of the suggestions: the average, over the applied phrases in
-- $a_data, of each phrase's best score among the suggested phrases in $s_data.
-- Both arguments are comma-separated phrase strings.
$calc_usefull = ($a_data, $s_data) -> {
    $a_phrases = $split_phrases($a_data);
    $best_score = ($a_phrase) -> {
        RETURN $calc_usefull_for_phrase($a_phrase, $s_data);
    };
    $total = ListSum(ListMap($a_phrases, $best_score));
    RETURN $total / ListLength($a_phrases);
};

-- Returns the positions (indexes into $s_phrases) of the suggested phrases that
-- match the applied phrase $a_phrase well enough: their score must reach
-- max($SIMILAR_LIMIT, $USEFULL_LIMIT * best score).
$find_usefull_for_phrase = ($a_phrase, $s_phrases) -> {
    $scores = $calc_phrases_usefull_for_phrase($a_phrase, $s_phrases);
    $threshold = MAX_OF($SIMILAR_LIMIT, $USEFULL_LIMIT * ListMax($scores));
    -- ListEnumerate yields (index, score) tuples.
    $reaches_threshold = ($entry) -> {
        RETURN $entry.1 >= $threshold;
    };
    $position_of = ($entry) -> {
        RETURN $entry.0;
    };
    RETURN ListMap(ListFilter(ListEnumerate($scores), $reaches_threshold), $position_of);
};

-- "Uselessness" of the suggestions: the share of suggested phrases (in $s_data)
-- that were not matched by any applied phrase (in $a_data).
-- Both arguments are comma-separated phrase strings.
-- Returns a value in [0, 1], or NULL when $s_data contains no phrases. Without
-- the guard the floating-point division 0.0 / 0 produced NaN for every request
-- without suggestions, while $calc_usefull yields NULL for the same input.
-- NOTE(review): the result type becomes optional Double; confirm that consumers
-- of the output tables accept NULL here.
$calc_useless = ($a_data, $s_data) -> {
    $s_phrases = $split_phrases($s_data);
    $a_phrases = $split_phrases($a_data);
    $usefull_by_phrases = ListMap($a_phrases, ($a_phrase) -> { RETURN $find_usefull_for_phrase($a_phrase, $s_phrases) });
    -- Distinct positions of suggested phrases used by at least one applied phrase.
    $usefull = ListUniq(ListFlatten($usefull_by_phrases));
    RETURN if(
        ListLength($s_phrases) == 0,
        Nothing(Double?),
        1 - 1.0 * ListLength($usefull) / ListLength($s_phrases)
    );
};

-- Aggregates the generation iterations of one save request.
--   $suggests - list of structs with at least `result` and `suggest` members.
-- Returns a struct with:
--   countRequests - number of iterations;
--   countSuccess  - iterations with result "OK" and a non-empty suggest string;
--   suggest       - suggest strings of the successful iterations joined by ", ";
--   result        - result of the best-ranked iteration per $RESULT_ORDER, or
--                   "NOT_USED" when there were no iterations.
-- Results missing from $RESULT_ORDER (e.g. "UNKNOWN") are ranked after every
-- known result. Without the fallback their sort key is NULL, which presumably
-- sorts ahead of "OK", so one "UNKNOWN" iteration would hide a successful one
-- in `result` (NULL ordering to be confirmed).
$calcQuality = ($suggests) -> {
    $unranked = 1000;
    $rank = ($s) -> { RETURN $RESULT_ORDER[$s.result] ?? $unranked; };
    $sorted = ListSort($suggests, $rank);
    $success = ListFilter($sorted, ($s) -> { RETURN $s.result == "OK" and Length($s.suggest) > 0; });
    RETURN AsStruct(
        ListLength($sorted) as countRequests,
        ListLength($success) as countSuccess,
        ListConcat(ListMap($success, ($s) -> { RETURN $s.suggest; }), ", ") as suggest,
        if(ListLength($sorted) == 0, "NOT_USED", $sorted[0].result) as result,
    );
};

-- Computes $calcQuality three times: for all iterations (`common`), for those
-- flagged with_description (`byDescription`) and for the rest (`byUrl`).
$calcQualityByExperiments = ($suggests) -> {
    $with_description = ($s) -> {
        RETURN $s.with_description;
    };
    $without_description = ($s) -> {
        RETURN NOT $s.with_description;
    };
    RETURN AsStruct(
        $calcQuality($suggests) AS common,
        $calcQuality(ListFilter($suggests, $with_description)) AS byDescription,
        $calcQuality(ListFilter($suggests, $without_description)) AS byUrl
    );
};

-- FINISH QUALITY FUNCTIONS

-- START SELECTS

-- Parsed "save results" log records of the ad generation controller.
-- Keeps only messages that start with $MESSAGE_PREFIX and whose remainder is
-- valid JSON; the remainder is exposed as the Yson node `data`.
$data = (
    SELECT
        log_time AS log_time,
        trace_id AS log_req_id,
        Yson::ParseJson(Substring(message, Length($MESSAGE_PREFIX))) AS data
    FROM $messages
    WHERE service = 'direct.web'
        AND class_name = 'ru.yandex.direct.web.entity.adgeneration.controller.AdGenerationController'
        AND method = 'ad_generation.ad_generation.save_results'
        AND String::StartsWith(message, $MESSAGE_PREFIX)
        -- Guard for ParseJson above: drop payloads that are not valid JSON.
        AND CAST(Substring(message, Length($MESSAGE_PREFIX)) AS Json) IS NOT NULL
);

-- Largest number of generation iterations found in a single parsed message
-- (messages whose generationIterations is an entity/null are ignored).
$maxSuggestNum = (select max(Yson::GetLength(data.generationIterations)) from $data where not Yson::IsEntity(data.generationIterations));
-- Fall back to 1 when the aggregate is NULL (no message with iterations).
$maxSuggestNum = nvl($maxSuggestNum, 1);
-- Iteration indexes 0 .. $maxSuggestNum - 1, one per row, used below to unroll
-- the generationIterations list with a cross join.
-- NOTE(review): rows of $messages serve only as a row source for row_number(),
-- so fewer than $maxSuggestNum rows in $messages would yield fewer indexes, and
-- the window without partitioning presumably spans the whole table - confirm
-- this is acceptable or generate the range directly.
$index = (select row_number() over w - 1 as index from $messages WINDOW w AS () limit $maxSuggestNum);

-- One row per (save request, generation iteration).
-- Every parsed message is cross-joined with the iteration indexes; the WHERE
-- clause keeps only the indexes that exist in that message's
-- generationIterations list. The column list defines the schema of
-- $keywords_table.
$suggests = (
    SELECT
        d.log_req_id as log_req_id,
        d.log_time as log_time,
        -- apply_time duplicates log_time under a second name.
        d.log_time as apply_time,
        Yson::ConvertToUint64(d.data.adGroupId, $options) as group_id,
        Yson::ConvertToUint64(d.data.saveReqId, $options) as apply_req_id,
        -- Keywords that were saved, joined by ", ".
        $ysonList_to_Strings(d.data.savedKeywords) as apply,
        -- Keywords proposed by this iteration, joined by ", ".
        $ysonList_to_Strings(d.data.generationIterations[i.index].generationResponse.keywords) as suggest,
        Yson::ConvertToUint64(d.data.generationIterations[i.index].generationResponse.additionalInfo.campaignId, $options) as cid,
        Yson::ConvertToUint64(d.data.generationIterations[i.index].generationResponse.reqId, $options) as suggest_req_id,
        -- Result label of this iteration ("OK", a mapped warning label or "UNKNOWN").
        $ysonListOfWarnings_to_result(
            d.data.generationIterations[i.index].generationResponse.additionalInfo.warnings,
            d.data.generationIterations[i.index].generationResponse.keywords
        ) as result,
        $ysonAdditionalInfo_to_additionalParams(d.data.generationIterations[i.index].generationResponse.additionalInfo) as additional_params,
        $yson_to_useDescription(d.data.generationIterations[i.index].generationResponse.additionalInfo) as with_description,
        -- The two metrics below are computed from the same strings as the
        -- `apply` and `suggest` columns above.
        $calc_usefull(
            $ysonList_to_Strings(d.data.savedKeywords), --apply
            $ysonList_to_Strings(d.data.generationIterations[i.index].generationResponse.keywords) --suggest
        ) as usefull,
        $calc_useless(
            $ysonList_to_Strings(d.data.savedKeywords), --apply
            $ysonList_to_Strings(d.data.generationIterations[i.index].generationResponse.keywords) --suggest
        ) as useless,
    from $data as d
    cross join $index as i
    where not Yson::IsEntity(d.data.generationIterations)
    and Yson::GetLength(d.data.generationIterations) > i.index
);

-- One row per save request with quality metrics over all of its generation
-- iterations, reported three ways: common, byUrl and byDescription
-- (see $calcQualityByExperiments). The column list defines the schema of
-- $statistic_table.
$applies = (
    select
        log_req_id,
        log_time,
        -- apply_time duplicates log_time under a second name.
        log_time as apply_time,
        apply,
        quality.common.suggest as suggest,
        quality.byUrl.suggest as suggestByUrl,
        quality.byDescription.suggest as suggestByDescription,
        quality.common.countRequests as countRequests,
        quality.byUrl.countRequests as countByUrlRequests,
        quality.byDescription.countRequests as countByDescriptionRequests,
        quality.common.countSuccess as countSuccess,
        quality.byUrl.countSuccess as countByUrlSuccess,
        quality.byDescription.countSuccess as countByDescriptionSuccess,
        -- Metrics are computed against the successful suggestions only, because
        -- quality.*.suggest concatenates only "OK" iterations.
        $calc_usefull(apply, quality.common.suggest) as usefull,
        $calc_usefull(apply, quality.byUrl.suggest) as usefullByUrl,
        $calc_usefull(apply, quality.byDescription.suggest) as usefullByDescription,
        $calc_useless(apply, quality.common.suggest) as useless,
        $calc_useless(apply, quality.byUrl.suggest) as uselessByUrl,
        $calc_useless(apply, quality.byDescription.suggest) as uselessByDescription,
        quality.common.result as result,
        quality.byUrl.result as resultByUrl,
        quality.byDescription.result as resultByDescription,
    from (
        -- Requests that have at least one generation iteration: collapse the
        -- rows of $suggests per request.
        select
            log_req_id,
            some(log_time) as log_time,
            some(apply) as apply,
            $calcQualityByExperiments(AGGREGATE_LIST(AsStruct(
                with_description as with_description,
                result as result,
                suggest as suggest,
            ))) as quality
        from $suggests
        group by log_req_id
        union all
        -- Requests without generation iterations: they never reach $suggests,
        -- so they are added here with the quality of an empty iteration list.
        select
            log_req_id, log_time,
            $ysonList_to_Strings(data.savedKeywords) as apply,
            $calcQualityByExperiments([]) as quality
        from $data
        where Yson::IsEntity(data.generationIterations)
        or Yson::GetLength(data.generationIterations) == 0
    )
);

-- FINISH SELECTS

-- START INSERTS

-- Per-iteration rows; `with truncate` replaces the previous contents of the table.
insert into $keywords_table with truncate
select * from $suggests order by log_req_id;

-- Per-request statistics; `with truncate` replaces the previous contents of the table.
insert into $statistic_table with truncate
select * from $applies order by log_req_id;

-- FINISH INSERTS

-- Summary returned as the query result: number of save requests per overall
-- result label.
SELECT
    result,
    COUNT(*) AS cnt
FROM $applies
GROUP BY result AS result;
