-- Default cluster for every table path used in this script.
use hahn;

-- Query parameters:
--   $yt_pool  - value assigned to the yt.Pool pragma below.
--   $date     - date string; the recalculated window starts six days before it (see $date_from).
--   $src_path - previously built table; when it exists, its rows older than the window are carried over.
--   $dst_path - output table; optional, replaced by $src_path when not supplied.
declare $yt_pool as String;
declare $date as String;
declare $src_path as String;
declare $dst_path as String?;

pragma yt.Pool = $yt_pool;
pragma yt.UseNativeYtTypes = "1";

-- First day of the recalculated window, formatted as YYYY-MM-DD: six days before $date.
$date_from = DateTime::Format("%Y-%m-%d")(
    cast($date as Date) - Interval("P6D")
);

-- Write back over the source table when no explicit destination was passed.
$dst_path = coalesce($dst_path, $src_path);

-- Returns the ids of the children whose type is "question", keeping their original order.
$parse_children = ($children) -> {
    $only_questions = ListFilter($children, ($child) -> ($child.type = "question"));
    return ListMap($only_questions, ($child) -> ($child.id));
};

-- Turns the "groups" dict of a survey config into a dict keyed by the group key cast to Int64;
-- each value is the list of question ids found among that group's children.
$parse_groups = ($groups) -> {
    $to_entry = ($item) -> {
        $group_key = cast($item.0 as Int64);
        $group = $item.1;
        return ($group_key, $parse_children($group.children));
    };
    return ToDict(ListMap(DictItems($groups), $to_entry));
};

-- Flattens one question of the survey config into a struct with snake_case members.
--   options      - option ids cast to Int64; an id that fails the cast becomes -9999.
--   option_texts - dict from option id to label, merged with the reserved keys
--                  -1 (noAnswer), -2 (noOpinionAnswer), -3 (otherAnswer) and
--                  -4 (otherAnswerWithText). On merge the regular option label is
--                  preferred, then the reserved label, then an empty string.
$parse_question = ($question) -> {
    -- Option id as Int64, with -9999 standing in for ids that cannot be cast.
    $option_id = ($option) -> (cast($option.id as Int64) ?? -9999);

    $regular_labels = ToDict(
        ListMap($question.options, ($option) -> (($option_id($option), $option.label)))
        ?? ListCreate(Tuple<Int64, String>)
    );

    $reserved_labels = {
        -1L: $question.noAnswer.label,
        -2L: $question.noOpinionAnswer.label,
        -3L: $question.otherAnswer.label,
        -4L: $question.otherAnswerWithText.label
    };

    return <|
        "label": $question.label,
        "type": $question.type,
        "required": $question.required,
        "options": ListMap($question.options, $option_id),
        "rotation": $question.rotation,
        "multiple_choice": $question.multipleChoice,
        "no_answer": $question.noAnswer.enabled,
        "no_opinion_answer": $question.noOpinionAnswer.enabled,
        "other_answer": $question.otherAnswer.enabled,
        "other_answer_with_text": $question.otherAnswerWithText.enabled,
        "min_value": cast($question.minValue as Int64),
        "max_value": cast($question.maxValue as Int64),
        "option_texts": SetUnion(
            $regular_labels,
            $reserved_labels,
            ($_, $regular, $reserved) -> ($regular ?? $reserved ?? "")
        )
    |>;
};

-- Re-keys the "questions" dict of a survey config by the key cast to Int64 and
-- replaces every value with the struct produced by $parse_question.
$parse_questions = ($questions) -> {
    $to_entry = ($item) -> ((cast($item.0 as Int64), $parse_question($item.1)));
    return ToDict(ListMap(DictItems($questions), $to_entry));
};

-- Shape that the survey revision "config" Yson is converted to.
$config_type = Struct<
    groups: Dict<
        String,
        Struct<
            children: List<Struct<id: Double, type: String>>
        >
    >,
    pageOrder: List<Yson>,
    questions: Dict<
        String,
        Struct<
            label: String,
            type: String,
            required: Bool,
            options: List<Struct<id: Double, label: String>>?,
            rotation: Bool?,
            multipleChoice: Bool?,
            noAnswer: Struct<enabled: Bool, label: String>?,
            noOpinionAnswer: Struct<enabled: Bool, label: String>?,
            otherAnswer: Struct<enabled: Bool, label: String>?,
            otherAnswerWithText: Struct<enabled: Bool, label: String>?,
            minValue: Double?,
            maxValue: Double?
        >
    >
>;

-- One row per survey revision with its parsed configuration:
--   start_page     - first element of pageOrder, read as a string (non-strict) and cast to Int64;
--   page_questions - result of $parse_groups over the config's "groups";
--   questions      - result of $parse_questions over the config's "questions".
$surveys =
select
    survey_id,
    revision_id,
    cast(Yson::ConvertToString(pageOrder[0], Yson::Options(false as Strict)) as Int64) as start_page,
    $parse_groups(groups) as page_questions,
    $parse_questions(questions) as questions
from (
    select
        survey_id,
        id as revision_id,
        -- Unnamed struct column: "flatten columns" below expands it into groups / pageOrder / questions.
        Yson::ConvertTo(config, $config_type, Yson::Options(true as AutoConvert))
    from `//home/pythia/private/pg_raw/survey_revisions`
)
flatten columns;

-- Extracts a timestamp from an action's raw_meta and casts it to uint32.
--   When $action is "FINISH" the value is /meta/questionTimestamp.
--   Otherwise it is /meta/timestamp of the "answers" entry whose questionId equals
--   $question_id, or 0 when there is no such entry.
-- Every Yson access is non-strict.
$get_timestamp = ($raw_meta, $action, $question_id) -> {
    $lenient = Yson::Options(false as Strict);

    $answer_entries = Yson::LookupList($raw_meta, "answers", $lenient);
    $ts_by_question = ToDict(
        ListMap(
            $answer_entries,
            ($entry) -> ((
                Yson::YPathInt64($entry, "/questionId", $lenient),
                Yson::YPathInt64($entry, "/meta/timestamp", $lenient)
            ))
        )
    );

    $finish_ts = Yson::YPathInt64($raw_meta, "/meta/questionTimestamp", $lenient);
    $answer_ts = $ts_by_question[$question_id] ?? 0;

    return cast(if($action = "FINISH", $finish_ts, $answer_ts) as uint32);
};

-- Table with the raw interview actions.
$actions_full_path = "//home/pythia/export/prod/actions_full";

-- Interviews that have at least one action whose /meta/source is "serp",
-- together with the highest seq_num among those actions.
$serp_ids =
select
    survey_id,
    interview_id,
    max(seq_num) as seq_num
from $actions_full_path
where
    Yson::YPathString(
        raw_meta,
        "/meta/source",
        Yson::Options(false as Strict)
    ) = "serp"
group by
    survey_id,
    interview_id;

-- Actions of SERP interviews, restricted (via the join with $serp_ids) to the rows that carry
-- the interview's highest serp seq_num. Timestamps are rebuilt from raw_meta with $get_timestamp,
-- revision_id is forced to 1 and is_serp is set to true.
$serp_actions =
-- Branch 1: a synthetic "START" action built from every FINISH row. Because a.`action` is
-- "FINISH" here, both start_timestamp and `timestamp` resolve to /meta/questionTimestamp.
select
    a.*,
    $get_timestamp(a.raw_meta, a.`action`, a.question_id) as start_timestamp,
    $get_timestamp(a.raw_meta, a.`action`, a.question_id) as `timestamp`,
    true as is_serp,
    1u as revision_id,
    "START" as `action`,
without a.`timestamp`, a.`action`, a.revision_id
from $actions_full_path as a
join $serp_ids as ids using (survey_id, interview_id, seq_num)
where a.action = "FINISH"

union all

-- Branch 2: the original rows. Both timestamps are looked up in the raw_meta of the first row
-- of window w, which orders FINISH rows ahead of all others:
--   start_timestamp - that row's /meta/questionTimestamp (action passed as "FINISH");
--   `timestamp`     - looked up according to the current row's own action and question_id.
select
    a.*,
    $get_timestamp(first_value(a.raw_meta) over w, "FINISH", null) as start_timestamp,
    $get_timestamp(first_value(a.raw_meta) over w, a.action, a.question_id) as `timestamp`,
    true as is_serp,
    1u as revision_id,
without a.`timestamp`, a.revision_id
from $actions_full_path as a
join $serp_ids as ids using (survey_id, interview_id, seq_num)
window w as (partition by a.survey_id, a.interview_id order by if(a.action = "FINISH", 0, 1));

-- Actions of interviews that have no entry in $serp_ids (anti-join: left join + "is null" filter).
--   start_timestamp - `timestamp` of the most recent START row seen so far in the interview
--                     (window ordered by `timestamp`), falling back to the first `timestamp`
--                     of the window; divided by 1000000 and cast to uint32.
--   `timestamp`     - the row's own `timestamp`, divided by 1000000 and cast to uint32.
-- NOTE(review): the division suggests the source column is in microseconds - confirm.
$not_serp_actions =
select
    a.*,
    cast((
        last_value(if(a.action = "START", a.`timestamp`)) ignore nulls over w
        ?? first_value(a.`timestamp`) over w) / 1000000 as uint32
    ) as start_timestamp,
    cast(a.`timestamp` / 1000000 as uint32) as `timestamp`,
    false as is_serp,
without a.`timestamp`
from $actions_full_path as a
left join $serp_ids as ids using (survey_id, interview_id)
where ids.survey_id is null
window w as (partition by a.survey_id, a.interview_id order by a.`timestamp`);

-- Formats a timestamp given in seconds as the YYYY-MM-DD calendar day in the Europe/Moscow timezone.
$get_msk_day = ($ts) -> {
    $moment_msk = AddTimezone(DateTime::FromSeconds($ts), "Europe/Moscow");
    $as_day = DateTime::Format("%Y-%m-%d");
    return $as_day($moment_msk);
};

-- SERP and non-SERP actions together, limited to interviews whose start day
-- (Moscow time) is not earlier than $date_from.
$actions =
select
    *
from $serp_actions
where
    $get_msk_day(start_timestamp) >= $date_from

union all

select
    *
from $not_serp_actions
where
    $get_msk_day(start_timestamp) >= $date_from;

-- Parses the raw "options" value of an answer into a list of Int64 option ids.
-- The value arrives in several shapes:
--   [1,2,3]        - a list of numbers
--   ["1","2","3"]  - a list of numeric strings
--   [#]            - a list holding a single null
--   "sdfdf"        - garbage that is not a list at all
-- The conversion is non-strict with AutoConvert, so unparsable items become nulls.
-- Nulls are dropped rather than unwrapped: a null inside a longer list (for example
-- [1,#]) no longer aborts the whole query, while the empty and single-null cases
-- still yield an empty list.
$parse_options = ($options) -> {
    $parsed = Yson::ConvertTo($options, List<Int64?>, Yson::Options(false as Strict, true as AutoConvert));
    return ListNotNull($parsed);
};

-- Derives the traffic supplier of an interview. The first matching rule wins:
-- the is_serp flag, then known "supid" values, a non-empty "toloka_confirmation",
-- "from_direct" = "1", a non-empty "pageid"; otherwise null.
$supplier = ($query, $is_serp) -> {
    $supid = $query["supid"];
    $toloka_confirmation = $query["toloka_confirmation"];

    return case
        when $is_serp then "serp"
        when $supid = "a9f7973c" then "tiburon"
        when $supid = "7c73c5b5" then "cint"
        when $supid = "4eddfe37" or $toloka_confirmation != "" then "toloka"
        when $query["from_direct"] = "1" then "direct"
        when $query["pageid"] != "" then "display"
        else null
    end;
};

-- Re-keys a dict of per-question option lists by the key cast to Int64 and
-- reduces every option struct to its id.
$parse_question_options = ($options) -> {
    $ids_only = ($option_list) -> (ListMap($option_list, ($option) -> ($option.id)));
    $to_entry = ($item) -> ((cast($item.0 as Int64), $ids_only($item.1)));
    return ToDict(ListMap(DictItems($options), $to_entry));
};

-- Builds a dict from questionId to the Int64 list stored at /meta/rotation_order,
-- one entry per element of the "answers" list in raw_meta.
$get_serp_rotation_option_ids = ($raw_meta) -> {
    $answer_entries = Yson::LookupList($raw_meta, "answers");
    $to_entry = ($entry) -> ((
        Yson::LookupInt64($entry, "questionId"),
        Yson::ConvertToInt64List(Yson::YPath($entry, "/meta/rotation_order"))
    ));
    return ToDict(ListMap($answer_entries, $to_entry));
};

-- Per-question option order of an interview.
-- Taken from /rotation_order/question_options in raw_data when that yields a non-empty dict;
-- otherwise from the "answers" entries in raw_meta.
$get_rotation_option_ids = ($raw_data, $raw_meta) -> {
    $question_options_node = Yson::YPath(
        $raw_data,
        "/rotation_order/question_options",
        Yson::Options(false as Strict)
    );
    $typed_options = Yson::ConvertTo(
        $question_options_node,
        Dict<String, List<Struct<id: Int64>>>?
    );
    $from_raw_data = $parse_question_options($typed_options);

    return if(
        DictHasItems($from_raw_data),
        $from_raw_data,
        $get_serp_rotation_option_ids($raw_meta)
    );
};

-- One START row per (survey_id, interview_id, start_timestamp): the one with the highest seq_num.
-- Carries the request details read from raw_meta (referer, userAgent, query) and the
-- per-question option order produced by $get_rotation_option_ids.
$starts =
select *
from (
    -- max_by(TableRow(), ...) yields a single struct column; "flatten columns" below unpacks it.
    select max_by(TableRow(), seq_num)
    from (
        select
            slug, survey_id, revision_id, interview_id, start_timestamp, seq_num, yandexuid,
            Yson::LookupString(raw_meta, "referer", Yson::Options(false as Strict)) as referer,
            Yson::LookupString(raw_meta, "userAgent", Yson::Options(false as Strict)) as user_agent,
            Yson::ConvertTo(Yson::Lookup(raw_meta, "query", Yson::Options(false as Strict)), Dict<String, String>?, Yson::Options(false as Strict)) as query,
            $get_rotation_option_ids(raw_data, raw_meta) as rotation_option_ids,
        from $actions
        where action = "START"
    )
    group by survey_id, interview_id, start_timestamp
)
flatten columns;

-- Lowest seq_num of a FINISH action for every (survey_id, interview_id, start_timestamp).
$finishes =
select
    survey_id,
    interview_id,
    start_timestamp,
    min(seq_num) as finish_seq_num
from $actions
where
    action = "FINISH"
group by
    survey_id,
    interview_id,
    start_timestamp;

-- First time each page was shown within an interview: for every
-- (survey_id, interview_id, start_timestamp, page_id) keeps the START / PAGE_SWITCH row
-- with the smallest (seq_num, question_timestamp).
$switches =
select *
from (
    -- min_by(TableRow(), ...) yields a single struct column; "flatten columns" below unpacks it.
    select min_by(TableRow(), (seq_num, question_timestamp))
    from (
        select
            slug,
            survey_id,
            revision_id,
            interview_id,
            start_timestamp,
            `timestamp` as question_timestamp,
            seq_num,
            yandexuid,
            -- A START row has no page_to: its page is the first entry of /rotation_order/pages
            -- in raw_data. This lookup is strict, unlike most other Yson reads in this script.
            if(
                action = "START",
                Yson::ConvertTo(Yson::YPath(raw_data, "/rotation_order/pages"), List<Int64>?)[0],
                page_to
            ) as page_id,
            is_serp,
        from $actions
        where action = "START" or action = "PAGE_SWITCH"
    )
    group by survey_id, interview_id, start_timestamp, page_id
)
flatten columns;

-- Page views with a missing page_id replaced by the start_page of the survey revision;
-- seq_num is dropped from the output.
$s1 =
select
    coalesce(sw.page_id, sv.start_page) as page_id,
    sw.*
without
    sw.page_id,
    sw.seq_num
from $switches as sw
join $surveys as sv
    using (survey_id, revision_id);

-- One row per (survey revision, page, question): expands the page_questions dict of $surveys
-- into its entries and then each entry's question list into rows, attaching the parsed
-- question struct as question_info.
$survey_pages =
select
    survey_id,
    revision_id,
    page_id,
    question_id,
    questions[question_id] as question_info
from (
    select
        survey_id,
        revision_id,
        -- After "flatten dict by", page_questions is a (key, value) tuple:
        -- .0 is the page id, .1 the list of question ids on that page.
        page_questions.0 as page_id,
        page_questions.1 as page_questions,
        questions
    from $surveys
    flatten dict by page_questions
)
flatten list by (page_questions as question_id);

-- Questions shown to the respondent: every page view from $s1 expanded into
-- the questions that belong to that page of the survey revision.
$questions =
select
    slug,
    sw.survey_id as survey_id,
    sw.revision_id as revision_id,
    sw.is_serp as is_serp,
    interview_id,
    start_timestamp,
    question_timestamp,
    yandexuid,
    question_id
from $s1 as sw
join $survey_pages as pg
    using (survey_id, revision_id, page_id);

-- Latest answer per question: for every (survey_id, interview_id, start_timestamp, question_id)
-- keeps the ANSWER row with the highest seq_num, with the chosen option ids parsed by
-- $parse_options and the free text read from raw_data.
$answers =
select *
from (
    -- max_by(TableRow(), ...) yields a single struct column; "flatten columns" below unpacks it.
    select max_by(TableRow(), seq_num)
    from (
        select
            slug, survey_id, revision_id, interview_id, start_timestamp, seq_num, yandexuid, question_id, is_serp,
            $parse_options(Yson::Lookup(raw_data, "options", Yson::Options(false as Strict))) as answer_option_ids,
            Yson::LookupString(raw_data, "text", Yson::Options(false as Strict)) as answer_text,
        from $actions
        where action = "ANSWER"
    )
    group by survey_id, interview_id, start_timestamp, question_id
)
flatten columns;

-- Timestamp attributed to each question's answer: for every
-- (survey_id, interview_id, start_timestamp, question_id) keeps the action row with the
-- highest seq_num, breaking ties by the earliest timestamp.
-- Rows at or after the interview's first FINISH are ignored, unless the interview has
-- no FINISH at all or the row belongs to a SERP interview.
$answer_timestamps =
select *
from (
    -- answer_timestamp is uint32, so it is negated as Int64: the tie-break key is then a
    -- genuine descending order, including for a timestamp of 0 (an unsigned negation
    -- cannot go below zero).
    select max_by(TableRow(), (seq_num, -cast(answer_timestamp as Int64)))
    from (
        select
            a.survey_id as survey_id,
            a.interview_id as interview_id,
            a.start_timestamp as start_timestamp,
            question_id, `timestamp` as answer_timestamp,
            seq_num
        from $actions as a
        left join $finishes as f
        using (survey_id, interview_id, start_timestamp)
        where finish_seq_num is null or seq_num < finish_seq_num or is_serp
    )
    group by survey_id, interview_id, start_timestamp, question_id
)
flatten columns;

-- Questions and answers combined: a full join keeps both shown-but-unanswered questions
-- and answers without a matching page view; shared columns prefer the question side,
-- except is_serp, which prefers the answer side.
$qa =
select
    q.slug ?? a.slug as slug,
    q.survey_id ?? a.survey_id as survey_id,
    q.revision_id ?? a.revision_id as revision_id,
    q.interview_id ?? a.interview_id as interview_id,
    q.start_timestamp ?? a.start_timestamp as start_timestamp,
    q.question_id ?? a.question_id as question_id,
    q.question_timestamp as question_timestamp,
    ts.answer_timestamp as answer_timestamp,
    q.yandexuid ?? a.yandexuid as yandexuid,
    a.answer_option_ids as answer_option_ids,
    a.answer_text as answer_text,
    a.is_serp ?? q.is_serp as is_serp
from $answers as a
left join $answer_timestamps as ts
    on a.survey_id = ts.survey_id
    and a.interview_id = ts.interview_id
    and a.start_timestamp = ts.start_timestamp
    and a.question_id = ts.question_id
full join $questions as q
    on a.survey_id = q.survey_id
    and a.interview_id = q.interview_id
    and a.start_timestamp = q.start_timestamp
    and a.question_id = q.question_id;

-- Maps every element of $list to its entry in $dict; elements without an entry become null.
$textify = ($list, $dict) -> {
    $lookup = ($key) -> (
        if(
            $dict[$key] is not null,
            Unwrap($dict[$key]),
            null
        )
    );
    return ListMap($list, $lookup);
};

-- Final rows: every question/answer pair from $qa enriched with the question's config
-- (inner join with $survey_pages, so pairs whose question is absent from the revision are dropped)
-- and with the interview's START details (left join with $starts).
$result =
select
    $get_msk_day(qa.start_timestamp) as day,
    qa.*,
    p.question_info.label as question_label,
    p.question_info.type as question_type,
    p.question_info.required as is_required,
    p.question_info.rotation as has_rotation,
    p.question_info.multiple_choice as is_multiple_choice,
    p.question_info.min_value as min_value,
    p.question_info.max_value as max_value,
    p.question_info.no_answer as has_no_answer,
    p.question_info.no_opinion_answer as has_no_opinion_answer,
    p.question_info.other_answer as has_other_answer,
    p.question_info.other_answer_with_text as has_other_answer_with_text,
    -- Option order as recorded for this interview when available, otherwise the order from the config.
    s.rotation_option_ids[qa.question_id] ?? p.question_info.options as question_option_ids,
    $textify(s.rotation_option_ids[qa.question_id] ?? p.question_info.options, p.question_info.option_texts) as question_option_labels,
    $textify(answer_option_ids, p.question_info.option_texts) as answer_option_labels,
    referer,
    user_agent,
    $supplier(query, qa.is_serp) as supplier_id,
    query["ad_group_type"] ?? query["bl_adg"] as ad_group_type,
    query["_theme_"] = "video" as video,
-- is_serp is only needed to derive supplier_id and is not part of the output.
without qa.is_serp
from $qa as qa
join $survey_pages as p on qa.survey_id = p.survey_id and qa.revision_id = p.revision_id and qa.question_id = p.question_id
left join $starts as s on qa.survey_id = s.survey_id and qa.interview_id = s.interview_id and qa.start_timestamp = s.start_timestamp;

-- True when the listing of $src_path's parent folder (everything before the last "/")
-- contains an entry whose Path is a suffix of $src_path.
$check_src_path =
select
    count(*) > 0
from folder(
    substring($src_path, 0, cast(rfind($src_path, "/") as uint32))
)
where
    String::EndsWith($src_path, Path);

-- Writes the output table.
-- When the source table exists: the destination is rewritten with the source rows older than
-- $date_from, then the freshly computed $result is appended.
-- Otherwise the destination is rewritten with $result alone.
evaluate if $check_src_path do begin
    -- Carry over history. "assume order by" declares that these rows are already sorted
    -- by the listed columns instead of sorting them again.
    insert into $dst_path with truncate
    select unwrap(day) as day, t.*
    without t.day
    from $src_path as t
    where day < $date_from
    assume order by day, survey_id, interview_id, start_timestamp, question_timestamp, answer_timestamp;

    -- Append the recalculated window, sorted by the same columns.
    insert into $dst_path
    select unwrap(day) as day, t.*
    without t.day
    from $result as t
    order by day, survey_id, interview_id, start_timestamp, question_timestamp, answer_timestamp;
end do
else do begin
    -- No previous table: the output consists of the recalculated window only.
    insert into $dst_path with truncate
    select unwrap(day) as day, t.*
    without t.day
    from $result as t
    order by day, survey_id, interview_id, start_timestamp, question_timestamp, answer_timestamp;
end do;
