use hahn;
pragma yt.UseNativeYtTypes = "1";
pragma yt.Pool = "goda-other";

-- Returns the ids of the children whose type is 'question';
-- children of any other type are dropped.
$parse_children = ($children) -> {
    $only_questions = ListFilter($children, ($child) -> ($child.type = 'question'));
    return ListMap($only_questions, ($child) -> ($child.id));
};

-- Rebuilds the `groups` dict: the key is the original key cast to Int64,
-- the value is the list of 'question' child ids of that group
-- (as produced by $parse_children).
$parse_groups = ($groups) -> {
    $pairs = ListMap(
        DictItems($groups),
        ($item) -> ((CAST($item.0 AS Int64), $parse_children(($item.1).children)))
    );
    return ToDict($pairs);
};

-- Flattens one question config struct into the snake_case struct used downstream.
--   options      : option ids cast to Int64 (-9999 when the cast yields NULL)
--   option_texts : dict option id -> label; the keys -1 .. -4 carry the labels of
--                  the noAnswer / noOpinionAnswer / otherAnswer / otherAnswerWithText
--                  entries of the question
$parse_question = ($question) -> {
    -- Labels of the regular options; a missing option list is treated as empty.
    $regular_texts = ToDict(
        ListMap($question.options, ($opt) -> ((CAST($opt.id AS Int64) ?? -9999, $opt.label)))
        ?? ListCreate(Tuple<Int64, String>)
    );

    -- Labels of the special answers under reserved negative ids.
    $special_texts = {
        -1L: $question.noAnswer.label,
        -2L: $question.noOpinionAnswer.label,
        -3L: $question.otherAnswer.label,
        -4L: $question.otherAnswerWithText.label
    };

    return <|
        'label': $question.label,
        'type': $question.type,
        'required': $question.required,
        'options': ListMap($question.options, ($opt) -> (CAST($opt.id AS Int64) ?? -9999)),
        'rotation': $question.rotation,
        'multiple_choice': $question.multipleChoice,
        'no_answer': $question.noAnswer.enabled,
        'no_opinion_answer': $question.noOpinionAnswer.enabled,
        'other_answer': $question.otherAnswer.enabled,
        'other_answer_with_text': $question.otherAnswerWithText.enabled,
        'min_value': CAST($question.minValue AS Int64),
        'max_value': CAST($question.maxValue AS Int64),
        -- Per key: take the regular label, else the special label, else ''.
        'option_texts': SetUnion(
            $regular_texts,
            $special_texts,
            ($_, $regular, $special) -> ($regular ?? $special ?? '')
        )
    |>;
};

-- Rebuilds the `questions` dict: the key is the original key cast to Int64,
-- the value is the struct produced by $parse_question.
$parse_questions = ($questions) -> {
    $pairs = ListMap(
        DictItems($questions),
        ($item) -> ((CAST($item.0 AS Int64), $parse_question($item.1)))
    );
    return ToDict($pairs);
};

-- One row per row of survey_revisions: the Yson `config` column is converted to a
-- typed struct (AutoConvert enabled), flattened into columns and reshaped into
--   start_page     : first element of pageOrder, read as Double and cast to Int64
--   page_questions : dict group key -> ids of its 'question' children
--   questions      : dict question key -> parsed question struct
$surveys = SELECT
    survey_id,
    revision_id,
    CAST(Yson::ConvertToDouble(pageOrder[0], Yson::Options(false AS Strict)) AS Int64) AS start_page,
    $parse_groups(groups) AS page_questions,
    $parse_questions(questions) AS questions
FROM (
    SELECT
        survey_id,
        id AS revision_id,
        Yson::ConvertTo(
            config,
            Struct<
                groups: Dict<String, Struct<children: List<Struct<id: Double, type: String>>>>,
                pageOrder: List<Yson>,
                questions: Dict<String,
                    Struct<
                        label: String,
                        type: String,
                        required: Bool,
                        options: List<Struct<id: Double, label: String>>?,
                        rotation: Bool?,
                        multipleChoice: Bool?,
                        noAnswer: Struct<enabled: Bool, label: String>?,
                        noOpinionAnswer: Struct<enabled: Bool, label: String>?,
                        otherAnswer: Struct<enabled: Bool, label: String>?,
                        otherAnswerWithText: Struct<enabled: Bool, label: String>?,
                        minValue: Double?,
                        maxValue: Double?
                    >
                >
            >,
            Yson::Options(true AS AutoConvert)
        )
    FROM `//home/pythia/private/pg_raw/survey_revisions`
)
FLATTEN COLUMNS;


-- Actions that have yandexuid = 0 and no revision_id, newer than the cut-off
-- timestamp. The NULL revision_id column is replaced by the constant 1.
-- NOTE(review): 1640995200000000 is presumably 2022-01-01 00:00:00 UTC in
-- microseconds — confirm the unit of `timestamp`.
$actions3 = SELECT
    d.*,
    CAST(1 AS Uint32) AS revision_id
WITHOUT d.revision_id
FROM `//home/pythia/export/prod/actions_full` AS d
WHERE yandexuid == 0
    AND revision_id IS NULL
    AND `timestamp` > 1640995200000000;


-- Adds start_timestamp to every action: the last non-NULL timestamp of a START
-- action seen in window w (per survey_id, interview_id, ordered by timestamp),
-- falling back to the first timestamp of the window when there is none.
$actions4 = SELECT
    a.*,
    LAST_VALUE(IF(action = 'START', `timestamp`)) IGNORE NULLS OVER w
        ?? FIRST_VALUE(`timestamp`) OVER w AS start_timestamp
FROM $actions3 AS a
WINDOW w AS (
    PARTITION BY survey_id, interview_id
    ORDER BY `timestamp`
)
;

-- Converts a Yson node to a list of Int64 option ids (non-strict, AutoConvert).
-- An empty list, or a list holding a single NULL, yields an empty List<Int64>;
-- otherwise every element is unwrapped.
$parse_options = ($options) -> {
    $ids = Yson::ConvertTo($options, List<Int64?>, Yson::Options(false AS Strict, true AS AutoConvert));
    return IF(
        ListLength($ids) = 0 OR (ListLength($ids) = 1 AND Unwrap($ids[0]) IS NULL),
        ListCreate(Int64),
        ListMap($ids, ($id) -> (Unwrap($id)))
    );
};

-- Smallest seq_num of a FINISH action per (survey_id, interview_id,
-- start_timestamp, query), where `query` is raw_meta.answers converted
-- non-strictly to Dict<String, String>?.
-- NOTE(review): `query` is a grouping key only and is not selected; if it takes
-- several distinct values for one interview this yields several rows per
-- (survey_id, interview_id, start_timestamp) — confirm that this is intended.
$finishes2 = SELECT
    survey_id,
    interview_id,
    start_timestamp,
    MIN(seq_num) AS finish_seq_num
FROM $actions4
WHERE action = 'FINISH'
GROUP BY
    survey_id,
    interview_id,
    start_timestamp,
    Yson::ConvertTo(
        Yson::Lookup(raw_meta, 'answers', Yson::Options(false AS Strict)),
        Dict<String, String>?,
        Yson::Options(false AS Strict)
    ) AS query
;

-- First visit of every page within an interview: among START and PAGE_SWITCH
-- actions, keeps the row with the smallest (seq_num, question_timestamp) per
-- (survey_id, interview_id, start_timestamp, page_id).
-- For START the page is the first element of raw_data /rotation_order/pages,
-- for PAGE_SWITCH it is page_to.
$switches = SELECT * FROM (
    SELECT MIN_BY(TableRow(), (seq_num, question_timestamp))
    FROM (
        SELECT
            slug,
            survey_id,
            revision_id,
            interview_id,
            start_timestamp,
            `timestamp` AS question_timestamp,
            seq_num,
            yandexuid,
            IF(
                action = 'START',
                Yson::ConvertTo(Yson::YPath(raw_data, '/rotation_order/pages'), List<Int64>?)[0],
                page_to
            ) AS page_id
        FROM $actions4
        WHERE action = 'START' OR action = 'PAGE_SWITCH'
    )
    GROUP BY survey_id, interview_id, start_timestamp, page_id
) FLATTEN COLUMNS;

-- Page visits joined to their survey revision; a NULL page_id is replaced by
-- the survey's start_page. seq_num is dropped from the result.
$s1 = SELECT
    s.page_id ?? ss.start_page AS page_id,
    s.*
WITHOUT s.page_id, s.seq_num
FROM $switches AS s
JOIN $surveys AS ss
    USING (survey_id, revision_id);

-- One row per (survey revision, page, question): page_questions is flattened
-- first by dict entry (page id -> question ids) and then by list element.
-- question_info is the parsed question struct looked up by question_id.
$survey_pages = SELECT
    survey_id,
    revision_id,
    page_id,
    question_id,
    questions[question_id] AS question_info
FROM (
    SELECT
        survey_id,
        revision_id,
        page_questions.0 AS page_id,
        page_questions.1 AS page_questions,
        questions
    FROM $surveys
    FLATTEN DICT BY page_questions
)
FLATTEN LIST BY (page_questions AS question_id);

-- Every question located on a page the respondent visited: page visits ($s1)
-- joined to the page -> question mapping ($survey_pages).
$questions = SELECT
    slug,
    s.survey_id AS survey_id,
    s.revision_id AS revision_id,
    interview_id,
    start_timestamp,
    question_timestamp,
    yandexuid,
    question_id
FROM $s1 AS s
JOIN $survey_pages AS p
    USING (survey_id, revision_id, page_id);

-- Latest ANSWER action (largest seq_num) per
-- (survey_id, interview_id, start_timestamp, question_id), with
--   prevyandexuid     : raw_meta /prevyandexuid, SUBSTRING(..., 1) cast to Uint64
--   answer_option_ids : raw_data.options parsed by $parse_options
--   answer_text       : raw_data.text (non-strict lookup)
$answers2 = SELECT * FROM (
    SELECT MAX_BY(TableRow(), seq_num)
    FROM (
        SELECT
            slug,
            survey_id,
            revision_id,
            interview_id,
            start_timestamp,
            seq_num,
            yandexuid,
            question_id,
            CAST(
                SUBSTRING(Yson::ConvertToString(Yson::YPath(raw_meta, '/prevyandexuid')), 1)
                AS Uint64
            ) AS prevyandexuid,
            $parse_options(
                Yson::Lookup(raw_data, 'options', Yson::Options(false AS Strict))
            ) AS answer_option_ids,
            Yson::LookupString(raw_data, 'text', Yson::Options(false AS Strict)) AS answer_text
        FROM $actions4
        WHERE action = 'ANSWER'
    )
    GROUP BY survey_id, interview_id, start_timestamp, question_id
) FLATTEN COLUMNS;

-- Per (survey_id, interview_id, start_timestamp, question_id): the action with
-- the largest seq_num (ties resolved towards the smaller timestamp) among the
-- actions that have no matching FINISH row or precede finish_seq_num.
-- NOTE(review): there is no filter on `action` here, so every action type that
-- carries a question_id is considered — confirm that this is intended.
$answer_timestamps = SELECT * FROM (
    SELECT MAX_BY(TableRow(), (seq_num, -answer_timestamp))
    FROM (
        SELECT
            a.survey_id AS survey_id,
            a.interview_id AS interview_id,
            a.start_timestamp AS start_timestamp,
            question_id,
            `timestamp` AS answer_timestamp,
            seq_num
        FROM $actions4 AS a
        LEFT JOIN $finishes2 AS f
            USING (survey_id, interview_id, start_timestamp)
        WHERE finish_seq_num IS NULL OR seq_num < finish_seq_num
    )
    GROUP BY survey_id, interview_id, start_timestamp, question_id
) FLATTEN COLUMNS;

-- Maps every element of $list to its entry in $dict; elements without a
-- non-NULL entry become NULL.
$textify = ($list, $dict) -> (
    ListMap(
        $list,
        ($key) -> (IF($dict[$key] IS NOT NULL, Unwrap($dict[$key]), NULL))
    )
);

-- Shown questions and given answers combined with a FULL JOIN, so that both
-- shown-but-unanswered questions and answers without a matching shown question
-- are kept. Key columns prefer the question side and fall back to the answer
-- side. answer_timestamp comes from $answer_timestamps, which is LEFT JOINed to
-- the answer side.
$qa2 = SELECT
    q.slug ?? a.slug AS slug,
    q.survey_id ?? a.survey_id AS survey_id,
    q.revision_id ?? a.revision_id AS revision_id,
    q.interview_id ?? a.interview_id AS interview_id,
    q.start_timestamp ?? a.start_timestamp AS start_timestamp,
    q.question_id ?? a.question_id AS question_id,
    q.question_timestamp AS question_timestamp,
    at.answer_timestamp AS answer_timestamp,
    q.yandexuid ?? a.yandexuid AS yandexuid,
    a.answer_option_ids AS answer_option_ids,
    a.answer_text AS answer_text,
    a.prevyandexuid AS prevyandexuid
FROM $answers2 AS a
LEFT JOIN $answer_timestamps AS at
    ON a.survey_id = at.survey_id
    AND a.interview_id = at.interview_id
    AND a.start_timestamp = at.start_timestamp
    AND a.question_id = at.question_id
FULL JOIN $questions AS q
    ON a.survey_id = q.survey_id
    AND a.interview_id = q.interview_id
    AND a.start_timestamp = q.start_timestamp
    AND a.question_id = q.question_id;



-- Per interview_id: the FINISH action with the largest seq_num, with fields
-- extracted from raw_meta. Only the first element of raw_meta.answers is read.
--   rotation_order      : answers[0] /meta/rotation_order (raw Yson)
--   question_id         : answers[0] /questionId as Double (non-strict)
--   timestamp_1         : answers[0] /meta/timestamp * 1000000
--   questTs             : raw_meta /meta/questionTimestamp * 1000000
--   rotation_option_ids : one-entry dict questionId -> rotation order as Int64 list
--   yandexuid           : raw_meta /meta/yandexuid, SUBSTRING(..., 1) cast to Uint64
-- NOTE(review): rotation_option_ids is built by AsDict from a single tuple, so
-- the trailing length filter looks like it always passes — confirm whether the
-- length of the rotation list was meant instead.
$web_rotation_order =
SELECT * FROM (
    SELECT MAX_BY(TableRow(), seq_num)
    FROM (
        SELECT
            seq_num,
            survey_id,
            revision_id,
            interview_id,
            raw_meta,
            Yson::YPath(Yson::Lookup(raw_meta, 'answers')[0], '/meta/rotation_order') AS rotation_order,
            Yson::ConvertToDouble(
                Yson::YPath(Yson::Lookup(raw_meta, 'answers')[0], '/questionId'),
                Yson::Options(false AS Strict)
            ) AS question_id,
            Yson::ConvertToUint64(
                Yson::YPath(Yson::Lookup(raw_meta, 'answers')[0], '/meta/timestamp'),
                Yson::Options(false AS Strict)
            ) * 1000000 AS timestamp_1,
            Yson::ConvertToUint64(
                Yson::YPath(raw_meta, '/meta/questionTimestamp'),
                Yson::Options(false AS Strict)
            ) * 1000000 AS questTs,
            AsDict(AsTuple(
                Yson::ConvertToInt64(raw_meta.answers[0].questionId, Yson::Options(true AS AutoConvert)),
                Yson::ConvertToInt64List(raw_meta.answers[0].meta.rotation_order, Yson::Options(true AS AutoConvert))
            )) AS rotation_option_ids,
            CAST(
                SUBSTRING(Yson::ConvertToString(Yson::YPath(raw_meta, '/meta/yandexuid')), 1)
                AS Uint64
            ) AS yandexuid
        FROM $actions4
        WHERE `action` = 'FINISH'
    )
    GROUP BY interview_id
)
FLATTEN COLUMNS
WHERE ListLength(rotation_option_ids) > 0
;



-- Writes the enriched question/answer rows to pythia_answers_extra, replacing
-- the table contents. Each $qa2 row is joined to its question description and,
-- when available, to the rotation data of the interview's last FINISH action.
-- yandexuid, question_timestamp and answer_timestamp of $qa2 are dropped and
-- re-created from the rotation data (yandexuid falls back to prevyandexuid).
INSERT INTO `//home/goda/burdanov/surveys/pythia_answers_extra` WITH TRUNCATE

SELECT
    qa.*,
    p.question_info.label AS question_label,
    p.question_info.type AS question_type,
    p.question_info.required AS is_required,
    p.question_info.rotation AS has_rotation,
    p.question_info.multiple_choice AS is_multiple_choice,
    p.question_info.min_value AS min_value,
    p.question_info.max_value AS max_value,
    p.question_info.no_answer AS has_no_answer,
    p.question_info.no_opinion_answer AS has_no_opinion_answer,
    p.question_info.other_answer AS has_other_answer,
    p.question_info.other_answer_with_text AS has_other_answer_with_text,
    wro.rotation_option_ids[wro.question_id] AS question_option_ids,
    $textify(wro.rotation_option_ids[wro.question_id], p.question_info.option_texts) AS question_option_labels,
    $textify(answer_option_ids, p.question_info.option_texts) AS answer_option_labels,
    CAST("serp" AS String) AS supplier_id,
    wro.yandexuid ?? qa.prevyandexuid AS yandexuid,
    wro.questTs AS question_timestamp,
    wro.timestamp_1 AS answer_timestamp
-- qa.start_timestamp is deliberately kept in the output.
WITHOUT qa.yandexuid, qa.prevyandexuid, qa.question_timestamp, qa.answer_timestamp
FROM $qa2 AS qa
JOIN $survey_pages AS p
    ON qa.survey_id = p.survey_id
    AND qa.revision_id = p.revision_id
    AND qa.question_id = p.question_id
LEFT JOIN $web_rotation_order AS wro
    ON wro.survey_id = p.survey_id
    AND wro.question_id = p.question_id
    AND wro.revision_id = p.revision_id
    AND qa.interview_id = wro.interview_id
ORDER BY survey_id, interview_id, start_timestamp, question_timestamp, answer_timestamp
;

-- Commit first so that the statement below reads the freshly written
-- pythia_answers_extra table.
COMMIT;


-- Rebuilds pythia_answers from pythia_answers_less_2022, the current contents
-- of pythia_answers itself and pythia_answers_extra.
-- NOTE(review): the statement reads the table it truncates, so every rerun
-- appears to append pythia_answers_less_2022 and pythia_answers_extra again on
-- top of what is already stored — confirm that this script is meant to run
-- once, or that pythia_answers is regenerated elsewhere before each run.
INSERT INTO `//home/goda/burdanov/surveys/pythia_answers` WITH TRUNCATE
SELECT * FROM `//home/goda/burdanov/surveys/pythia_answers_less_2022`
UNION ALL
SELECT * FROM (
    SELECT * FROM `//home/goda/burdanov/surveys/pythia_answers`
    UNION ALL
    SELECT * FROM `//home/goda/burdanov/surveys/pythia_answers_extra`
)
ORDER BY survey_id, interview_id, start_timestamp, question_timestamp, answer_timestamp
;

