use hahn;

-- $join_perf_data: attaches reducer-computed performance metrics to session rows.
--
-- Parameters (both are subquery templates and are invoked with no arguments):
--   $perf_input     - rows providing vsid, `timestamp`, transferSize, duration.
--   $sessions_input - rows providing vsid, `timestamp`, view_time,
--                     first_buffer_rel_time, first_buffer_duration, cell_lac_data.
--
-- Result: every column of the filtered session rows plus the reducer output
-- columns (the reducer's own vsid / `timestamp` are dropped), matched on
-- (vsid, `timestamp`).
DEFINE SUBQUERY $join_perf_data($perf_input, $sessions_input) AS

-- One row per (vsid, `timestamp`):
--   end_timestamp    = `timestamp` + the largest view_time in the group;
--   first_buffer_end = the largest value of
--                      `timestamp` + first_buffer_rel_time + first_buffer_duration
--                      (duration falls back to 1 when it is NULL), cast to Int64;
--                      rows with a NULL first_buffer_rel_time contribute NULL.
$session_windows = (
    SELECT
        vsid,
        `timestamp`,
        `timestamp` + MAX(view_time) AS end_timestamp,
        MAX(CAST(IF(
            first_buffer_rel_time IS NOT NULL,
            `timestamp` + first_buffer_rel_time + (first_buffer_duration ?? 1),
            NULL
        ) AS Int64)) AS first_buffer_end
    FROM $sessions_input()
    GROUP BY vsid, `timestamp`
);

-- Collapse the windows into a single sorted list of
-- (timestamp, end_timestamp, first_buffer_end) tuples per vsid.
-- vsid+timestamp is the simplest way to rejoin perf data to microsessions
$windows_by_vsid = (
    SELECT
        vsid,
        ListSort(AGGREGATE_LIST(
            AsTuple(`timestamp`, `end_timestamp`, first_buffer_end)
        )) AS ts_lists
    FROM $session_windows
    GROUP BY vsid
);

-- Every perf row is paired with the full window list of its vsid; perf rows
-- whose vsid has no sessions are dropped by the inner join.
$reducer_rows = (
    SELECT
        perf.vsid AS vsid,
        `timestamp`,
        transferSize,
        duration,
        ts_lists
    FROM $perf_input() AS perf
    INNER JOIN $windows_by_vsid AS sess USING (vsid)
);

-- Python reducer loaded from the attached file "perf_reducer.py".
-- The aggregation logic itself lives in that file and is not visible here;
-- only its input/output row types are declared below.
$perf_reduce_fn = Python::reducer(
    Callable<
        (
            String?,
            Stream<Struct<
                'ts_lists':List<Tuple<Int64?,Int64?,Int64?>>,
                'vsid':String?,
                'transferSize':Double?,
                'duration':Double?,
                'timestamp':UInt64?
            >>
        )->Stream<Struct<
            'vsid':String?,
            'timestamp':UInt64?,
            'throughput':Double?,
            'throughput_first_buffer':Double?,
            'transferSize':Double?,
            'transferSize_first_buffer':Double?,
            'duration':Double?,
            'duration_first_buffer':Double?,
        >>
    >,
    FileContent("perf_reducer.py")
);

-- Run the reducer once per vsid over all of that vsid's perf rows.
$perf_metrics = (
    REDUCE $reducer_rows
    ON vsid
    USING $perf_reduce_fn(TableRow())
);

-- Only sessions carrying a non-empty cell_lac_data list take part in the output.
$sessions_with_cells = (
    SELECT *
    FROM $sessions_input()
    WHERE cell_lac_data IS NOT NULL AND ListLength(cell_lac_data) > 0
);

-- Final result: session columns plus reducer metrics; the join keys are kept
-- from the session side only.
SELECT
    s.*,
    p.*
WITHOUT
    p.vsid,
    p.`timestamp`
FROM $perf_metrics AS p
INNER JOIN $sessions_with_cells AS s USING (vsid, `timestamp`);

END DEFINE;
EXPORT $join_perf_data;