pragma AnsiInForEmptyOrNullableItemsCollections;
use hahn;
pragma yson.DisableStrict;

-- Whole-string matcher: one or more digits, then '1', a digit in 2..6 and exactly eight more digits.
-- NOTE(review): presumably validates yandexuid values whose last ten digits are a Unix timestamp - confirm.
-- Not referenced anywhere in the visible part of this file.
$re_yandexuid = Re2::Match("[0-9]+1[23456][0-9]{8}");

-- Parses a serialized header string into a dictionary keyed by lower-cased header name.
--   $headers: header pairs joined with '~', each pair written as "Name: value".
-- Returns a dict from the lower-cased name to the value; a chunk without ': ' maps to ''.
$parseHeaders = ($headers) -> {
    $chunks = String::SplitToList($headers, '~');
    $pairs = ListMap($chunks, ($chunk) -> {
        -- Split on the first ': ' only (Limit counts splits, so 1 yields at most two parts);
        -- without the limit a value that itself contains ': ' would be cut short.
        $pair = String::SplitToList($chunk, ': ', 1 AS Limit);
        RETURN AsTuple(String::AsciiToLower($pair[0]), COALESCE($pair[1], ''));
    });

    RETURN ToDict($pairs);
};

-- Returns the value of header $name from the serialized $headers string,
-- or '' when the parsed dictionary has no such key.
$getStrVal = ($headers, $name) -> {
    $parsed = $parseHeaders($headers);
    RETURN $parsed[$name] ?? '';
};

-- Returns header $name converted to Double; the result is whatever CAST yields
-- for the header text (the text is '' when the header is missing).
$getDoubleVal = ($headers, $name) -> {
    $text = $getStrVal($headers, $name);
    RETURN CAST($text AS Double);
};

-- Subquery template that assembles the dataset for the table range $date_from .. $date_to
-- (both values are passed straight to RANGE):
--   1. geolocation rows and access-log rows are merged and reduced per yandexuid;
--   2. rows with a mobile os_family are reduced again together with AppMetrica connection-type events;
--   3. the result is the union of the "Windows" rows from step 1 and the output of step 2.
define subquery $prepare_data($date_from, $date_to) as

-- Passes an optional value through: NULL stays NULL, anything else goes through Unwrap().
-- NOTE(review): not referenced in the visible part of this file.
$unwrap = ($x) -> {
    RETURN IF($x IS NOT NULL, Unwrap($x), NULL);
};

-- Geolocation events that have a yandexuid and whose coordinates round to country-level region 225.
-- The event time is `geo`.`timestamp`, falling back to collect_timestamp when it is NULL.
$cryptageo = (
    SELECT
        CAST(yandexuid AS String) AS yandexuid,
        COALESCE(`geo`.`timestamp`, collect_timestamp) AS `timestamp`,
        Geo::RegionByLocation(`geo`.lat, `geo`.lon).id AS region_id,
        "cryptageo" AS source_log
    FROM RANGE(`//home/logfeller/logs/geolocation-united-geolog/1d`, $date_from, $date_to)
    WHERE
        yandexuid IS NOT NULL
        AND Geo::RoundRegionByLocation(`geo`.lat, `geo`.lon, "country").id == 225
);

-- Computes a transfer rate from the entry found at $resource_timings['jquery']['0']:
-- (transfer_size * 8 / 1e6) divided by ((response_end - request_start) / 1000).
-- NOTE(review): presumably transfer_size is in bytes and the timings are in milliseconds,
-- which would make the result Mbit/s - confirm. Not referenced in the visible part of this file.
$getMbitSec = ($resource_timings) -> {
    $entry = Yson::Lookup(Yson::Lookup($resource_timings, 'jquery'), '0');
    $started = Yson::LookupDouble($entry, 'request_start');
    $finished = Yson::LookupDouble($entry, 'response_end');
    $size = Yson::LookupInt64($entry, 'transfer_size');
    RETURN ($size * 8.0 / 1e6) / (($finished - $started) / 1000.0);
};

-- Normalises an IP field: the placeholders "-" and "" become NULL, any other value is kept.
$wrapIp = ($x) -> {
    RETURN IF($x != "-" AND $x != "", $x, NULL);
};

-- Access-log rows that carry the network hint headers (ect, downlink, rtt).
-- The client IP is x_real_remote_ip when it holds a real value, otherwise ip; the same
-- expression is used to resolve the ISP name stored in `operator`.
-- NOTE(review): the LIKE pre-filter is case-sensitive while $parseHeaders lower-cases header
-- names, so rows whose headers are spelled e.g. "RTT:" are dropped here - confirm this is intended.
$access = (
    SELECT
        'access' AS source_log,
        CAST(`_logfeller_timestamp` AS Int64) AS `timestamp`,
        yandexuid,
        COALESCE($wrapIp(x_real_remote_ip), $wrapIp(ip)) AS ip,
        Geo::GetIspNameByIp(COALESCE($wrapIp(x_real_remote_ip), $wrapIp(ip))) AS operator,
        user_agent,
        UserAgent::Parse(user_agent).OSFamily AS os_family,
        $getStrVal(headers, 'ect') AS ect,
        $getDoubleVal(headers, 'downlink') AS downlink,
        $getDoubleVal(headers, 'rtt') AS rtt
    FROM RANGE(`logs/yandex-access-log/1d`, $date_from, $date_to)
    WHERE
        -- identity fields must be present and not the '-' placeholder
        yandexuid IS NOT NULL AND yandexuid != '-'
        AND ip IS NOT NULL AND ip != '-'
        -- skip rows flagged as internal or suspected-robot traffic
        AND x_yandex_internal_request != '1'
        AND x_yandex_suspected_robot != '1'
        -- all three hint headers have to be mentioned in the raw header string
        AND headers LIKE '%rtt:%'
        AND headers LIKE '%downlink:%'
        AND headers LIKE '%ect:%'
);

-- Input of the first reduce step: geolocation rows followed by access-log rows.
$mapped = (
    SELECT * FROM $cryptageo
    UNION ALL
    SELECT * FROM $access
);

-- Formatter producing "%Y-%m-%d" strings.
$fielddateFormat = DateTime::Format("%Y-%m-%d");

-- Converts a timestamp in seconds to its calendar date in the Europe/Moscow timezone.
-- Only referenced from the commented-out debug query in the visible part of this file.
$getFielddate = ($ts) -> {
    $seconds = CAST($ts AS UInt32);
    $moscowTime = AddTimezone(DateTime::FromSeconds($seconds), "Europe/Moscow");
    RETURN $fielddateFormat($moscowTime);
};

-- select source_log, fielddate, count(*) as `count`
-- from $mapped
-- group by source_log, $getFielddate(`timestamp`) as fielddate
-- order by source_log, fielddate;

-- Row schema of the stream passed to the first Python reducer ($reducer).
-- Every member is optional.
$input_type = Struct<
    'duration':Double?,
    'operator':String?,
    'region_id':Int64?,
    'quality':Uint64?,
    'ip':String?,
    'event':String?,
    'source_log':String?,
    'throughput':Double?,
    'os_family':String?,
    'timestamp':Int64?,
    'transferSize':Double?,
    'vsid':String?,
    'mbit_sec':Double?,
    'yandexuid':String?,
    'buffer_duration':Double?,
    'html':Double?,
    'ttfb':Double?,
    'full_load':Double?,
    'user_agent':String?,
    'service':String?,
    'ect':String?,
    'downlink':Double?,
    'rtt':Double?
>;

-- Row schema emitted by $reducer; $reducer2 both consumes and emits rows of this schema.
-- Every member is optional.
$output_type = Struct<
    'region_id':Int64?,
    'fielddate':String?,
    'throughput':Double?,
    'connection_type':String?,
    'timestamp':Int64?,
    'os_family':String?,
    'ip':String?,
    'mbit_sec':Double?,
    'operator':String?,
    'yandexuid':String?,
    'quality':Uint64?,
    'transferSize':Double?,
    'start':Int64?,
    'refuse':Int64?,
    'first_buffer_throughput':Double?,
    'first_buffer_duration':Double?,
    'buffer_duration':Double?,
    'view_time':Double?,
    'html':Double?,
    'ttfb':Double?,
    'full_load':Double?,
    'source_log':String?,
    'service':String?,
    'user_agent':String?,
    'ect':String?,
    'downlink':Double?,
    'rtt':Double?
>;

-- Python UDF entry point `reducer` loaded from the attached file mma_4376_reducer.py.
-- Signature: (String? key, stream of $input_type rows) -> stream of $output_type rows.
$reducer = Python::reducer(
    Callable<(String?, Stream<$input_type>)->Stream<$output_type>>,
    FileContent("mma_4376_reducer.py")
);

-- First pass: $mapped rows are grouped by yandexuid, ordered by `timestamp` within each group
-- and handed to $reducer.
$reduced = (
    REDUCE $mapped
    PRESORT `timestamp`
    ON yandexuid
    USING $reducer(TableRow())
);

-- insert into $reduced1_table WITH TRUNCATE 
-- select * from $reduced;

-- Split the first-pass output by OS family.
$desktop = (SELECT * FROM $reduced WHERE os_family == "Windows");
$mobile = (SELECT * FROM $reduced WHERE os_family IN ("iOS", "Android"));

-- Distinct yandexuids present in the mobile rows.
$yandexuid_whitelist = (SELECT DISTINCT yandexuid FROM $mobile);

-- Rows of the yandexuid -> mm_device_id matching table, kept only for ids that
-- appear in $yandexuid_whitelist.
$device_id_to_yandexuid = (
    SELECT *
    FROM `//home/crypta/production/state/graph/v2/matching/by_id/yandexuid/direct/mm_device_id` AS crypta
    LEFT SEMI JOIN $yandexuid_whitelist AS wh
        ON (crypta.id == wh.yandexuid)
);

-- Reads one AppMetrica log directory ($root) over $date_from .. $date_to and re-labels
-- ConnectionType: the value 1 becomes "CONN_WIFI", everything else "CONN_CELL".
DEFINE SUBQUERY $appmetrica_log_subquery($root) AS
    SELECT
        DeviceID,
        NetworkType,
        EventTimestamp,
        IF(ConnectionType = 1, "CONN_WIFI", "CONN_CELL") AS ConnectionType
    FROM RANGE($root, $date_from, $date_to);
END DEFINE;

-- AppMetrica connection-type events attributed to a yandexuid through the device-id matching table.
-- Six AppMetrica log directories are read and concatenated; each event is joined to
-- $device_id_to_yandexuid on DeviceID and emitted with source_log = "metrika".
$mobmetrika = (
    select
        "metrika" as source_log,
        crypta.id as yandexuid,
        -- Int64 (not Uint64) so the column has the same type as `timestamp` in $access and in
        -- $output_type; these rows are unioned with reducer output before the second reduce.
        cast(EventTimestamp as Int64) as `timestamp`,
        ConnectionType as connection_type
    from (
        select * from $appmetrica_log_subquery("logs/appmetrica-events-log/appmetrica-yandex-events/1d")
        union all
        select * from $appmetrica_log_subquery("logs/appmetrica-events-log/appmetrica-external-events/1d")
        union all
        select * from $appmetrica_log_subquery("logs/appmetrica-events-log/browser-metrika-mobile-log/1d")
        union all
        select * from $appmetrica_log_subquery("logs/appmetrica-events-log/navi-metrika-mobile-log/1d")
        union all
        select * from $appmetrica_log_subquery("logs/appmetrica-events-log/taxi-metrika-mobile-log/1d")
        union all
        select * from $appmetrica_log_subquery("logs/appmetrica-events-log/superapp-metrika-mobile-log/1d")
    ) as m
    inner join $device_id_to_yandexuid as crypta on (crypta.target_id == m.DeviceID)
    where ConnectionType in ("CONN_CELL", "CONN_WIFI")
);

-- Python UDF entry point `reducer2` loaded from the same attached file mma_4376_reducer.py.
-- Signature: (String? key, stream of $output_type rows) -> stream of $output_type rows.
$reducer2 = Python::reducer2(
    Callable<(String?, Stream<$output_type>)->Stream<$output_type>>,
    FileContent("mma_4376_reducer.py")
);

-- Second pass: mobile rows from the first pass plus AppMetrica events are grouped by yandexuid,
-- ordered by `timestamp` within each group and handed to $reducer2.
$reduced2 = (
    REDUCE (
        SELECT * FROM $mobile
        UNION ALL
        SELECT * FROM $mobmetrika
    )
    PRESORT `timestamp`
    ON yandexuid
    USING $reducer2(TableRow())
);

-- Final dataset: desktop rows from the first pass plus the output of the second pass.
$united = (
    SELECT * FROM $desktop
    UNION ALL
    SELECT * FROM $reduced2
);

-- Result of the subquery.
SELECT * FROM $united;
end define;

export $prepare_data;
