use hahn;
pragma yson.DisableStrict;

-- Predicate for well-formed yandexuid strings: one or more digits, followed by
-- "1", a digit from 2 to 6, and exactly eight more digits.
-- NOTE(review): the fixed 10-digit tail looks like a unix-timestamp suffix — confirm.
-- NOTE(review): Re2::Match is expected to require the whole string to match
-- (as opposed to Re2::Grep, which searches for a substring) — confirm against the Re2 UDF docs.
$re_yandexuid = Re2::Match("[0-9]+1[23456][0-9]{8}");

DEFINE SUBQUERY $get_cell_lac_data($date_from, $date_to, $sessions) AS

-- Distinct yandexuid values produced by the caller-supplied $sessions()
-- subquery, keeping only those accepted by the $re_yandexuid predicate.
$yandexuid_whitelist = (
    SELECT DISTINCT
        yandexuid
    FROM $sessions()
    WHERE $re_yandexuid(yandexuid)
);

-- Rows of the Crypta yandexuid -> mm_device_id matching table whose `id`
-- is present in $yandexuid_whitelist. LEFT SEMI JOIN only filters the left
-- side; no whitelist columns are added to the result.
-- Downstream code reads the `id` and `target_id` columns of this result.
$device_id_to_yandexuid = (
    SELECT
        *
    FROM `//home/crypta/production/state/graph/v2/matching/by_id/yandexuid/direct/mm_device_id` AS crypta
    LEFT SEMI JOIN $yandexuid_whitelist AS wh
        ON (crypta.id == wh.yandexuid)
);

-- Parses a JSON array given as a string and converts it to a list of Int64.
$getIntList = ($string) -> {
    RETURN Yson::ConvertToInt64List(Yson::ParseJson($string))
};

-- Parses a JSON array given as a string and converts it to a list of strings.
$getStringList = ($string) -> {
    RETURN Yson::ConvertToStringList(Yson::ParseJson($string))
};

-- Parses a JSON array given as a string and converts it to a list of doubles.
$getDoubleList = ($string) -> {
    RETURN Yson::ConvertToDoubleList(Yson::ParseJson($string))
};

-- Splits a bracketed, comma-separated string into its items: the first and
-- the last character are dropped and the remainder is split on ",".
-- NOTE(review): presumably the input looks like "[LTE,GSM]" (unquoted items,
-- hence no JSON parsing) — confirm against the log format.
$parseCellsTypes = ($string) -> {
    $inner_length = CAST(Length($string) - 2 AS UInt32);
    $inner = SUBSTRING($string, 1, $inner_length);
    RETURN String::SplitToList($inner, ",")
};

-- Returns true when a list of signal strengths looks invalid:
--   * any value is below -150 or above 0, or
--   * the list has at least 10 entries and every entry is -51, or
--   * the list has at least 10 entries and every entry is -113.
-- Returns false otherwise.
$banFlag_Cells = ($Cells_SignalsStrengths) -> {
    $sample_count = ListLength($Cells_SignalsStrengths);
    $distinct_values = ListUniq($Cells_SignalsStrengths);
    $out_of_range = ListFilter(
        $Cells_SignalsStrengths,
        ($strength) -> {RETURN $strength < -150 OR $strength > 0}
    );
    RETURN CASE
        WHEN ListLength($out_of_range) > 0 THEN true
        WHEN $sample_count >= 10 AND $distinct_values == AsList(CAST(-51 AS Int64)) THEN true
        WHEN $sample_count >= 10 AND $distinct_values == AsList(CAST(-113 AS Int64)) THEN true
        ELSE false
    END
};

-- Operator id -> operator name. Ids missing from this dictionary produce NULL
-- when looked up with DictLookup.
-- NOTE(review): the keys look like mobile network codes — confirm.
$mobileOperatorDict = AsDict(
    AsTuple(1, "mts"),
    AsTuple(2, "megafon"),
    AsTuple(5, "tele2"),
    AsTuple(11, "yota"),
    AsTuple(20, "tele2"),
    AsTuple(99, "beeline"),
);

-- Turns eight parallel per-cell lists into a list of structs, one per cell,
-- keeping only cells whose country code equals 250.
-- NOTE(review): 250 is presumably the mobile country code of interest — confirm.
--
-- Each resulting struct has the fields IsConnected, OperatorID, MobileOperator
-- (looked up in $mobileOperatorDict, NULL for unknown operator ids),
-- SignalStrength, CellType, CountryCode, CellID, PhysicalCellID and Lac.
$transformCellData = (
    $Cells_AreConnected,
    $Cells_OperatorsIDs,
    $Cells_SignalsStrengths,
    $Cells_Types,
    $Cells_CountriesCodes,
    $Cells_CellsIDs,
    $Cells_PhysicalsCellsIDs,
    $Cells_Lacs
) -> {
    -- Tuple positions used below:
    --   0 AreConnected, 1 OperatorsIDs, 2 SignalsStrengths, 3 Types,
    --   4 CountriesCodes, 5 CellsIDs, 6 PhysicalsCellsIDs, 7 Lacs
    $zipped = ListZip(
        $Cells_AreConnected,
        $Cells_OperatorsIDs,
        $Cells_SignalsStrengths,
        $Cells_Types,
        $Cells_CountriesCodes,
        $Cells_CellsIDs,
        $Cells_PhysicalsCellsIDs,
        $Cells_Lacs
    );
    $in_country = ListFilter($zipped, ($cell) -> {RETURN $cell.4 == 250});
    $to_struct = ($cell) -> {
        RETURN AsStruct(
            $cell.0 AS IsConnected,
            $cell.1 AS OperatorID,
            DictLookup($mobileOperatorDict, $cell.1) AS MobileOperator,
            $cell.2 AS SignalStrength,
            $cell.3 AS CellType,
            $cell.4 AS CountryCode,
            $cell.5 AS CellID,
            $cell.6 AS PhysicalCellID,
            $cell.7 AS Lac
        )
    };
    RETURN ListMap($in_country, $to_struct)
};

-- Step 1: read the mobile metrika log for [$date_from, $date_to], keep only
-- events whose DeviceID is matched to a whitelisted yandexuid, and parse the
-- string-encoded per-cell columns into lists.
$mobmetrika_step1 = (
    SELECT
        crypta.id AS yandexuid,
        DeviceID,
        CAST(EventTimestamp AS UInt64) AS `timestamp`,
        $getIntList(Cells_PhysicalsCellsIDs) AS Cells_PhysicalsCellsIDs,
        $getIntList(Cells_SignalsStrengths) AS Cells_SignalsStrengths,
        $getIntList(Cells_CellsIDs) AS Cells_CellsIDs,
        $getIntList(Cells_AreConnected) AS Cells_AreConnected,
        $getIntList(Cells_Lacs) AS Cells_Lacs,
        $getIntList(Cells_OperatorsIDs) AS Cells_OperatorsIDs,
        $getStringList(Cells_OperatorsNames) AS Cells_OperatorsNames,
        $getIntList(Cells_CountriesCodes) AS Cells_CountriesCodes,
        -- Cells_Types is parsed with a plain split rather than as JSON.
        $parseCellsTypes(Cells_Types) AS Cells_Types,
    FROM RANGE(
        `//logs/metrika-mobile-log/1d`,
        $date_from,
        $date_to
    ) AS m
    INNER JOIN $device_id_to_yandexuid AS crypta
        ON (crypta.target_id == m.DeviceID)
);


-- Step 2: collapse the eight parallel per-cell lists into a single `cell_data`
-- list of structs and compute the per-event signal-strength ban flag.
--
-- Only rows where all eight lists have the same length are kept. This matters
-- because $transformCellData combines them with ListZip, which stops at the
-- shortest list: a row with a shorter list would silently lose cells instead
-- of being rejected.
-- The raw list columns are removed from the output; all other columns of
-- $mobmetrika_step1 are passed through.
$mobmetrika_step2 = (
    select
    $transformCellData(
        Cells_AreConnected,
        Cells_OperatorsIDs,
        Cells_SignalsStrengths,
        Cells_Types,
        Cells_CountriesCodes,
        Cells_CellsIDs,
        Cells_PhysicalsCellsIDs,
        Cells_Lacs
    ) as cell_data,
    $banFlag_Cells(Cells_SignalsStrengths) as banflag_cells,
    t.* without Cells_AreConnected, --0
        Cells_OperatorsIDs, --1
        Cells_SignalsStrengths, --2
        Cells_Types, --3
        Cells_CountriesCodes, --4
        Cells_CellsIDs, --5
        Cells_PhysicalsCellsIDs, --6
        Cells_Lacs --7
    from $mobmetrika_step1 as t
    where
        -- Every zipped list is compared against Cells_SignalsStrengths.
        ListLength(Cells_SignalsStrengths) == ListLength(Cells_AreConnected)
        and ListLength(Cells_SignalsStrengths) == ListLength(Cells_OperatorsIDs)
        and ListLength(Cells_SignalsStrengths) == ListLength(Cells_Types)
        and ListLength(Cells_SignalsStrengths) == ListLength(Cells_CountriesCodes)
        and ListLength(Cells_SignalsStrengths) == ListLength(Cells_CellsIDs)
        and ListLength(Cells_SignalsStrengths) == ListLength(Cells_PhysicalsCellsIDs)
        -- Cells_Lacs was previously left unchecked (Cells_AreConnected was
        -- checked twice instead).
        and ListLength(Cells_SignalsStrengths) == ListLength(Cells_Lacs)
);

-- Devices that produced at least one event with the ban flag set.
$banflag_blacklist = (
    SELECT DISTINCT
        DeviceID
    FROM $mobmetrika_step2
    WHERE banflag_cells
);

-- Step 3: drop every event of a blacklisted device. LEFT ONLY JOIN keeps the
-- left-side rows that have no match on the right.
$mobmetrika_step3 = (
    SELECT
        *
    FROM $mobmetrika_step2 AS t
    LEFT ONLY JOIN $banflag_blacklist AS b
        USING (DeviceID)
);

-- Returns the single cell with IsConnected == 1 from a list of cell structs.
-- Returns NULL when the filtered list is NULL, empty, or has more than one
-- connected cell.
$getConnectedCell = ($cell_data) -> {
    $connected = ListFilter($cell_data, ($cell) -> {RETURN $cell.IsConnected == 1});
    $exactly_one = $connected IS NOT NULL AND ListLength($connected) == 1;
    RETURN IF(
        $exactly_one,
        Unwrap($connected[0]),
        NULL
    )
};

-- Step 4: keep only events with exactly one connected cell and expose that
-- cell's id, LAC and operator name next to the yandexuid and the timestamp.
$mobmetrika_step4 = (
    SELECT
        yandexuid,
        `timestamp`,
        $getConnectedCell(cell_data).CellID AS CellID,
        $getConnectedCell(cell_data).Lac AS Lac,
        $getConnectedCell(cell_data).MobileOperator AS ConnectedMobileOperator
    FROM $mobmetrika_step3
    WHERE $getConnectedCell(cell_data) IS NOT NULL
);

-- Python UDF `reducer`, loaded from the attached file cell_lac_reducer.py.
-- Signature: (key, stream of input rows) -> stream of output rows.
-- The input row mirrors the columns of $mobmetrika_step4. The output row
-- replaces `timestamp` with round_hour, ts_min, ts_max and fielddate_tsmax.
-- NOTE(review): the meaning of the output fields is defined in
-- cell_lac_reducer.py, which is not visible here — confirm there.
$reducer_1 = Python::reducer(
    Callable<
        (
            String?,
            Stream<Struct<
                'CellID':Int64?,
                'Lac':Int64?,
                'timestamp':UInt64?,
                'yandexuid':String?,
                'ConnectedMobileOperator':String?,
            >>
        )->Stream<Struct<
            'CellID':Int64?,
            'Lac':Int64?,
            'round_hour':UInt64?,
            'ts_min':UInt64?,
            'ts_max':UInt64?,
            'fielddate_tsmax':String?,
            'yandexuid':String?,
            'ConnectedMobileOperator':String?
        >>
    >,
    FileContent("cell_lac_reducer.py")
);

-- Run the Python reducer once per yandexuid, feeding it that user's rows
-- ordered by `timestamp`.
$reduced = (
    REDUCE $mobmetrika_step4
    PRESORT `timestamp`
    ON yandexuid
    USING $reducer_1(TableRow())
);

-- Result of the subquery: everything the reducer emitted.
SELECT
    *
FROM $reduced;
END DEFINE;
EXPORT $get_cell_lac_data;