use hahn;


-- Directory of daily tag-event log tables; read with RANGE() when building $tagged_cars.
$folder_tags = '//home/logfeller/logs/taxi-tags-tag-event-log/1d';
-- Input table read for the `cargo` column (cars with trips_ether > 0 per calendar_dt).
$supply_metrics = '//home/taxi-delivery/analytics/dev/emilsharafiev/logdata-1828/supply_metrics_v2';
-- Input table read for the `cargocorp` column (distinct cars per calendar_dt).
$market_transit = '//home/taxi-delivery/analytics/dev/emilsharafiev/logdata-1828/market_transit';
-- Output table; it is fully rewritten (INSERT ... WITH TRUNCATE) on every run.
$save_to = '//home/taxi-delivery/analytics/dev/emilsharafiev/logdata-1828/active_cars';

-- Scary normalizer ----------

-- Python source of the date-series UDF. generate_dates() expands a start date
-- into the list of every calendar day up to an end date (by default the day
-- on which the script is loaded).
$script = @@
import datetime as dt

# Evaluated once when the script is loaded; default upper bound of the series.
today = str(dt.date.today())


def generate_dates(date_from, date_to=today):
    """
    Generate the list of ISO dates between two dates, both ends inclusive.

    date_from, date_to -- ISO dates (YYYY-MM-DD) given as str or UTF-8 bytes.

    Returns a list of str. The list is empty when either bound is None
    (for example MIN() over an empty input produces NULL) or when
    date_from is later than date_to. When a bound cannot be parsed as an
    ISO date, a single-element list holding an error message is returned.
    """
    # The callable is declared as accepting String?, so NULL must not crash.
    if date_from is None or date_to is None:
        return []
    # Values declared as String may arrive as bytes; normalise to str.
    if not isinstance(date_from, str):
        date_from = date_from.decode('utf-8')
    if not isinstance(date_to, str):
        date_to = date_to.decode('utf-8')
    try:
        current = dt.date.fromisoformat(date_from)
        finish = dt.date.fromisoformat(date_to)
    except ValueError:
        return ["Плохой формат даты"]
    dates = []
    while current <= finish:
        dates.append(str(current))
        current += dt.timedelta(days=1)
    return dates
@@;
-- Only date_from is exposed to YQL, so date_to always takes its default.
$generate_series = Python3::generate_dates(Callable<(String?)->List<string>>, $script);


-- Pairs of (Cyrillic letter, Latin letter substituted for it).
$cyrlat = [
    ("а","a"), ("в","b"), ("е","e"), ("к","k"),
    ("м","m"), ("н","h"), ("о","o"), ("р","p"),
    ("с","c"), ("т","t"), ("у","y"), ("х","x")
];

-- Code point of the first character of a string.
$first_code_point = ($text) -> (
    Unwrap(Unicode::ToCodePointList(CAST($text AS Utf8))[0])
);

-- Dict: Cyrillic code point -> Latin code point, built from $cyrlat.
$mapping_cyr2lat = ToDict(
    ListMap(
        $cyrlat,
        ($letters) -> (AsTuple($first_code_point($letters.0), $first_code_point($letters.1)))
    )
);

-- Lower-cases $word and replaces every code point found in $mapping_cyr2lat
-- with its Latin counterpart; all other code points are kept unchanged.
$tl_cyr2lat = ($word) -> {
    $lowered = Unicode::ToLower(CAST($word AS Utf8));
    $code_points = Unicode::ToCodePointList($lowered);
    $latinized = ListMap(
        $code_points,
        ($code_point) -> (Coalesce($mapping_cyr2lat[$code_point], $code_point))
    );
    RETURN CAST(Unicode::FromCodePointList($latinized) AS String);
};

-- Returns the part of $x that follows its first underscore.
$extract_carid = ($x) -> (
    SUBSTRING($x, FIND($x, '_') + 1u)
);
----------------------------


-- Tag events for the 'drive_market_transit' tag, read from the daily logs
-- starting with table '2022-04-10'. Each row carries a normalized car
-- identifier and the validity interval of the tag, cut to the first
-- 10 characters of the source values (presumably YYYY-MM-DD -- confirm).
-- NOTE: the WHERE clause keeps only entity_type = 'park_car_id', so the other
-- CASE branches are currently never taken.
$tagged_cars = (
    SELECT
        CASE
            WHEN entity_type = 'car_number' THEN $tl_cyr2lat(entity ?? '')
            WHEN entity_type = 'park_car_id' THEN $extract_carid(entity)
            ELSE entity
        END AS number_normalized,
        SUBSTRING(timestamp_field, 0, 10) AS tag_valid_from_dt,
        -- A ttl of 'infinity' is replaced with a far-future date.
        SUBSTRING(
            CASE
                WHEN ttl = 'infinity' THEN '9999-12-31'
                ELSE ttl
            END,
            0,
            10
        ) AS tag_valid_to_dt
    FROM RANGE($folder_tags, '2022-04-10')
    WHERE entity_type = 'park_car_id'
        AND tag = 'drive_market_transit'
);



-- Number of distinct registration numbers per shift date for
-- YANDEX_DRIVE routes on 'Ford Transit Яндекс' vehicles,
-- for shift dates from 2022-04-01 onwards.
$active_market_cars = (
    select
        shift_date
        , count(distinct registration_number) as active_cars
    FROM `//home/market/production/analytics/business/delivery/couriers_route_transport`
    -- The date bound is on the grouping key, so it is applied before
    -- aggregation (WHERE) instead of after it (HAVING); the result is the same.
    where shift_date >= '2022-04-01'
        and routing_vehicle_type = 'YANDEX_DRIVE'
        and transport_type_name = 'Ford Transit Яндекс'
    group by
        shift_date
);


-- Single row whose calendar_dt column holds the list of dates produced by
-- $generate_series from the earliest tag_valid_from_dt in $tagged_cars.
-- Consumers expand the list with FLATTEN BY.
$dates = (
    SELECT
        $generate_series(MIN(tag_valid_from_dt)) AS calendar_dt
    FROM $tagged_cars
);

-- For every calendar day: number of distinct normalized car identifiers whose
-- tag validity interval [tag_valid_from_dt, tag_valid_to_dt] contains that day.
$our_cars = (
    SELECT
        calendar_dt,
        COUNT(DISTINCT tag.number_normalized) AS tagged_cars
    FROM
        (
        -- One row per calendar day.
        SELECT calendar_dt
        FROM $dates
        FLATTEN BY calendar_dt
        ) AS cal
    CROSS JOIN $tagged_cars AS tag
    WHERE cal.calendar_dt BETWEEN tag.tag_valid_from_dt AND tag.tag_valid_to_dt
    GROUP BY cal.calendar_dt AS calendar_dt
);

-- Distinct normalized car identifiers per calendar day in $market_transit.
$cargocorp = (
    SELECT
        calendar_dt,
        COUNT(DISTINCT number_normalized) AS cargocorp
    FROM $market_transit
    GROUP BY calendar_dt
);

-- Per calendar day: number of distinct normalized car identifiers in
-- $supply_metrics that have trips_ether > 0.
$our_active_cars = (
    select
        calendar_dt,
        count(distinct active_number) as cargo
    from (
        -- DISTINCT is applied to a plain column: the conditional value is
        -- computed here because DISTINCT over a calculated expression is
        -- documented as unsupported in aggregate functions.
        select
            calendar_dt,
            case when trips_ether > 0 then number_normalized else null end as active_number
        from $supply_metrics
    )
    group by calendar_dt
);


-- Constant written to total_market_cars for every day
-- (presumably the size of the market fleet -- TODO confirm).
$total_market_cars = 1698;

-- Rewrites the output table with one row per calendar day; each metric is
-- left-joined, so days without data in a source get NULL in that column.
INSERT INTO $save_to WITH TRUNCATE
SELECT
    days.calendar_dt AS calendar_dt,
    $total_market_cars AS total_market_cars,
    market.active_cars AS market_active_cars,
    tagged.tagged_cars AS tagged_cars,
    corp.cargocorp AS cargocorp,
    active.cargo AS cargo
FROM (SELECT calendar_dt FROM $dates FLATTEN BY calendar_dt) AS days
LEFT JOIN $active_market_cars AS market
    ON market.shift_date = days.calendar_dt
LEFT JOIN $our_cars AS tagged
    ON days.calendar_dt = tagged.calendar_dt
LEFT JOIN $cargocorp AS corp
    ON days.calendar_dt = corp.calendar_dt
LEFT JOIN $our_active_cars AS active
    ON days.calendar_dt = active.calendar_dt
;

