/*
    Ticket: LOGDATA-1913

    Author: ikhomyanin@

    Stream: NDD

    About: Extract intervals from Log Platform 
*/

use hahn;
pragma yt.MaxRowWeight = "128M";

-- History tables read by this script.
$STATION_TAGS = '//home/taxi/testing/export/taxi-logistic-platform-production/station_tags_history';
$STATIONS_HISTORY = '//home/taxi/testing/export/taxi-logistic-platform-production/stations_history';

-- LOGDATA-1983: hard-coded table taken from SharePoint.
$RETURN_PERIODS = '//home/taxi-delivery/analytics/production/ndd/delivery_intervals/return_periods';

-- Destination table; the final INSERT of this script rewrites it WITH TRUNCATE.
$SAVE_TO = '//home/taxi-delivery/analytics/production/ndd/delivery_intervals/tags_flattened';

-- Tag names kept when reading the station tag history.
$TAG_NAMES = (
    'yd_new_supply_promise_reservation',
    'abstract_supply_promise_ydNEW_reservation'
);

-- Lower bound passed to range() over the tag history: ten days before the current UTC date.
$START_DATE = cast(CurrentUtcDate() - Interval('P10D') as String);

-- TEMPORARY SOLUTION, SO THAT STATIONS DO NOT HAVE TO BE RECALCULATED ------------------------------------------------
-- Maps a station id to a hard-coded station name.
-- Any id that is not listed (including NULL) maps to 'UNKNOWN STATION'.
$map_station_id = ($station_id) -> {
    return case $station_id
        when '251921c0-dfdd-4d8a-bbb6-1b1d219aa130' then 'YD-MSW'
        when 'a1789211-f4ca-4ab7-8f8c-1d4bb3ff8752' then 'YM-KRR'
        when '340a2501-9b27-4082-9fff-ea7e308ee3f7' then 'YM-EKB'
        when '9e681b96-2db3-441f-9e25-440f152a5915' then 'YD-MSW-CN'
        when 'd3f7e57d-22f1-462c-ab91-084b7b4e468e' then 'СЦ Яндекс.Доставка - склад на Пресне'
        when '9ba64503-8c84-41f5-97aa-f114a728f942' then 'СЦ Яндекс.Доставка - склад на Пресне(день в день)'
        else 'UNKNOWN STATION'
    end;
};
-----------------------------------------------------------------------------------------------------------------------

-- ALL STATIONS WITH THEIR METADATA -----------------------------------------------------------------------------------
-- One row per station_id. The inner query keeps only rows with enabled_in_platform set and
-- picks every attribute via max_by(..., history_event_id); the outer query unpacks the Yson
-- payload picked the same way.
$stations = (
    select
        station_id,
        operator_id,
        operator_station_id,
        station_name,
        -- Fall back to station_name when '/station_full_name' is missing or empty.
        if(
            Yson::YPathString(latest_data, '/station_full_name') is null
                or Yson::YPathString(latest_data, '/station_full_name') == '',
            station_name,
            Yson::YPathString(latest_data, '/station_full_name')
        ) as station_full_name,
        Yson::YPathString(latest_data, '/station_type') as station_type,
        -- '/location/x' is exposed as longitude and '/location/y' as latitude.
        Yson::YPathDouble(latest_data, '/location/x') as longitude,
        Yson::YPathDouble(latest_data, '/location/y') as latitude
    from (
        select
            station_id,
            max_by(operator_id, history_event_id) as operator_id,
            max_by(operator_station_id, history_event_id) as operator_station_id,
            max_by(station_name, history_event_id) as station_name,
            max_by(unpacked_data, history_event_id) as latest_data
        from range($STATIONS_HISTORY)
        where enabled_in_platform
        group by station_id
    )
);
-----------------------------------------------------------------------------------------------------------------------

-- Reads the '/dropoff_reservations' list from $meta and returns a list of tuples
-- (class_name, name, geo_id, hr_interval, deliveries_limit), one tuple per reservation.
$extract_regions_with_intervals = ($meta) -> {
    $reservations = Yson::YPathList($meta, '/dropoff_reservations');

    -- Column builders: read one field, addressed by $path, from every reservation.
    $string_column = ($path) -> {
        return ListMap($reservations, ($item) -> { return Yson::YPathString($item, $path); });
    };
    $int_column = ($path) -> {
        return ListMap($reservations, ($item) -> { return Yson::YPathInt64($item, $path); });
    };

    -- Area name: the first non-null of polygon name, geo name and station id.
    $area_names = ListMap($reservations, ($item) -> {
        return coalesce(
            Yson::YPathString($item, '/area/polygon_area_object/polygon_name'),
            Yson::YPathString($item, '/area/geo_id_area_object/geo_name'),
            Yson::YPathString($item, '/area/station_area_object/station_id')
        );
    });

    return ListZip(
        $string_column('/area/class_name'),
        $area_names,
        $int_column('/area/geo_id_area_object/geo_id'),
        $string_column('/interval/hr_interval'),
        $int_column('/deliveries_limit')
    );
};

-- Converts a list of 5-element tuples into a list of structs with named members:
-- element 0 -> class_name, 1 -> name, 2 -> id, 3 -> hr_interval, 4 -> deliveries_limit.
$list_to_tuples = ($list) -> {
    return ListMap($list, ($region) -> {
        return <|
            class_name: $region.0,
            name: $region.1,
            id: $region.2,
            hr_interval: $region.3,
            deliveries_limit: $region.4
        |>;
    });
};

-- Formats a '<from>/<to>' interval as '<from>-<to>', where each bound is rendered in the
-- timezone that Geo::GetTimezone returns for $geo_id.
--   $interval - string with two ISO 8601 timestamps separated by '/'
--   $geo_id   - region id, cast to Int32 for the timezone lookup
$extract_interval = ($interval, $geo_id) -> {
    $tz_name = Geo::GetTimezone(cast($geo_id as Int32));
    $bounds = String::SplitToList($interval, '/');

    -- Parses one bound, attaches the timezone and keeps 5 characters from offset 11 of the
    -- string form (presumably the HH:MM part of the local time).
    $local_hhmm = ($iso) -> {
        $local = AddTimezone(DateTime::MakeDatetime(DateTime::ParseIso8601($iso)), $tz_name);
        return substring(cast($local as String), 11, 5);
    };

    return $local_hhmm(ListHead($bounds)) || '-' || $local_hhmm(ListLast($bounds));
};

-- Number of days between two dates: $end_date minus $start_date, both cast to Date.
$cals_delivery_period = ($start_date, $end_date) -> {
    return DateTime::ToDays(cast($end_date as Date) - cast($start_date as Date));
};

-- Python UDF source: converts a WKT polygon / multipolygon into the string form of a nested
-- list [[[y, x], ...], ...] with one inner list per polygon exterior ring.
$script = @@
from shapely import wkt


def _exterior_as_yx(polygon):
    # WKT coordinates come as (x, y); the output swaps every pair to [y, x].
    return [[point[1], point[0]] for point in polygon.exterior.coords]


def transform_wkt(text):
    # The declared argument type is optional: treat NULL like the empty string.
    if not text:
        return None

    geometry = wkt.loads(text.decode('ascii'))

    # Multi-part geometries expose their parts through `.geoms`. Iterating the geometry
    # object itself is not supported by Shapely 2.x, so it must not be relied upon.
    # A single polygon has no `.geoms` and is handled as a one-element collection.
    polygons = getattr(geometry, 'geoms', None)
    if polygons is None:
        polygons = [geometry]

    return str([_exterior_as_yx(polygon) for polygon in polygons])
@@;

-- Callable wrapper around transform_wkt from $script: String? -> String?.
-- Returns NULL for NULL or empty input.
$transform_wkt = Python::transform_wkt(
    Callable<(String?)->String?>,
    $script
);

-- Among the regions returned by Geo::GetParents for $id, returns the first one whose
-- `type` equals $type (as a Geo::RegionById record), or NULL when there is none.
$find_type = ($id, $type) -> {
    $ancestors = ListMap(
        Geo::GetParents(cast($id as Int32)),
        ($region_id) -> { return Geo::RegionById($region_id); }
    );

    $matching = ListFilter(
        $ancestors,
        ($region) -> { return $region.type == $type; }
    );

    return $matching[0];
};

-- INPUTS USED TO EXTRACT POLYGONS
$regions = "//home/geotargeting/public/geobase/regions";
$toponyms = "//home/maps/geocoder/geosrc/latest_state/toponyms";
$geometries = "//home/maps/geocoder/geosrc/latest_state/geometries";

-- Replaces every newline in $str with a single space.
$fix_whitespaces = ($str) -> {
    return String::ReplaceAll($str, "\n", " ");
};

-- A geometry is accepted unless its WKT starts with "POINT".
$is_valid_geometry = ($wkt) -> {
    return not String::StartsWith($wkt, "POINT");
};

-- Sorts the ids and joins their string forms with commas.
$toponym_id_list = ($ids) -> {
    $sorted_ids = ListSort($ids);
    $as_strings = ListMap($sorted_ids, ($id) -> { return cast($id as String); });
    return String::JoinFromList($as_strings, ",");
};

-- One row per geo id with a border geometry. Inputs are toponyms flagged is_geo_id_owner,
-- joined to their geometries and to geobase regions; only regions with type >= 3 and a
-- non-empty country_iso are kept, and geometries whose WKT starts with "POINT" are dropped.
-- geometry_wkt is the WKT of the toponym with the smallest toponym_id in the group.
$matched_borders = (
    select
        tp.geo_id as reg_id,
        $fix_whitespaces(min_by(gm.wkt, tp.toponym_id)) as geometry_wkt,
        $toponym_id_list(aggregate_list(tp.toponym_id)) as toponym_id_list,
        count(tp.toponym_id) as toponym_count,
        some(rg.type) as reg_type,
        some(rg.population) as population
    from $toponyms as tp
    inner join $geometries as gm on (tp.toponym_id = gm.toponym_id)
    inner join $regions as rg on (tp.geo_id = rg.reg_id)
    where tp.is_geo_id_owner
        and $is_valid_geometry(gm.wkt)
        and rg.type >= 3
        and rg.country_iso != ""
    group by tp.geo_id as reg_id
);
-----------

-- Current state of every tag whose name is in $TAG_NAMES, read from the tag history starting
-- at $START_DATE. The inner query picks each attribute via max_by(..., history_event_id);
-- the outer query unpacks the Yson payload and keeps a tag only when:
--   * the first 27 characters of '/pickup_interval' compare greater (as strings) than the
--     current UTC timestamp,
--   * the list of dropoff reservations is not NULL,
--   * the picked history_action is not 'remove'.
$tags = (
    select
        tag_id,
        tag_name,
        class_name,
        history_action,
        history_event_id,
        history_timestamp,
        history_user_id,
        station_id,
        $map_station_id(station_id) as station_name,
        Yson::YPathString(latest_data, '/operator_id') as tag_operator_id,
        Yson::YPathString(latest_data, '/status') as tag_status,
        Yson::YPathString(latest_data, '/pickup_interval') as pickup_interval,
        Yson::YPathString(latest_data, '/parent_tag_id') as parent_tag_id,
        $list_to_tuples($extract_regions_with_intervals(latest_data)) as tuples
    from (
        select
            tag_id,
            max_by(tag_name, history_event_id) as tag_name,
            max_by(class_name, history_event_id) as class_name,
            max_by(history_action, history_event_id) as history_action,
            max(history_event_id) as history_event_id,
            max_by(history_timestamp, history_event_id) as history_timestamp,
            max_by(history_user_id, history_event_id) as history_user_id,
            max_by(object_id, history_event_id) as station_id,
            max_by(unpacked_data, history_event_id) as latest_data
        from range($STATION_TAGS, $START_DATE)
        where tag_name in $TAG_NAMES
        group by tag_id
    )
    where cast(CurrentUtcTimestamp() as String) < substring(Yson::YPathString(latest_data, '/pickup_interval'), 0, 27)
        and $list_to_tuples($extract_regions_with_intervals(latest_data)) is not null
        and history_action != 'remove'
);

-- First ten characters of a string; applied to interval strings whose prefix is later cast
-- to Date (presumably the YYYY-MM-DD part).
$first_ten_chars = ($value) -> { return substring($value, 0, 10); };

-- One row per (tag, dropoff reservation): the `tuples` list of $tags is flattened and the
-- per-reservation attributes are derived from each element.
$tags_flattened = (
    select
        class_name,
        history_action,
        history_event_id,
        history_timestamp,
        history_user_id,
        parent_tag_id,
        pickup_interval,
        station_id,
        station_name,
        tag_id,
        tag_name,
        tag_operator_id,
        tag_status,
        tuples.class_name as region_class_name,
        tuples.deliveries_limit as deliveries_limit,
        tuples.hr_interval as hr_interval,
        tuples.id as geo_id,
        -- When the reservation carries no name, use the geobase region name for its id.
        if(
            tuples.name is null or tuples.name == '',
            Geo::RegionById(cast(tuples.id as Int32)).name,
            tuples.name
        ) as name,
        $first_ten_chars(pickup_interval) as pickup_interval_date,
        $extract_interval(tuples.hr_interval, tuples.id) as delivery_interval_msk,
        $first_ten_chars(tuples.hr_interval) as delivery_interval_date,
        $cals_delivery_period(
            $first_ten_chars(pickup_interval),
            $first_ten_chars(tuples.hr_interval)
        ) as delivery_period,
        -- Name of the enclosing region of type 5.
        $find_type(tuples.id, 5).name as oblast
    from $tags
    flatten list by tuples
);

-- Flattened tags enriched with the border polygon and population (by geo_id), station
-- metadata (by station_id) and return periods (by geo_id). All three joins are LEFT joins,
-- so the *_flg columns mark rows for which the lookup found nothing.
$raw = (
    select
        t.*,
        $transform_wkt(g.geometry_wkt ?? '') as poly,
        g.geometry_wkt is null as null_geometry_flg,
        g.population as population,
        s.operator_id as operator_id,
        s.operator_station_id as operator_station_id,
        s.station_full_name as station_full_name,
        s.station_type as station_type,
        -- Type-5 region enclosing the station coordinates.
        $find_type(Geo::RegionByLocation(s.latitude, s.longitude).id, 5).name as source_oblast,
        rp.mk_dostavki as mk_dostavki,
        rp.return_period_min as return_period_min,
        rp.return_period_max as return_period_max,
        rp.mk_dostavki is null as null_return_period_flg
    from $tags_flattened as t
    left join $matched_borders as g on t.geo_id = g.reg_id
    left join $stations as s on t.station_id = s.station_id
    left join $RETURN_PERIODS as rp on t.geo_id = rp.geo_id
);

-- Rows of $raw whose reservation area has class 'station'.
$station = (
    select *
    from $raw
    where region_class_name == 'station'
);

-- Rows of $raw whose reservation area is not of class 'station', plus the date one day
-- before pickup_interval_date (as a string).
$not_station = (
    select
        r.*,
        cast(cast(r.pickup_interval_date as Date) - Interval('P1D') as String) as day_before_pickup_date
    from $raw as r
    where r.region_class_name != 'station'
);

-- Final set of rows, built from two branches:
--   1. segment_cnt = 2: a 'station' row (first_leg) paired with a non-station row
--      (second_leg) where first_leg.name equals second_leg.station_id and
--      first_leg.delivery_interval_date is the day before the second leg's pickup date.
--      Pickup and tag attributes come from first_leg, destination attributes from second_leg;
--      delivery_period is 1 + both legs' periods; the return period columns are left empty.
--   2. segment_cnt = 1: every non-station row as is, without the helper column
--      day_before_pickup_date.
$final = (
    select
        first_leg.class_name as class_name,
        first_leg.deliveries_limit as deliveries_limit,
        second_leg.delivery_interval_date as delivery_interval_date,
        second_leg.delivery_interval_msk as delivery_interval_msk,
        1 + first_leg.delivery_period + second_leg.delivery_period as delivery_period,
        second_leg.geo_id as geo_id,
        first_leg.history_action as history_action,
        first_leg.history_event_id as history_event_id,
        first_leg.history_timestamp as history_timestamp,
        first_leg.history_user_id as history_user_id,
        second_leg.hr_interval as hr_interval,
        second_leg.mk_dostavki as mk_dostavki,
        second_leg.name as name,
        second_leg.null_geometry_flg as null_geometry_flg,
        true as null_return_period_flg,
        second_leg.oblast as oblast,
        first_leg.operator_id as operator_id,
        first_leg.operator_station_id as operator_station_id,
        first_leg.parent_tag_id as parent_tag_id,
        first_leg.pickup_interval as pickup_interval,
        first_leg.pickup_interval_date as pickup_interval_date,
        second_leg.poly as poly,
        second_leg.population as population,
        second_leg.region_class_name as region_class_name,
        null as return_period_max,
        null as return_period_min,
        first_leg.source_oblast as source_oblast,
        first_leg.station_full_name as station_full_name,
        first_leg.station_id as station_id,
        first_leg.station_name as station_name,
        first_leg.station_type as station_type,
        first_leg.tag_id as tag_id,
        first_leg.tag_name as tag_name,
        first_leg.tag_operator_id as tag_operator_id,
        first_leg.tag_status as tag_status,
        2 as segment_cnt,
        second_leg.station_id as second_station_id,
        second_leg.station_full_name as second_station_full_name
    from $station as first_leg
    inner join $not_station as second_leg
        on first_leg.name = second_leg.station_id
        and first_leg.delivery_interval_date = second_leg.day_before_pickup_date

    union all

    select
        direct.*,
        1 as segment_cnt,
        null as second_station_id,
        null as second_station_full_name
    without day_before_pickup_date
    from $not_station as direct
);

-- Rewrite the destination table with the rows whose pickup date is tomorrow (UTC).
INSERT INTO $SAVE_TO WITH TRUNCATE
SELECT *
FROM $final
WHERE pickup_interval_date == cast(CurrentUtcDate() + Interval('P1D') as String);
