-- Run every statement below against the `hahn` cluster
-- (presumably a YT cluster, given the `//home/...` paths — confirm).
USE hahn;
-- Input table; it is self-joined on `domain` further down to build agency pairs.
$flow_table = "//home/geoadv/seregapanov/fraud_flow/domain_agency";
-- Output table; the final INSERT ... WITH TRUNCATE overwrites it completely.
$write_path = "//home/geoadv/seregapanov/fraud_flow/agency_transit";

-- Returns true when the end of period 1 and the start of period 2 lie within
-- 15 days of each other, regardless of which of the two comes first.
--   $max_period_1: last day of period 1 (any value castable to Date)
--   $min_period_2: first day of period 2 (any value castable to Date)
$transit = ($max_period_1, $min_period_2) -> {
    $max_gap_days = 15;
    $period_1_end = CAST($max_period_1 AS Date);
    $period_2_start = CAST($min_period_2 AS Date);
    -- Signed gap in days; the sign is dropped by Abs() below.
    $gap_days = DateTime::ToDays($period_2_start - $period_1_end);

    RETURN Abs($gap_days) <= $max_gap_days;
};

-- Returns true when either
--   * period 1 ends more than 15 days after period 2 starts, or
--   * it does not, and the two periods end within 15 days of each other.
--   $max_period_1: last day of period 1 (any value castable to Date)
--   $min_period_2: first day of period 2 (any value castable to Date)
--   $max_period_2: last day of period 2 (any value castable to Date)
$split = ($max_period_1, $min_period_2, $max_period_2) -> {
    $max_gap_days = 15;
    $period_1_end = CAST($max_period_1 AS Date);

    -- Days from the start of period 2 to the end of period 1.
    $overlap_days = DateTime::ToDays($period_1_end - CAST($min_period_2 AS Date));
    -- Signed distance between the two period ends.
    $end_shift_days = DateTime::ToDays(CAST($max_period_2 AS Date) - $period_1_end);

    $long_overlap = $overlap_days > $max_gap_days;
    -- The `<= $max_gap_days` guard is implied whenever $long_overlap is false;
    -- it is kept on purpose so that NULL inputs produce the same result as before.
    $ends_close = $overlap_days <= $max_gap_days
        AND Abs($end_shift_days) <= $max_gap_days;

    RETURN $long_overlap OR $ends_close;
};

-- Stage 1: pair every row of $flow_table with every other row of the same
-- domain that has a larger agency_client_number, and evaluate the split /
-- transit predicates once per pair.
$get_changes_pairs =
SELECT
    ag1.domain AS domain,
    ag1.domain_size AS domain_size,
    ag1.mean_money AS mean_money,
    ag1.agency_id AS agency1_id,
    ag1.agency_name AS agency1_name,
    ag2.agency_id AS agency2_id,
    ag2.agency_name AS agency2_name,
    ag1.first_day AS agency1_first_day,
    ag1.last_day AS agency1_last_day,
    ag2.first_day AS agency2_first_day,
    ag2.last_day AS agency2_last_day,
    ag2.agency_type AS agency2_type,
    $split(ag1.last_day, ag2.first_day, ag2.last_day) AS is_split,
    $transit(ag1.last_day, ag2.first_day) AS is_transit,
    ag1.agency_id == ag2.agency_id AS same_agency,
    ag1.agency_id != ag2.agency_id AS other_agency
FROM $flow_table AS ag1
INNER JOIN $flow_table AS ag2
ON ag1.domain == ag2.domain
WHERE ag1.agency_client_number < ag2.agency_client_number
;

-- Stage 2: keep only pairs that are a split or a transit and encode the kind
-- of change as a number (the final query maps these codes to labels):
--   4 = split, same agency        3 = split, different agency
--   2 = transit, same agency      1 = transit, different agency
--   0 = none of the above (reachable when the agency ids cannot be compared)
-- A split takes precedence over a transit when both predicates hold.
$get_changes =
SELECT
    domain,
    domain_size,
    mean_money,
    agency1_id,
    agency1_name,
    agency2_id,
    agency2_name,
    agency1_first_day,
    agency1_last_day,
    agency2_first_day,
    agency2_last_day,
    agency2_type,
    CASE
        WHEN is_split AND same_agency THEN 4
        WHEN is_split AND other_agency THEN 3
        WHEN is_transit AND same_agency THEN 2
        WHEN is_transit AND other_agency THEN 1
        ELSE 0
    END AS type
FROM $get_changes_pairs
WHERE is_split OR is_transit
;

-- Turns the result of MAX_BY(value, type, 20) into one ' | '-separated string
-- of distinct values.
$join_unique = ($top_values) -> {
    RETURN ListConcat(ListUniq(ListFlatten(AsList($top_values))), ' | ');
};

-- One output row per (domain, agency 2 period). The row is labelled with the
-- highest change code found among its pairs, and agency1_id / agency1_name
-- list the agency-1 values taken from up to 20 of the highest-coded pairs.
-- The two lists are built independently of each other.
INSERT INTO $write_path WITH TRUNCATE
SELECT
    domain,
    domain_size,
    mean_money,
    agency2_first_day,
    DateTime::GetMonthName(CAST(agency2_first_day AS Date)) AS ag2_month,
    agency2_last_day,
    agency2_id,
    agency2_name,
    agency2_type,
    CASE MAX(type)
        WHEN 4 THEN "internal split"
        WHEN 3 THEN "external split"
        WHEN 2 THEN "internal transit"
        WHEN 1 THEN "external transit"
        ELSE "error"
    END AS type,
    $join_unique(MAX_BY(CAST(agency1_id AS String), type, 20)) AS agency1_id,
    $join_unique(MAX_BY(CAST(agency1_name AS String), type, 20)) AS agency1_name
FROM $get_changes
GROUP BY
    domain,
    domain_size,
    mean_money,
    agency2_first_day,
    agency2_last_day,
    agency2_id,
    agency2_name,
    agency2_type
;
