USE hahn;
PRAGMA yt.DefaultMaxJobFails = '1';
PRAGMA yt.TemporaryAutoMerge = 'disabled';
-- PRAGMA yt.Pool = 'goda-prod';

PRAGMA Library('messenger_lib.sql');
IMPORT messenger_lib SYMBOLS $default_filter;


$script = @@
from collections import defaultdict

def dfs(g, v, visited):
    """Count the vertices reachable from ``v`` that are not yet in ``visited``.

    The traversal uses an explicit stack instead of recursion: the recursive
    form needs one interpreter frame per vertex along a path, so a long
    chain of vertices overflows the Python recursion limit and aborts the
    whole call.

    Args:
        g: adjacency mapping, vertex -> iterable of neighbour vertices.
        v: start vertex.
        visited: set of already seen vertices; updated in place with every
            vertex reached from ``v`` (including ``v`` itself).

    Returns:
        Number of vertices newly reached from ``v``, counting ``v``.
    """
    visited.add(v)
    stack = [v]
    res = 0
    while stack:
        cur = stack.pop()
        res += 1
        for w in g[cur]:
            # Mark on push so a vertex is never put on the stack twice,
            # even when adjacency lists contain duplicate neighbours.
            if w not in visited:
                visited.add(w)
                stack.append(w)
    return res

def random_graph(edges):
    """Return the sizes of the connected components of an undirected graph.

    Args:
        edges: iterable of 2-element sequences (vertex, vertex). Every edge
            is added in both directions, so orientation does not matter.

    Returns:
        List with one integer per connected component: the number of
        vertices in it. Components are listed in adjacency-map iteration
        order.
    """
    adjacency = defaultdict(list)
    for edge in edges:
        left, right = edge[0], edge[1]
        adjacency[left].append(right)
        adjacency[right].append(left)

    seen = set()
    # `seen` is filled by dfs while the comprehension runs, so the filter
    # skips every vertex that belongs to an already counted component.
    return [
        dfs(adjacency, vertex, seen)
        for vertex in adjacency
        if vertex not in seen
    ]
@@;


-- Raw sent-message rows: every table under the sent_messages directory
-- selected by RANGE with '2020-04-20' as the lower bound.
-- All columns are kept because whole rows are passed to $default_filter
-- via TableRow() further down.
$source =
    SELECT *
    FROM RANGE("//home/mssngr/squeeze/sent_messages", '2020-04-20');


-- Directed "sender -> recipient" edges of 1on1 chats, one row per
-- (sender, recipient, week). The week is the start-of-week date of fielddate.
$oriented_edges =
    SELECT DISTINCT
        puid,
        to_puid AS puid_to,
        DateTime::MakeDate(DateTime::StartOfWeek(CAST(fielddate AS Date))) AS week
    FROM $source
    WHERE chat_type = '1on1'
        AND puid != 0
        AND to_puid != 0
        AND $default_filter(TableRow());


-- $oriented_edges = SELECT * FROM AS_TABLE([
--   <| puid:1, puid_to:2, week:1 |>,
--   <| puid:2, puid_to:1, week:1 |>,
--   <| puid:3, puid_to:2, week:1 |>,
--   <| puid:2, puid_to:3, week:1 |>,
--   <| puid:3, puid_to:4, week:1 |>,
--   <| puid:4, puid_to:3, week:1 |>,
--   <| puid:5, puid_to:6, week:1 |>,
--   <| puid:6, puid_to:5, week:1 |>,
--   <| puid:7, puid_to:7, week:1 |>
-- ]);


-- Mutual edges: a pair is kept only when, within the same week, there is a
-- row in both directions (x -> y and y -> x) in $oriented_edges.
--
-- The self-join matches every mutual pair twice, once as (x, y) and once as
-- (y, x). The Python script adds each edge to the adjacency map in both
-- directions anyway, so the second orientation only doubles the list that is
-- aggregated into a single UDF call per week. `a.puid <= a.puid_to` keeps
-- exactly one orientation; `<=` (not `<`) preserves self-loops, which still
-- produce a component of size 1.
$edges = SELECT
    a.week AS week,
    CAST(a.puid AS String) AS puid_1,
    CAST(a.puid_to AS String) AS puid_2
FROM $oriented_edges AS a
INNER JOIN $oriented_edges AS b
ON a.puid = b.puid_to AND a.puid_to = b.puid AND a.week = b.week
WHERE a.puid <= a.puid_to;


-- Python UDF wrapper around random_graph from $script:
-- takes a list of (String, String) edges and returns a list of Int32 values.
$transform_graph = Python::random_graph(
    Callable<
        (List<Tuple<String, String> >) -> List<Int32>
    >,
    $script
);


-- One row per week: all edges of the week are collected into a single list
-- and handed to the UDF, which returns the list of component sizes.
$components =
    SELECT
        week,
        $transform_graph(
            AGGREGATE_LIST(AsTuple(puid_1, puid_2))
        ) AS size_list
    FROM $edges
    GROUP BY week;


-- Histogram of component sizes: for every week, how many components (cnt)
-- have a given size. size_list is unrolled into one row per component first.
$sizes =
    SELECT
        week,
        size,
        COUNT(*) AS cnt
    FROM $components
    FLATTEN LIST BY size_list AS size
    GROUP BY
        week,
        size;

-- SELECT * FROM $sizes;


-- Overwrite the dashboard dataset with the per-week size histogram.
-- size_rank numbers the sizes inside each week from the largest (1) down;
-- (week, size) pairs are unique after the grouping in $sizes.
INSERT INTO `//home/mssngr/squeeze/dashboard_datasets/clusters` WITH TRUNCATE
SELECT
    week,
    size,
    cnt,
    ROW_NUMBER() OVER sizes_desc_within_week AS size_rank
FROM $sizes
WINDOW sizes_desc_within_week AS (
    PARTITION BY week
    ORDER BY size DESC
)
ORDER BY
    week,
    size DESC;
