-- Session-level settings for the YT provider used by every statement below.
-- Table "home/user_identification/homework/prod/homework_yuid" does not have any scheme attribute supported by YQL
-- NOTE(review): the path quoted above differs from the homework table this script
-- actually reads (see $homework_uid_tbl: .../homework/v2/prod/homework_unified_id);
-- confirm that the schema-related pragmas are still required.
PRAGMA yt.InferSchema;
PRAGMA yt.IgnoreWeakSchema;
-- Sets the YT row weight limit for this query to 32M.
PRAGMA yt.MaxRowWeight = "32M";
-- ========================================================================= --
-- NOTE: `{{ ... }}` placeholders are presumably substituted by a templating step
-- before the query is submitted -- confirm against the caller.

-- Inputs (only read by this script).
-- Source of predicted_home per unified id.
$homework_uid_tbl = '//home/user_identification/homework/v2/prod/homework_unified_id';
-- Source of exact_socdem / income_5_segments / probabilistic_segments per yandexuid.
$profile_data = '//home/crypta/{{ crypta_env }}/profiles/export/profiles_for_14days';
-- Source of browser and ua_profile per id.
$yuid_with_all = '//home/crypta/{{ crypta_env }}/ids_storage/yandexuid/yuid_with_all_info';
-- Household membership rows (yuid, crypta_id, hhid, is_tv).
$hh_enrich = '{{ household_dir }}/output/hh_reversed';

-- Outputs.
-- Written by $make_hh_composition, then read by the two later actions.
$composition_table = '{{ household_dir }}/output/composition';
-- Written by $make_tv_yuid_soup.
$hh_tv_edges = '{{ household_dir }}/output/edges';
-- Directory holding the households_to_bb* tables of $make_hh_bb_tskv.
$bb_output_dir = '{{ household_dir }}/bb_output';
-- ========================================================================= --

-- Splits a pipe-delimited ua_profile string into two named groups:
--   platform - the first character, which has to be "m" or "d";
--   os       - the second-to-last pipe-separated field; it may be empty and
--              must consist of word characters only, otherwise nothing matches.
-- Examples:
--   d|desk|windows|10.0            -> platform "d", os "windows"
--   m|phone|||                     -> platform "m", no os
--   m|tablet|samsung|android|5.0.2 -> platform "m", os "android"
$parse_ua_profile = Re2::Capture(@@^(?P<platform>[md]).*\|(?P<os>\w*)\|[^\|]*$@@);

-- Exclusive upper bound on household size applied by the export filter in
-- $make_hh_bb_tskv (the original note mentions 1000 as an alternative value).
$HH_MONSTER = 500;
-- ========================================================================= --

-- Profile columns per yandexuid. The id is cast to String; it is joined
-- against hh.yuid inside $make_hh_composition.
$profile = (
    SELECT
        CAST(src.yandexuid AS String) AS yandexuid,
        src.exact_socdem AS exact_socdem,
        src.income_5_segments AS income_5_segments,
        src.probabilistic_segments AS probabilistic_segments
    FROM $profile_data AS src
);

-- predicted_home per crypta_id: keeps only the homework rows whose
-- source_unified_id is "crypta_id" and exposes unified_id as crypta_id.
$homework_cid = (
    SELECT
        homework.unified_id AS crypta_id,
        homework.predicted_home AS predicted_home
    FROM $homework_uid_tbl AS homework
    WHERE homework.source_unified_id == "crypta_id"
);


-- Builds $composition_table.
--   1. Every row of $hh_enrich is enriched with user-agent, geo and profile
--      columns. All joins are LEFT JOINs, so rows without a match are kept
--      with NULLs in the joined columns.
--   2. Rows are grouped by (hhid, hhid_32) and each group is handed to the
--      Python reducer hh_composition_reducer loaded from composition.py.
--   3. The reducer output replaces $composition_table, sorted by hhid.
DEFINE ACTION $make_hh_composition() AS

    -- Reducer input: household members with all attributes attached.
    $members = (
        SELECT
            member.yuid AS yuid,
            member.crypta_id AS crypta_id,
            member.hhid AS hhid,
            Digest::MurMurHash32(member.hhid) AS hhid_32,

            -- socdem / income / segments from the profiles export
            prof.exact_socdem AS exact_socdem,
            prof.income_5_segments AS income_5_segments,
            prof.probabilistic_segments AS probabilistic_segments,

            -- browser as is; platform and os are parsed out of ua_profile
            ua.browser AS ua_profile_browser,
            $parse_ua_profile(ua.ua_profile).platform AS ua_profile_platform,
            $parse_ua_profile(ua.ua_profile).os AS ua_profile_os,

            -- predicted home, passed through the templated serializer
            {{ _serializer }}(home.predicted_home) AS predicted_home

        FROM $hh_enrich AS member

        LEFT JOIN $yuid_with_all AS ua
        ON (member.yuid == ua.id)

        LEFT JOIN $homework_cid AS home
        ON (member.crypta_id == home.crypta_id)

        LEFT JOIN $profile AS prof
        ON (member.yuid == prof.yandexuid)
    );

    -- Typed wrapper around the Python reducer.
    -- Arguments: the (hhid, hhid_32) key tuple and the stream of member rows.
    -- Result: a stream of composition rows.
    $reducer = Python2::hh_composition_reducer(
        Callable<
            (
                Tuple<String?, Uint32?>,
                Stream<
                    Struct<
                        yuid:String?,
                        crypta_id:String?,
                        hhid:String?,
                        hhid_32:Uint32?,
                        exact_socdem:Yson?,
                        income_5_segments:Yson?,
                        probabilistic_segments:Yson?,
                        ua_profile_browser:String?,
                        ua_profile_os:String?,
                        ua_profile_platform:String?,
                        predicted_home:Yson?
                    >
                >
            ) -> Stream<
                Struct<
                    hhid:String,
                    hhid_32:Uint32,
                    size:Uint64,
                    socdems:Uint64,
                    data:Yson,
                    info:Yson,
                    main_profile_yuid:String?,
                    main_profile_crypta_id:String?,
                    info_binary:String
                >
            >
        >,
        FileContent('composition.py')
    );

    $households = (
        REDUCE $members
        ON hhid, hhid_32
        USING $reducer(TableRow())
    );

    INSERT INTO $composition_table WITH TRUNCATE
    SELECT * FROM $households
    FLATTEN COLUMNS
    ORDER BY hhid;
END DEFINE;

-- Produces the households export in $bb_output_dir as three tables:
--   households_to_bb      - current export (key, subkey, value);
--   households_to_bb_old  - copy of the export as it was before this run;
--   households_to_bb_diff - current rows that are new or whose value changed.
-- The steps run as: backup -> COMMIT -> rebuild -> COMMIT -> diff.
-- NOTE(review): the backup step reads households_to_bb, so that table
-- presumably has to exist before the very first run -- confirm.
DEFINE ACTION $make_hh_bb_tskv() AS
    $bb_current = $bb_output_dir || '/households_to_bb';
    $bb_previous = $bb_output_dir || '/households_to_bb_old';
    $bb_delta = $bb_output_dir || '/households_to_bb_diff';

    -- Step 1: keep the previous export so it can be compared afterwards.
    DEFINE ACTION $make_backup() AS
        INSERT INTO $bb_previous WITH TRUNCATE
        SELECT * FROM $bb_current
        ORDER BY key, subkey;
    END DEFINE;

    -- Step 2: rebuild the current export from the composition table.
    DEFINE ACTION $make_tskv() AS
        -- Tab-separated record: keyword=353, <type>=<key>, value=<value>.
        $tskv = ($type, $key, $value) -> {
            $id_field = $type || "=" || $key;
            $value_field = "value=" || $value;
            RETURN String::JoinFromList(
                AsList("keyword=353", $id_field, $value_field),
                "\t"
            );
        };

        -- Household members paired with the info of their household.
        -- Only yandexuids are exported for now; exporting crypta ids is an
        -- open TODO carried over from the original code.
        $members_with_info = (
            SELECT
                member.yuid AS id_value,
                "yuid" AS id_type,
                household.info_binary AS info_binary,
                -- columns kept for filtering
                household.size AS size,
                household.socdems AS socdems,
                member.is_tv AS is_tv
            FROM $hh_enrich AS member
            INNER JOIN $composition_table AS household
            USING (hhid)
        );

        -- Exported rows: households with more than one member, below the
        -- $HH_MONSTER limit and with socdems > 0. Letting TV rows through
        -- (is_tv) and restricting to yuids present in $profile are both
        -- switched off at the moment.
        $bb_rows = (
            SELECT
                id_value AS key,
                "" AS subkey,
                $tskv(id_type, id_value, info_binary) AS value
            FROM $members_with_info
            WHERE size > 1
                AND size < $HH_MONSTER
                AND socdems > 0
        );

        INSERT INTO $bb_current WITH TRUNCATE
        SELECT
            key,
            subkey,
            value
        FROM $bb_rows
        ORDER BY key, subkey;
    END DEFINE;

    -- Step 3: rows of the current export that are absent from the backup
    -- or carry a different value there.
    DEFINE ACTION $make_diff() AS
        INSERT INTO $bb_delta WITH TRUNCATE
        SELECT
            fresh.key AS key,
            fresh.subkey AS subkey,
            fresh.value AS value
        FROM $bb_current AS fresh
        LEFT JOIN $bb_previous AS stale
        USING (key, subkey)
        WHERE stale.value IS NULL
            OR stale.value != fresh.value
        ORDER BY key, subkey;
    END DEFINE;

    DO $make_backup();
    COMMIT;
    DO $make_tskv();
    COMMIT;
    DO $make_diff();
END DEFINE;

-- Writes smart-TV edges to $hh_tv_edges: each TV yuid that has no crypta_id
-- is linked to the main profile yuid of its household.
-- Consumer: crypta/graph/export (smart_tv_edges), see
-- https://a.yandex-team.ru/arc/trunk/arcadia/crypta/graph/export/lib/graphs.py?rev=4261698#L223-232
DEFINE ACTION $make_tv_yuid_soup() AS

    -- TV members of households that are not attached to any crypta_id.
    $tv_orphans = (
        SELECT
            hhid,
            yuid
        FROM $hh_enrich
        WHERE is_tv AND crypta_id IS NULL
    );

    -- Pair every such TV with the main profile of its household.
    $tv_pairs = (
        SELECT
            household.main_profile_crypta_id AS main_crypta_id,
            household.main_profile_yuid AS id1,
            tv.yuid AS id2
        FROM $tv_orphans AS tv
        INNER JOIN $composition_table AS household
        USING (hhid)
    );

    -- Keep complete pairs only and drop self-loops.
    $tv_edges = (
        SELECT
            main_crypta_id,
            id1,
            id2
        FROM $tv_pairs
        WHERE main_crypta_id IS NOT NULL
            AND id1 IS NOT NULL
            AND id2 IS NOT NULL
            AND id1 != id2
    );

    INSERT INTO $hh_tv_edges WITH TRUNCATE
    SELECT
        main_crypta_id AS cryptaId,
        id1,  -- yuid of the household main profile
        IdType::YANDEXUID() AS id1Type,
        id2,  -- yuid of the smart TV
        IdType::YANDEXUID() AS id2Type,
        SourceType::SMART_TV() AS sourceType,
        LogSource::HOUSEHOLDS() AS logSource
    FROM $tv_edges
    ORDER BY cryptaId, id1Type, id2Type, sourceType, logSource, id1, id2;

END DEFINE;

-- ========================================================================= --

-- Entry point. The composition table is rebuilt first and committed, because
-- both following actions read $composition_table.
DO $make_hh_composition();
COMMIT;
-- The two actions below write to different tables ($hh_tv_edges and the
-- households_to_bb* tables) and do not read each other's output.
DO $make_tv_yuid_soup();
DO $make_hh_bb_tskv();
