from crypta.graph.soupy_indevice.lib.util import run_yql, timed
from crypta.graph.soupy_indevice.lib import config


def good_id_yql():
    """Return the YQL prelude with the lambdas shared by the soup queries.

    The returned text defines two YQL lambdas:

    * ``$good_id($id)`` -- true when the id, with dashes removed and NULL
      treated as an empty string, contains more than one distinct byte.
    * ``$is_good_ls_day($st, $ls, $ld)`` -- false only when ``$st`` is
      ``'app-url-redir'`` and ``$ld`` is less than 2; ``$ls`` is accepted
      but not used by the lambda body.

    The text contains literal curly braces, so it must not be passed
    through ``str.format``.
    """
    yql_prelude = '''
        $good_id = ($id) -> {
            $significant_symbols = String::ToByteList(String::ReplaceAll($id ?? '', '-', ''));
            $uniq_symbols_count = ListLength(ListUniq($significant_symbols));
            return $uniq_symbols_count > 1;
        };

        $is_good_ls_day = ($st, $ls, $ld) -> {
            RETURN CASE
                WHEN ($st == 'app-url-redir' AND $ld < 2) THEN False
                ELSE True
            END;
        };
    '''
    return yql_prelude


@timed
def preprocess_soup(soup_tables, idstorage_root, output_pairs, output_bad_ids, tx):
    """Normalize soup edges, enrich them with idstorage data and split off bad ids.

    The YQL query run here:

    * builds ``$idstorage`` from the ``eternal`` tables found under
      ``idstorage_root`` (idfa, gaid, oaid, mm_device_id, uuid, yandexuid,
      icookie);
    * reads all ``soup_tables``, normalizes both ids of every edge with
      ``Identifiers::StrictNormalize`` and drops edges rejected by
      ``$is_good_ls_day`` (``app-url-redir`` edges with fewer than two dates);
    * left-joins the idstorage attributes (model, manufacturer, os, app_id,
      app_version) onto both ends of every edge;
    * writes edges whose both ids pass ``$good_id`` and whose models are not
      ``'Emulator'`` to ``output_pairs``, ordered by id1, id2, id1Type, id2Type;
    * writes the distinct ids that fail those checks to ``output_bad_ids``
      with reason ``'emulator'``.

    Both output tables are overwritten (``with truncate``).

    :param soup_tables: iterable of input soup table paths.
    :param idstorage_root: path prefix of the idstorage tables.
    :param output_pairs: path of the table receiving the filtered edges.
    :param output_bad_ids: path of the table receiving the rejected ids.
    :param tx: object whose ``transaction_id`` is used as ``yt.ExternalTx``.
    """
    yql_prelude = good_id_yql()

    query_template = '''
        PRAGMA SimpleColumns;
        PRAGMA yt.ExternalTx = '{tx}';
        PRAGMA yt.DisableJobSplitting;

        $idstorage = (
            SELECT
                Identifiers::StrictNormalize(id_type, id) AS id, id_type,
                manufacturer, model, String::ToLower(os) as os
            FROM CONCAT(
                `{idstorage}/idfa/eternal`,
                `{idstorage}/gaid/eternal`,
                `{idstorage}/oaid/eternal`,
                `{idstorage}/mm_device_id/eternal`
            )

            union all

            select id, id_type, String::ToLower(os) as os, app_id, app_version from `{idstorage}/uuid/eternal`

            union all

            select id, id_type, String::ToLower(os_family) as os
            from CONCAT(
                `{idstorage}/yandexuid/eternal`,
                `{idstorage}/icookie/eternal`
            )
        );

        $pairs_with_normalized_devids = (
            SELECT
                t.id1Type as id1Type,
                Identifiers::StrictNormalize(t.id1Type, t.id1) as id1,
                t.id2Type as id2Type,
                Identifiers::StrictNormalize(t.id2Type, t.id2) as id2,
                ListMin(t.dates) as date_begin,
                ListMax(t.dates) as date_end,
                t.sourceType as sourceType
            FROM CONCAT({soup_tables}) as t
            WHERE $is_good_ls_day(t.sourceType, t.logSource, ListLength(t.dates))
        );

        $pairs_all = (
            SELECT
                t.id1Type AS id1Type,
                t.id1 AS id1,
                t.id2Type AS id2Type,
                t.id2 AS id2,
                t.sourceType as sourceType,
                (t.id1Type || '_' || t.id1) as u,
                (t.id2Type || '_' || t.id2) as v,
                t.date_begin as date_begin,
                t.date_end as date_end,
                d1.model as id1_model,
                d1.manufacturer as id1_manufacturer,
                d1.os as id1_os,
                d1.app_id as id1_app_id,
                d1.app_version as id1_app_version,
                d2.model as id2_model,
                d2.manufacturer as id2_manufacturer,
                d2.os as id2_os,
                d2.app_id as id2_app_id,
                d2.app_version as id2_app_version
            FROM $pairs_with_normalized_devids as t
            LEFT JOIN $idstorage as d1 on t.id1Type = d1.id_type and t.id1 = d1.id
            LEFT JOIN $idstorage as d2 on t.id2Type = d2.id_type and t.id2 = d2.id
        );

        $pairs_no_emulators = (
            select * from $pairs_all
            WHERE
                $good_id(id1) AND
                $good_id(id2) AND
                (id1_model IS NULL or id1_model != 'Emulator') AND
                (id2_model IS NULL or id2_model != 'Emulator')
        );

        $bad_ids_emulators = (
            select distinct id, id_type, reason from (
                select id1 as id, id1Type as id_type, 'emulator' as reason
                from $pairs_all
                where not $good_id(id1) or id1_model = 'Emulator'
                union all
                select id2 as id, id2Type as id_type, 'emulator' as reason
                from $pairs_all
                where not $good_id(id2) or id2_model = 'Emulator'
            )
        );

        insert into `{output_bad_ids}` with truncate
        select * from $bad_ids_emulators;

        insert into `{output_pairs}` with truncate
        select * from $pairs_no_emulators
        ORDER BY id1, id2, id1Type, id2Type;
    '''

    # Only the template goes through str.format: the prelude holds literal
    # curly braces (lambda bodies) that format() would misread as fields.
    query_body = query_template.format(
        tx=tx.transaction_id,
        soup_tables=', '.join('`%s`' % table_path for table_path in soup_tables),
        idstorage=idstorage_root,
        output_bad_ids=output_bad_ids,
        output_pairs=output_pairs
    )

    run_yql(yql_prelude + query_body)


@timed
def collapse_app_uuids(preprocessed_soup, output_apps, output_soup, tx):
    """Replace uuid vertices of the soup with synthetic per-device app ids.

    The YQL query run here:

    * takes every ``mm_device_id`` -> ``uuid`` edge of ``preprocessed_soup``
      and builds an app id of the form
      ``<app_id or uuid>_<app_version or 'unknown'>_<mm_device_id>`` typed as
      ``config.FAKE_APP_IDTYPE``;
    * keeps one app id per uuid (the minimal one), together with the widest
      date range, and writes this mapping to ``output_apps``;
    * substitutes the app id for the uuid on either end of every soup edge,
      recomputes the ``u``/``v`` vertex keys and merges the resulting
      duplicate edges: earliest ``date_begin``, latest ``date_end``, device
      attributes taken from the row with the greatest ``date_end``;
    * writes the merged edges to ``output_soup``.

    Both output tables are overwritten (``with truncate``).

    :param preprocessed_soup: path of the input soup table.
    :param output_apps: path of the table receiving the uuid -> app id mapping.
    :param output_soup: path of the table receiving the rewritten soup.
    :param tx: object whose ``transaction_id`` is used as ``yt.ExternalTx``.
    """
    query_template = '''
        PRAGMA yt.ExternalTx = '{tx}';

        $soup = '{soup}';

        -- NOTE: Only handle mm_device_id-uuid edges here (not uuid-mm_device_id),
        --       as we're mostly only interested in appmetrica edges
        $uuid_app = (
            select
                id2 as id1,
                id2Type as id1Type,
                (nvl(id2_app_id, id2) || '_' || nvl(id2_app_version, 'unknown') || '_' || id1) as id2,
                '{app_idtype}' as id2Type,
                date_begin,
                date_end
            from $soup
            where id1Type = 'mm_device_id' and id2Type = 'uuid'
        );

        -- A single uuid can be linked to many devids: make a unique app-id
        $uuid_app_uniq = (
            select
                id1,
                id1Type,
                min(id2) as id2,
                min(id2Type) as id2Type,
                min(date_begin) as date_begin,
                max(date_end) as date_end
            from $uuid_app
            group by id1, id1Type
        );

        $updated_soup = (
            select
                t.*,
                (t.id1Type || '_' || t.id1) as u,
                (t.id2Type || '_' || t.id2) as v
            from (
                select
                    if(a.id1 is null, t.id1Type, a.id2Type) as id1Type,
                    if(a.id1 is null, t.id1, a.id2) as id1,
                    if(b.id1 is null, t.id2Type, b.id2Type) as id2Type,
                    if(b.id1 is null, t.id2, b.id2) as id2,
                    t.sourceType as sourceType,
                    t.date_begin as date_begin,
                    t.date_end as date_end,
                    t.id1_model as id1_model,
                    t.id1_manufacturer as id1_manufacturer,
                    t.id1_os as id1_os,
                    t.id1_app_id as id1_app_id,
                    t.id1_app_version as id1_app_version,
                    t.id2_model as id2_model,
                    t.id2_manufacturer as id2_manufacturer,
                    t.id2_os as id2_os,
                    t.id2_app_id as id2_app_id,
                    t.id2_app_version as id2_app_version
                from $soup as t
                left join $uuid_app_uniq as a on t.id1Type = a.id1Type and t.id1 = a.id1
                left join $uuid_app_uniq as b on t.id2Type = b.id1Type and t.id2 = b.id1
            ) as t
        );

        insert into `{output_apps}` with truncate
        select * from $uuid_app_uniq;

        insert into `{output_soup}` with truncate
        select
            id1, id1Type, id2, id2Type, sourceType, u, v,
            min(date_begin) as date_begin,
            max(date_end) as date_end,
            max_by(id1_model, date_end) as id1_model,
            max_by(id1_manufacturer, date_end) as id1_manufacturer,
            max_by(id1_os, date_end) as id1_os,
            max_by(id1_app_id, date_end) as id1_app_id,
            max_by(id1_app_version, date_end) as id1_app_version,
            max_by(id2_model, date_end) as id2_model,
            max_by(id2_manufacturer, date_end) as id2_manufacturer,
            max_by(id2_os, date_end) as id2_os,
            max_by(id2_app_id, date_end) as id2_app_id,
            max_by(id2_app_version, date_end) as id2_app_version
        from $updated_soup
        group by id1, id1Type, id2, id2Type, sourceType, u, v;
    '''

    # Values substituted into the named placeholders of the template above.
    substitutions = dict(
        tx=tx.transaction_id,
        soup=preprocessed_soup,
        output_apps=output_apps,
        output_soup=output_soup,
        app_idtype=config.FAKE_APP_IDTYPE
    )

    run_yql(query_template.format(**substitutions))
