import luigi
import yt.wrapper as yt


from textwrap import dedent

from crypta.graph.soup.config.python import ID_TYPE
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils.yql_utils import run_yql
from crypta.graph.v1.python.v2.soup import soup_dirs

# YQL template that selects "shared" desktop yandexuids by heuristic.
#
# Names in curly braces are placeholders filled with str.format() in
# HeuristicSharedDesktopYuids.run(), so the query text itself must not contain
# literal curly braces.
#
# Outline of the query:
#   1. take yandexuid vertices with their cryptaId and keep desktop ones with
#      main_region_country == 225 and a non-private / non-one-day ip activity;
#   2. count direct edges per id type for every (cryptaId, yuid) pair;
#   3. attach the number of active yuids of the cryptaId and the averaged
#      socdem probabilities of the yuid;
#   4. apply the threshold filter on those counters;
#   5. keep only yuids found in edges of a cryptaId that has yandexuid, device
#      and social ids at once ("like human");
#   6. merge with the max-path table of the shared folder (if any) and write
#      the result to the output table.
YQL = dedent(
    """
pragma SimpleColumns;

-- heuristic desktop yuids

$date = '{date}';
$min_active_date = CAST(CAST($date AS Date) - DateTime::IntervalFromDays(30) AS String);

$source = '{source}';

$social = ('{email}', '{login}', '{ok_id}', '{vk_id}', '{vk_name}', '{fb_id}');
$device = ('{idfa}', '{gaid}', '{distr_r1}', '{distr_ui}');

-- input data
$edges_by_crypta_id = '{v2_dir}matching/edges_by_crypta_id';
$direct = '{v2_dir}matching/direct_yandexuid_by_id_type_and_id';
$yuid_with_all_info = '{ids_storage}yandexuid/yuid_with_all_info';
$vertices = '{v2_dir}matching/vertices_no_multi_profile_by_id_type';
$socdem_folder = '{profiles}/stages/raw-yandexuid';

$shared_folder = '{shared_folder}';

--output data
$shared_heuristic = '{output}';

$yuid_cryptaId = (
    select
        id as yuid,
        cryptaId
    from $vertices
    where id_type == '{yandexuid}'
);

$desktop_nonprivate_yuids_from_russia_with_cryptaId = (
    select
        yuid,
        cryptaId
    from
        $yuid_cryptaId as t1
            join
        $yuid_with_all_info as t2
            on t1.yuid == t2.id
    where
        ua_profile like 'd|desk%' and
        ip_activity_type not in ('private', 'one_day') and
        main_region_country == 225
);

$direct_edges_with_desktop_yuid_vertices_and_cryptaId = (
    select
        cryptaId,
        target_id as yuid,
        id_type as target_id_type,
        id as target_id
    from
        $direct as t1
            join
        $desktop_nonprivate_yuids_from_russia_with_cryptaId as t2
            on t1.target_id == t2.yuid
);

-- one row per (cryptaId, yuid): counters of all id types have to live in the
-- same row, otherwise the thresholds applied later cannot compare them
$desktop_yuids_with_social_stats = (
    select
        cryptaId,
        yuid,
        count_if(target_id_type == '{avito_id}') as avito_id,
        count_if(target_id_type == '{mm_device_id}') as mm_device_id,
        count_if(target_id_type == '{email}') as email,
        count_if(target_id_type == '{fb_id}') as fb_id,
        count_if(target_id_type == '{xuniq_guid}') as xuniq_guid,
        count_if(target_id_type == '{distr_r1}') as distr_r1,
        count_if(target_id_type == '{distr_ui}') as distr_ui,
        count_if(target_id_type == '{kp_id}') as kp_id,
        count_if(target_id_type == '{login}') as login,
        count_if(target_id_type == '{ok_id}') as ok_id,
        count_if(target_id_type == '{phone}') as phone,
        count_if(target_id_type == '{puid}') as puid,
        count_if(target_id_type == '{vk_id}') as vk_id,
        count_if(target_id_type == '{vk_name}') as vk_name,
        count_if(target_id_type == '{yamoney_id}') as yamoney_id
    from $direct_edges_with_desktop_yuid_vertices_and_cryptaId
    group by
        cryptaId,
        yuid
);

$cryptaId_with_count_of_active_yuids_from_russia = (
    select
        count(*) as active_yuid_count,
        t2.cryptaId as cryptaId
    from
        $yuid_with_all_info as t1
            join
        $yuid_cryptaId as t2
            on t1.id == t2.yuid
    where
        main_region_country == 225 and
        ip_activity_type not in ('private', 'one_day')
    group by t2.cryptaId
);

$desktop_yuids_with_social_stats_and_count_of_active_yuids_from_russia = (
    select
        * without t1.cryptaId, t2.cryptaId
    from
        $desktop_yuids_with_social_stats as t1
            join
        $cryptaId_with_count_of_active_yuids_from_russia as t2
            using (cryptaId)
);

$table_paths = (
    SELECT AGGREGATE_LIST(Path)
    FROM FOLDER($socdem_folder)
    where Type == 'table'
);

$yuids_with_socdem = (
    select
        cast(yandexuid as string) as yandexuid,
        avg(m_prob) as m_avg,
        avg(age_18_24_prob) as age_18_24_avg,
        avg(segment_C_prob) as segment_C_avg
    from
        (
            select
                yandexuid,
                Yson::LookupDouble(gender, 'm') as m_prob,
                Yson::LookupDouble(user_age_6s, '18_24') as age_18_24_prob,
                (
                    Yson::LookupDouble(income_5_segments, 'C1')
                    + Yson::LookupDouble(income_5_segments, 'C2')
                ) AS segment_C_prob
            from each($table_paths)
        )
    group by yandexuid
);

$desktop_yuids_with_social_stats_and_count_of_active_yuids_from_russia_and_socdem = (
    select * without t2.yandexuid
    from
        $desktop_yuids_with_social_stats_and_count_of_active_yuids_from_russia as t1
            join
        $yuids_with_socdem as t2
            on t1.yuid == t2.yandexuid
);

$shared_destop_yuids_by_heuristic = (
    select
        yuid as id,
        '{yandexuid}' as id_type,
        $source as source
    from $desktop_yuids_with_social_stats_and_count_of_active_yuids_from_russia_and_socdem
    where
        mm_device_id == 0 and
        (
            kp_id > 1 or
            ok_id > 1 or
            fb_id > 1 or
            vk_id > 1 or
            vk_name > 1 or
            yamoney_id > 1 or
            avito_id > 1 or
            login > 2 or
            puid > 2 or
            email > 2 or
            phone > 2
        ) and
        active_yuid_count > 3 and
        xuniq_guid < 2 and
        distr_r1 < 4 and
        distr_ui < 10 and
        kp_id < 5 and
        ok_id < 5 and
        fb_id < 5 and
        vk_id < 5 and
        yamoney_id < 5 and
        vk_name < 5 and
        avito_id < 5 and
        login < 10 and
        puid < 10 and
        phone < 10 and
        email < 10 and
        active_yuid_count < 30 and
        segment_C_avg + m_avg < 1.5 and
        age_18_24_avg + m_avg < 1.5
);

$social_edges_with_cryptaId = (
    select distinct
        cryptaId,
        id,
        id_type
    from
        (
            select
                cryptaId,
                id1Type as id_type,
                id1 as id
            from $edges_by_crypta_id
            where
                ListMax(Yson::ConvertToStringList(dates)) >= $min_active_date and
                (
                    id1Type == '{yandexuid}' or
                    id1Type in $social or
                    id1Type in $device
                )
            union all
            select
                cryptaId,
                id2Type as id_type,
                id2 as id
            from $edges_by_crypta_id
            where
                ListMax(Yson::ConvertToStringList(dates)) >= $min_active_date and
                (
                    id2Type == '{yandexuid}' or
                    id2Type in $social or
                    id2Type in $device
                )
        )
);

$nonprivate_yuids_from_russia_with_cryptaId_and_device_flag = (
    select
        t1.cryptaId as cryptaId,
        t1.yuid as id,
        (t2.ua_profile like 'd|desk|%') or (t2.ua_profile like 'm|%') as is_device
    from
        $yuid_cryptaId as t1
            join
        $yuid_with_all_info as t2
            on t1.yuid == t2.id
    where
        t2.main_region_country == 225 and
        t2.ip_activity_type not in ('private', 'one_day')
);

$social_or_device_with_cryptaId = (
    select
        cryptaId,
        'social' as id_type
    from
        (
            select distinct cryptaId
            from $social_edges_with_cryptaId
            where id_type in $social
        )
    union all
    select
        cryptaId,
        'device' as id_type
    from
        (
            select distinct cryptaId
            from $social_edges_with_cryptaId
            where id_type in $device
        )
);

$devices_nonprivate_yuids_from_russia_with_cryptaId_and_device_flag = (
    select
        cryptaId,
        'device' as id_type
    from $nonprivate_yuids_from_russia_with_cryptaId_and_device_flag
    where is_device
);

$summary = (
    select
        cryptaId,
        '{yandexuid}' as id_type
    from $nonprivate_yuids_from_russia_with_cryptaId_and_device_flag
    union all
    select *
    from $social_or_device_with_cryptaId
    union all
    select *
    from $devices_nonprivate_yuids_from_russia_with_cryptaId_and_device_flag
);

$cryptaid_like_human = (
    select
        cryptaId,
        AGGREGATE_LIST(distinct id_type) as id_types
    from $summary
    group by cryptaId
);

$like_human = (
    select distinct cryptaId
    from $cryptaid_like_human
    where
        ListHas(id_types, '{yandexuid}') and
        ListHas(id_types, 'device') and
        ListHas(id_types, 'social')
);

$yuids_like_human = (
    select id1 as id
    from
        $edges_by_crypta_id as t1
            left semi join
        $like_human as t2
            using (cryptaId)
    where id1Type == '{yandexuid}'
    union all
    select id2 as id
    from
        $edges_by_crypta_id as t1
            left semi join
        $like_human as t2
            using (cryptaId)
    where id2Type == '{yandexuid}'
);

$shared_yuids_like_human = (
    select *
    from
        $shared_destop_yuids_by_heuristic as t1
            left semi join
        $yuids_like_human as t2
            using (id)
);

$shared_folder_last_table = (
    select AsList(max(Path) ?? "not found")
    from folder($shared_folder)
    where
    Type = "table"
);

define action $insert_with_last() as
    INSERT INTO $shared_heuristic WITH TRUNCATE
    SELECT id, id_type, source
    FROM (
        SELECT *
        FROM $shared_yuids_like_human
        UNION ALL
        SELECT *
        FROM EACH($shared_folder_last_table)
    )
    GROUP BY
        id,
        Unwrap(id_type ?? '') AS id_type,
        Unwrap(source ?? '') AS source
    ORDER BY id, id_type;
end define;

define action $insert_without_last() as
    INSERT INTO $shared_heuristic WITH TRUNCATE
    SELECT id, id_type, source
    FROM $shared_yuids_like_human
    GROUP BY
        id,
        Unwrap(id_type ?? '') AS id_type,
        Unwrap(source ?? '') AS source
    ORDER BY id, id_type;
end define;

EVALUATE IF $shared_folder_last_table == AsList("not found")
do $insert_without_last()
else
do $insert_with_last();
"""
)


class HeuristicSharedDesktopYuids(yt_luigi.BaseYtTask):
    """Task that renders the ``YQL`` template and executes it for one ``date``.

    The query output goes to ``<shared_ids>/heuristic_desktop_yuids/<date>``
    (see ``output_path``).
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(HeuristicSharedDesktopYuids, self).__init__(*args, **kwargs)

        # Name of the sub-folder inside the shared ids folder used by output_path.
        self.output_folder_name = "heuristic_desktop_yuids"
        # Value substituted for the {source} placeholder of the query.
        self.source = "Heuristic shared desktop yuid"

    def requires(self):
        """Return the external tables and the folder this task depends on."""
        matching_tables = (
            "edges_by_crypta_id",
            "direct_yandexuid_by_id_type_and_id",
            "vertices_no_multi_profile_by_id_type",
        )
        dependencies = [
            yt_luigi.ExternalInput(yt.ypath_join(self.in_f("v2_dir"), "matching", table_name))
            for table_name in matching_tables
        ]

        yuid_info_path = yt.ypath_join(self.in_f("ids_storage"), "yandexuid", "yuid_with_all_info")
        dependencies.append(yt_luigi.ExternalInput(yuid_info_path))

        socdem_folder = yt.ypath_join(self.in_f("profiles"), "stages", "raw-yandexuid")
        dependencies.append(yt_luigi.ExternalFolder(socdem_folder))

        return dependencies

    def input_folders(self):
        """Map input folder aliases (used with ``in_f``) to their paths."""
        profiles_dir = yt.ypath_join(config.CRYPTA_GRAPH_CRYPTA_HOME, "profiles")
        folders = {
            "v2_dir": soup_dirs.V2_DIR,
            "ids_storage": config.CRYPTA_IDS_STORAGE,
            "profiles": profiles_dir,
        }
        return folders

    def output_folders(self):
        """Map output folder aliases (used with ``out_f``) to their paths."""
        folders = {"shared_ids": config.CRYPTA_SHARED_IDS_FOLDER}
        return folders

    @property
    def output_path(self):
        """Path of the result table: shared ids folder / folder name / date."""
        shared_ids_dir = self.out_f("shared_ids")
        return yt.ypath_join(shared_ids_dir, self.output_folder_name, self.date)

    def output(self):
        """Return the date target built from ``output_path`` and ``date``."""
        target_path = self.output_path
        return yt_luigi.YtDateTarget(target_path, self.date)

    @staticmethod
    def desc():
        """Return the short description string of the task."""
        return "HEURISTIC_DESKTOP"

    def run(self):
        """Fill the ``YQL`` template, execute it and set the generate date."""
        # Placeholder name -> soup id type whose Name is substituted into the query.
        id_types = {
            "avito_id": ID_TYPE.AVITO_ID,
            "distr_r1": ID_TYPE.DISTR_R1,
            "distr_ui": ID_TYPE.DISTR_UI,
            "email": ID_TYPE.EMAIL,
            "fb_id": ID_TYPE.FB_ID,
            "gaid": ID_TYPE.GAID,
            "idfa": ID_TYPE.IDFA,
            "kp_id": ID_TYPE.KINOPOISK_ID,
            "login": ID_TYPE.LOGIN,
            "mm_device_id": ID_TYPE.MM_DEVICE_ID,
            "ok_id": ID_TYPE.OK_ID,
            "phone": ID_TYPE.PHONE,
            "puid": ID_TYPE.PUID,
            "vk_id": ID_TYPE.VK_ID,
            "vk_name": ID_TYPE.VK_NAME,
            "xuniq_guid": ID_TYPE.XUNIQ_GUID,
            "yamoney_id": ID_TYPE.YAMONEY_ID,
            "yandexuid": ID_TYPE.YANDEXUID,
        }
        substitutions = {placeholder: id_type.Name for placeholder, id_type in id_types.items()}

        # Date, paths and the source label used by the query.
        substitutions.update(
            date=self.date,
            ids_storage=self.in_f("ids_storage"),
            output=self.output_path,
            profiles=self.in_f("profiles"),
            # the query looks for previously built tables next to the output table
            shared_folder=yt.ypath_dirname(self.output_path),
            source=self.source,
            v2_dir=self.in_f("v2_dir"),
        )

        run_yql(query=YQL.format(**substitutions))

        mr.set_generate_date(self.output_path, self.date)
