import os


def os_get_bool(key, default):
    """Read environment variable ``key`` as a boolean flag.

    The value of the variable (or ``default`` when the variable is unset) is
    lower-cased and compared with the accepted "true" spellings; any other
    value yields ``False``.

    :param key: name of the environment variable
    :param default: fallback used when the variable is unset; normally a string
        such as "True" or "False", but bools and ``None`` are accepted too
    :return: True if the effective value is one of 1, t, true, y, yes, +, on
        (case-insensitive), otherwise False
    """
    value = os.getenv(key, default)
    # str() lets callers pass True/False/None as the default instead of
    # failing with AttributeError on ``.lower()``.
    return str(value).lower() in {"1", "t", "true", "y", "yes", "+", "on"}


def os_get_list(key, default_list=None, separator=" "):
    """Read environment variable ``key`` as a list of strings.

    :param key: name of the environment variable
    :param default_list: value returned when the variable is unset or empty;
        when omitted, a new empty list is returned on every call
    :param separator: delimiter passed to ``str.split``
    :return: the split value, or the default described above
    """
    value = os.getenv(key)
    if value:
        return value.split(separator)
    if default_list is None:
        # A fresh list per call: a shared ``[]`` default would leak any
        # mutation made by one caller into every later call.
        return []
    return default_list


# Runtime config. Every value below is read from the process environment once,
# at import time; os.getenv returns None for a variable that is not set.
CRYPTA_ENV = os.getenv("CRYPTA_ENV")
REACTOR_TOKEN = os.getenv("REACTOR_TOKEN")
CRYPTA_DC = os.getenv("CRYPTA_DC")
MR_EXEC = os.getenv("RTCRYPTA_MAPREDUCE_EXEC")
MR_SERVER = os.getenv("RTCRYPTA_MR_SERVER")
AUDIENCE_MR_SERVER = os.getenv("AUDIENCE_MR_SERVER")
AUDIENCE_YT_TOKEN = os.getenv("AUDIENCE_YT_TOKEN")
COPY_TO_HAHN_TOKEN = os.getenv("COPY_TO_HAHN_TOKEN")
STREAM_DONE = os.getenv("STREAM_DONE")

# luigi / luigid settings
LUIGID_URL = os.getenv("LUIGID_URL")
LUIGID_HOST = os.getenv("LUIGID_HOST")
LUIGID_PORT = os.getenv("LUIGID_PORT")  # kept as a string (or None), not converted to int
# int() raises ValueError at import time if the variable is set to a non-integer
LUIGID_WORKERS = int(os.getenv("LUIGID_WORKERS", "10"))
# space-separated tag lists turned into sets; an unset or empty variable gives an empty set
LUIGI_RUN_TAGS = set(os_get_list("LUIGI_RUN_TAGS"))
LUIGI_FILTER_TAGS = set(os_get_list("LUIGI_FILTER_TAGS"))
LUIGI_FAIL_TAGS = set(os_get_list("LUIGI_FAIL_TAGS"))

# raw strings (or None) straight from the environment; no boolean parsing happens here
CRYPTA_UPLOAD_ENABLED = os.getenv("CRYPTA_UPLOAD_ENABLED")
CRYPTA_AUDIENCE_ENABLED = os.getenv("CRYPTA_AUDIENCE_ENABLED")
CRYPTA_STATFACE_ENABLED = os.getenv("CRYPTA_STATFACE_ENABLED")
HAS_BSYETI_TABLES = os.getenv("HAS_BSYETI_TABLES")

# defaults to the string "no" when unset
TEST_FAIL_REPORTING = os.getenv("TEST_FAIL_REPORTING", "no")

# matrixnet applying binary
MX_OPS_BIN = os.getenv("MX_OPS_BIN")

CRYPTA_GRAPH_CRYPTA_HOME = os.getenv("CRYPTA_GRAPH_CRYPTA_HOME")
# uncategorized settings (originally labelled "have no idea config section")
DIT_MSK_DAY_FOLDER = os.getenv("DIT_MSK_DAY_FOLDER")
COOKIE_MATCHING_FOLDER = os.getenv("COOKIE_MATCHING_FOLDER")
VKID_PUID_DMP_FOLDER = os.getenv("VKID_PUID_DMP_FOLDER")
# the constants below read GRAPH_-prefixed environment variables,
# e.g. LOCAL_OUTPUT_FOLDER comes from GRAPH_LOCAL_OUTPUT_FOLDER
LOCAL_OUTPUT_FOLDER = os.getenv("GRAPH_LOCAL_OUTPUT_FOLDER")
RIPE_LOCAL_DIR = os.getenv("GRAPH_RIPE_LOCAL_DIR")
IP_LOCAL_DIR = os.getenv("GRAPH_IP_LOCAL_DIR")
RIPE_FNAME_4 = os.getenv("GRAPH_RIPE_FNAME_4")
RIPE_FNAME_6 = os.getenv("GRAPH_RIPE_FNAME_6")
BAD_IPS_FNAME = os.getenv("GRAPH_BAD_IPS_FNAME")

# some outer folders and tables (all None when the variable is unset)
DATA_TO_CLASSIFY = os.getenv("DATA_TO_CLASSIFY")
FP_FOLDER = os.getenv("FP_FOLDER")
FRESH_FP_FOLDER = os.getenv("FRESH_FP_FOLDER")
WEBVISOR_LOGIN_FOLDER_PROCESSED = os.getenv("WEBVISOR_LOGIN_FOLDER_PROCESSED")
PROFILES_EXPORT = os.getenv("PROFILES_EXPORT")
PROFILES_STORAGE = os.getenv("PROFILES_STORAGE")
PROFILES_BASE_FOLDER = os.getenv("PROFILES_BASE_FOLDER")
PASSPORT_SOCIAL_FOLDER = os.getenv("PASSPORT_SOCIAL_FOLDER")
METRIKA_INPUT_FOLDER = os.getenv("METRIKA_INPUT_FOLDER")
MOBMET_APP_ID_AND_DOMAINS = os.getenv("MOBMET_APP_ID_AND_DOMAINS")
MOBMET_COUNTERS_AND_DOMAINS = os.getenv("MOBMET_COUNTERS_AND_DOMAINS")

# some inner folders and tables
GRAPH_HIST_FOLDER = os.getenv("GRAPH_HIST_FOLDER")
FRAUD_IPS_TABLE = os.getenv("FRAUD_IPS")  # note: the variable is FRAUD_IPS, without _TABLE
IS_OUTPUT_FOLDER = os.getenv("GRAPH_ISCRYPTA_FOLDER")  # note: variable name differs from the constant
DEV_CID_TABLE = os.getenv("DEV_CID_TABLE")
PASSPORT_AUTH_PREFIX = "//crypta/production/passport_auth"  # hard-coded, not read from the environment
GRAPH_PARTNERS_DATA = os.getenv("GRAPH_PARTNERS_DATA")
GRAPH_EXTERNAL_DUMPS = os.getenv("GRAPH_EXTERNAL_DUMPS")

# production process base folders and tables
# TODO: YT_OUTPUT_FOLDER duplicates GRAPH_YT_OUTPUT_FOLDER (both read the same variable)
YT_OUTPUT_FOLDER = os.getenv("GRAPH_YT_OUTPUT_FOLDER")  # TODO: the same as GRAPH_YT_OUTPUT_FOLDER
AUDIENCE_YT_FOLDER = os.getenv("AUDIENCE_YT_FOLDER")
INDEVICE_YT_FOLDER = os.getenv("INDEVICE_YT_FOLDER")
GRAPH_STREAM_FOLDER = os.getenv("GRAPH_STREAM_FOLDER")
GRAPH_YT_DICTS_FOLDER = os.getenv("GRAPH_YT_DICTS_FOLDER")
VERTICES_TABLE = os.getenv("VERTICES_TABLE")
# the two *_STORE_DAYS values stay strings (or None); only BS_CHEVENT_SEARCH_DAYS is converted to int
STORE_DAYS = os.getenv("GRAPH_STORE_DAYS")
AUDIENCE_STORE_DAYS = os.getenv("AUDIENCE_STORE_DAYS")
BS_CHEVENT_SEARCH_DAYS = int(os.getenv("BS_CHEVENT_SEARCH_DAYS", "0"))

CRYPTA_GRAPH_LOGFELLER_YT_PATH = os.getenv("CRYPTA_GRAPH_LOGFELLER_YT_PATH")
CRYPTA_IDS_STORAGE = os.getenv("CRYPTA_IDS_STORAGE")
CRYPTA_SHARED_IDS_FOLDER = os.getenv("CRYPTA_SHARED_IDS_FOLDER")
# derived paths: <YT_OUTPUT_FOLDER>/logs and <YT_OUTPUT_FOLDER>/logs/access-log;
# both are None when YT_OUTPUT_FOLDER is unset or empty
LOG_LINKS_FOLDER = os.path.join(YT_OUTPUT_FOLDER, "logs") if YT_OUTPUT_FOLDER else None
ACCESS_LOGS_FOLDER = os.path.join(LOG_LINKS_FOLDER, "access-log") if LOG_LINKS_FOLDER else None

# radius splunk
RADIUS_SPLUNK_URL = os.getenv("RADIUS_SPLUNK_URL")
RADIUS_SPLUNK_PORT = os.getenv("RADIUS_SPLUNK_PORT")
RADIUS_SPLUNK_USR = os.getenv("RADIUS_SPLUNK_USR")
RADIUS_SPLUNK_PASS_PATH = os.getenv("RADIUS_SPLUNK_PASS_PATH")
RADIUS_SPLUNK_PASSWORD = os.getenv("RADIUS_SPLUNK_PASSWORD")

# TVM ids default to 0 when the variables are unset
RADIUS_CLIENT_TVM_ID = int(os.getenv("RADIUS_CLIENT_TVM_ID", "0"))
RADIUS_SERVER_TVM_ID = int(os.getenv("RADIUS_SERVER_TVM_ID", "0"))
RADIUS_TVM_SECRET = os.getenv("RADIUS_TVM_SECRET")

RADIUS_LOG_LOCAL_FOLDER = os.getenv("RADIUS_LOG_LOCAL_FOLDER")
RADIUS_LOG_YT_FOLDER = os.getenv("RADIUS_LOG_YT_FOLDER")
RADIUS_METRICS_YT_FOLDER = os.getenv("RADIUS_METRICS_YT_FOLDER")
RADIUS_METRICS_LOCAL_FOLDER = os.getenv("RADIUS_METRICS_LOCAL_FOLDER")

STATFACE_USERNAME = os.getenv("STATFACE_USERNAME")
STATFACE_OAUTH = os.getenv("STATFACE_OAUTH")

SANDBOX_OAUTH = os.getenv("SANDBOX_OAUTH")

# logfeller log folders
LOGFELLER_BAR_LOG_FOLDER = os.getenv("LOGFELLER_BAR_LOG_FOLDER")
LOGFELLER_BS_WATCH_FOLDER = os.getenv("LOGFELLER_BS_WATCH_FOLDER")
LOGFELLER_EAL_FOLDER = os.getenv("LOGFELLER_EAL_FOLDER")
LOGFELLER_KINOPOISK_FOLDER = os.getenv("LOGFELLER_KINOPOISK_FOLDER")
LOGFELLER_MOB_REPORT_LOG_FOLDER = os.getenv("LOGFELLER_MOB_REPORT_LOG_FOLDER")
LOGFELLER_REQANS_FOLDER = os.getenv("LOGFELLER_REQANS_FOLDER")
LOGFELLER_REQANS_NEW_FOLDER = os.getenv("LOGFELLER_REQANS_NEW_FOLDER")
LOGFELLER_REQANS_ALICE_FOLDER = os.getenv("LOGFELLER_REQANS_ALICE_FOLDER")
LOGFELLER_RTB_LOG_FOLDER = os.getenv("LOGFELLER_RTB_LOG_FOLDER")
LOGFELLER_RTGEO_FOLDER = os.getenv("LOGFELLER_RTGEO_FOLDER")
LOGFELLER_SBAPI_MITB_LOG_FOLDER = os.getenv("LOGFELLER_SBAPI_MITB_LOG_FOLDER")
LOGFELLER_SENDER_FOLDER = os.getenv("LOGFELLER_SENDER_FOLDER")
LOGFELLER_SDK_FOLDER = os.getenv("LOGFELLER_SDK_FOLDER")
LOGFELLER_VISIT_V2_LOG_FOLDER = os.getenv("LOGFELLER_VISIT_V2_LOG_FOLDER")
LOGFELLER_VISIT_V2_PRIVATE_LOG_FOLDER = os.getenv("LOGFELLER_VISIT_V2_PRIVATE_LOG_FOLDER")

PASSPORT_USERDATA = os.getenv("PASSPORT_USERDATA")

STATBOX_CUBE_INSTALL = os.getenv("STATBOX_CUBE_INSTALL")
STATBOX_YABS_FOLDER = os.getenv("STATBOX_YABS_FOLDER")

SOVETNIK_INPUT_FOLDER = os.getenv("SOVETNIK_INPUT_FOLDER")
AUTORU_INPUT_FOLDER = os.getenv("AUTORU_INPUT_FOLDER")
AUTORU_WAREHOUSE_FOLDER = os.getenv("AUTORU_WAREHOUSE_FOLDER")

GRAPH_YT_OUTPUT_FOLDER = os.getenv("GRAPH_YT_OUTPUT_FOLDER")
GRAPH_YT_STAT_FOLDER = os.getenv("GRAPH_YT_STAT_FOLDER")
GRAPH_FOLDER = GRAPH_YT_OUTPUT_FOLDER  # alias of the value above
# production uses a fixed archive path; every other environment reuses GRAPH_YT_OUTPUT_FOLDER
V1_ARCHIVE_FOLDER = GRAPH_YT_OUTPUT_FOLDER if CRYPTA_ENV != "production" else "//home/crypta/archive/graph/v1/"

MONRUN_FOLDER = os.getenv("MONRUN_FOLDER")
MONRUN_DATE_FOLDER = os.getenv("MONRUN_DATE_FOLDER")
MONRUN_GRAPH = ""  # constant empty string, not read from the environment
LOG_FOLDER = os.getenv("RTCRYPTA_PYSTARTER_LOG_FOLDER")

# Short log name -> folder. Values come either from constants defined above or
# directly from the environment, so any of them may be None when unset.
# TODO: add all log folders here
LOG_FOLDERS = {
    "yabs": STATBOX_YABS_FOLDER,
    "bs_watch": LOGFELLER_BS_WATCH_FOLDER,
    "reqans": LOGFELLER_REQANS_FOLDER,
    "reqans_new": LOGFELLER_REQANS_NEW_FOLDER,
    "reqans_alice": LOGFELLER_REQANS_ALICE_FOLDER,
    "bar": LOGFELLER_BAR_LOG_FOLDER,
    "fp": FP_FOLDER,
    "redir": os.getenv("LOGFELLER_REDIR_FOLDER"),
    "mob_tracking": os.getenv("LOGFELLER_MOBILE_TRACKING"),
    "mob_tracking_private": os.getenv("LOGFELLER_MOBILE_TRACKING_PRIVATE"),
    "oauth": os.getenv("LOGFELLER_OAUTH_FOLDER"),
    "passport": os.getenv("LOGFELLER_PASSPORT_FOLDER"),
    "passport_phone": os.getenv("LOGFELLER_PASSPORT_PHONE_LOG_FOLDER"),
    "passport_sensitive": os.getenv("LOGFELLER_PASSPORT_SENSITIVE_LOG_FOLDER"),
    "bschevent": os.getenv("LOGFELLER_BS_CHEVENT_LOG_FOLDER"),
    "auto": os.getenv("LOGFELLER_AUTO_FRONT_LOG_FOLDER"),
    "postback": os.getenv("POSTBACK_LOG_FOLDER"),
    "postclick": os.getenv("POSTCLICK_LOG_FOLDER"),
    "bs_xuniqs": os.getenv("BS_XUNIQS_LOG_FOLDER"),
    "bs_hitlog": os.getenv("BS_HIT_LOG_FOLDER"),
    "bs_rtblog": os.getenv("BS_RTB_LOG_FOLDER"),
    "bs_webvisor_log": os.getenv("BS_WEBVISOR_LOG_FOLDER"),
}


def _flatten(double_dict):
    """Yield ``(outer_key, inner_key)`` for every entry of a dict of dicts.

    :param double_dict: mapping whose values are themselves mappings
    :return: generator of 2-tuples; inner values are ignored and outer keys
        with an empty inner mapping produce nothing
    """
    # .items() exists on both Python 2 and 3, unlike the py2-only .iteritems()
    for outer_key, inner in double_dict.items():
        # iterating a mapping yields its keys, which is all that is needed here
        for inner_key in inner:
            yield (outer_key, inner_key)


# ==== YUID-YUID PAIRS =====
# source types: string tags for the source an edge came from
ID_SOURCE_TYPE_PASSPORT = "passport"
ID_SOURCE_TYPE_PASSPORT_DUMP = "passport_dump"
ID_SOURCE_TYPE_PASSPORT_SENSITIVE = "passport_sensitive"
ID_SOURCE_TYPE_PASSPORT_SERVER = "passport_server"  # server side passport logs
ID_SOURCE_TYPE_WATCH_LOG_MAILRU = "wl_mailru"
ID_SOURCE_TYPE_SOCIAL = "social"
ID_SOURCE_TYPE_PAGE_TITLE = "page_title"
ID_SOURCE_TYPE_WEBVISOR = "webvisor"
ID_SOURCE_TYPE_SENDER = "sender"
ID_SOURCE_TYPE_AUTO = "auto"
ID_SOURCE_TYPE_KINOPOISK = "kp"
ID_SOURCE_TYPE_YAMONEY = "yamoney"
ID_SOURCE_TYPE_TICKETS = "tickets"
ID_SOURCE_TYPE_BARLOG = "barlog"  # desktop yandex browser and bar plugins
ID_SOURCE_TYPE_EAL = "eal"  # export access log
ID_SOURCE_TYPE_EXTERNAL_BROWSERS = "ext_bro"  # export access log :: external browsers from YaBro
ID_SOURCE_TYPE_BROWSER_MANAGER = "bm"  # sbapi-mitb log
ID_SOURCE_TYPE_PUNTO = "punto"  # export access log punto switcher
ID_SOURCE_TYPE_NEIGHBOUR_COOKIES = "neighbour_cookies"  # yandex browser reports cookies of neighbour browsers
ID_SOURCE_TYPE_FP = "fp"  # only for those fingerprint ids that do not have several sources or specific types
ID_SOURCE_TYPE_WATCH_LOG = "watch_log"
ID_SOURCE_TYPE_MOBILE_METRIKA = "mm"
ID_SOURCE_TYPE_VK = "vk"
ID_SOURCE_TYPE_DITMSK = "ditmsk"
ID_SOURCE_TYPE_SOVETNIK = "sovetnik"

# not used to match
ID_SOURCE_TYPE_ECOMMERCE_LOG = "ecommerce"  # special events in watch log

# id types

# basic matching ids
ID_TYPE_DEVID = "devid"
ID_TYPE_DEVID_HASH = "devidhash"
ID_TYPE_DUID = "duid"
ID_TYPE_UUID = "uuid"
ID_TYPE_YUID = "yuid"
ID_TYPE_MAC = "mac"

ID_TYPE_FUID = "fuid"
ID_TYPE_PUID = "puid"
ID_TYPE_VKCOM = "vk"  # same string value as ID_SOURCE_TYPE_VK
ID_TYPE_VKCOM_NAME = "vk_name"
ID_TYPE_FACEBOOK_ID = "fb"
ID_TYPE_OKRU = "ok"
ID_TYPE_AVITO = "avito"
ID_TYPE_BAR_UI = "ui"  # client side UI for yandex software
ID_TYPE_BAR_R1 = "r1"  # server side UI for yandex software
ID_TYPE_IP = "ip"
ID_TYPE_KINOPOISK_UID = "kp_uid"

ID_TYPE_YAMONEYID = "yamoney_id"
ID_TYPE_YAMONEY_CARD_TOKEN = "yamoney_card"
ID_TYPE_YAMONEY_ACCOUNT = "yamoney_acc"
ID_TYPE_YAMONEY_INTERNAL = "yamoney_internal"

ID_TYPE_LOGIN = "login"

ID_TYPE_EMAIL = "email"
ID_TYPE_EMAIL_HASH = "email_hash"
ID_TYPE_PHONE = "phone"
ID_TYPE_PHONE_HASH = "phone_hash"

ID_TYPE_SOCDEM = "socdem"
ID_TYPE_DATE = "date"
ID_TYPE_DITID = "dit_id"
ID_TYPE_MMETRIC_DEVID = "mmetric_devid"

# used as a key of custom_weights next to ordinary source types
PAIR_TYPE_CROSS_SOURCE = "cross"

# webvisor date limits
WEBVISOR_MINYEAR = 1946
WEBVISOR_MAXYEAR = 2000

# not used to match
ID_TYPE_REGION = "region"  # region from geobase
ID_TYPE_PURCHASE_DOMAIN = "purchase_domain"  # any domain

# Fallback limits used as YuidPairType constructor defaults.
default_yuids_per_id_limit = 5
default_ids_per_yuid_strict_limit = 5
default_ids_per_yuid_soft_limit = 3

TICKETS_MIN_COUNT = 2


class YuidPairType:
    """
    Settings record for one kind of yuid-id-yuid pair.

    The class only stores the values it is given; the limits and weights are
    applied by code outside of it.
    """

    def __init__(
        self,
        id_type,
        source_types,
        base_weight=1.0,
        custom_weights=None,
        yuids_per_id_strict_limit=default_yuids_per_id_limit,
        yuids_per_id_soft_limit=default_yuids_per_id_limit,
        ids_per_yuid_strict_limit=default_ids_per_yuid_strict_limit,
        ids_per_yuid_soft_limit=default_ids_per_yuid_soft_limit,
        human_limit=None,
        filter_private_mode=False,
        exact=True,
        required=True,
    ):
        """
        NOTE(review): the previous docstring described the yuids_per_id_* limits as
        "ids per one yuid" and the ids_per_yuid_* limits as "yuids per one id". The
        wording below follows the parameter names instead, which is also how the
        limits are commented where pair types are declared (e.g. "2-3 apps and
        plugins" for ids_per_yuid_soft_limit) - confirm against the code that
        applies the limits.

        :param id_type: type of the id that forms the yuid-id-yuid edge
        :param source_types: sequence of one or more sources such an edge may come from
        :param base_weight: how much this edge type is trusted
        :param custom_weights: per-source trust overrides; any falsy value is
            replaced with a new empty dict
        :param yuids_per_id_strict_limit: if one id has more yuids than this, the id is thrown away
        :param yuids_per_id_soft_limit: if one id has more than N yuids, the N most active are chosen
        :param ids_per_yuid_strict_limit: if one yuid has more ids than this, the yuid is thrown away
        :param ids_per_yuid_soft_limit: if one yuid has more than N ids, the N most active are chosen
        :param human_limit: a typical human doesn't have more than N ids of this type
        :param filter_private_mode: private mode yuids won't go to matching for this pair type
        :param exact: not-probabilistic
        :param required: whether the pair table is required to be non-empty
        """
        self.id_type = id_type
        self.source_types = source_types
        self.base_weight = base_weight
        self.custom_weights = custom_weights if custom_weights else dict()
        self.yuids_per_id_strict_limit = yuids_per_id_strict_limit
        self.yuids_per_id_soft_limit = yuids_per_id_soft_limit
        self.ids_per_yuid_strict_limit = ids_per_yuid_strict_limit
        self.ids_per_yuid_soft_limit = ids_per_yuid_soft_limit
        self.human_limit = human_limit
        self.filter_private_mode = filter_private_mode
        self.exact = exact
        self.required = required

    def name(self):
        """Return the id_type for aggregate types, else "<id_type>_<first source>"."""
        if not self.is_aggregate():
            # the source suffix is not really needed for a non-aggregated type,
            # it is kept only for compatibility
            first_source = self.source_types[0]
            return self.id_type + "_" + first_source
        return self.id_type

    def names_per_source(self):
        """Yield "<id_type>_<source>" for each source, in source_types order."""
        for source in self.source_types:
            yield self.id_type + "_" + source

    def is_aggregate(self):
        """Return True when the pair type lists more than one source."""
        source_count = len(self.source_types)
        return source_count >= 2


# Yuid pair types built with the default exact=True. A pair type listing more
# than one source is "aggregate" (see YuidPairType.is_aggregate).
YUID_PAIR_TYPES_EXACT = [
    YuidPairType(
        ID_TYPE_EMAIL,
        [
            ID_SOURCE_TYPE_PASSPORT,  # email produced from login
            ID_SOURCE_TYPE_SOCIAL,
            ID_SOURCE_TYPE_PAGE_TITLE,
            ID_SOURCE_TYPE_WEBVISOR,
            ID_SOURCE_TYPE_SENDER,
            ID_SOURCE_TYPE_YAMONEY,
            ID_SOURCE_TYPE_TICKETS,
            # ID_SOURCE_TYPE_AUTO,
            ID_SOURCE_TYPE_KINOPOISK,
            # ID_SOURCE_TYPE_DITMSK,  # uncomment it later, when email_hash id_type will be available
            ID_SOURCE_TYPE_BARLOG,
        ],
        base_weight=0.8,
        custom_weights={ID_SOURCE_TYPE_WEBVISOR: 0.3, PAIR_TYPE_CROSS_SOURCE: 0.6},
        yuids_per_id_strict_limit=10,
        yuids_per_id_soft_limit=5,
        ids_per_yuid_soft_limit=3,
        ids_per_yuid_strict_limit=5,
        human_limit=3,
    ),
    YuidPairType(
        ID_TYPE_PHONE,
        [
            ID_SOURCE_TYPE_PASSPORT,
            ID_SOURCE_TYPE_PASSPORT_SENSITIVE,
            ID_SOURCE_TYPE_PASSPORT_DUMP,
            ID_SOURCE_TYPE_WEBVISOR,
            ID_SOURCE_TYPE_SOCIAL,
            ID_SOURCE_TYPE_YAMONEY,
            ID_SOURCE_TYPE_TICKETS,
            # ID_SOURCE_TYPE_AUTO,
            ID_SOURCE_TYPE_DITMSK,
            # composite source names: "vk_fp" and "vk_barlog"
            ID_SOURCE_TYPE_VK + "_" + ID_SOURCE_TYPE_FP,
            ID_SOURCE_TYPE_VK + "_" + ID_SOURCE_TYPE_BARLOG,
        ],
        base_weight=0.8,
        ids_per_yuid_soft_limit=2,
        ids_per_yuid_strict_limit=3,
        human_limit=2,
    ),
    YuidPairType(
        ID_TYPE_LOGIN,
        [ID_SOURCE_TYPE_FP],
        base_weight=0.8,
        yuids_per_id_strict_limit=10,
        yuids_per_id_soft_limit=5,
        ids_per_yuid_soft_limit=2,
        ids_per_yuid_strict_limit=3,
        human_limit=3,
    ),
    YuidPairType(
        ID_TYPE_VKCOM,
        [ID_SOURCE_TYPE_FP, ID_SOURCE_TYPE_BARLOG, ID_SOURCE_TYPE_SOVETNIK],
        ids_per_yuid_soft_limit=1,
        ids_per_yuid_strict_limit=4,
        human_limit=1,
    ),
    YuidPairType(
        ID_TYPE_VKCOM_NAME,
        [ID_SOURCE_TYPE_SOVETNIK],
        ids_per_yuid_soft_limit=1,
        ids_per_yuid_strict_limit=4,
        human_limit=1,
    ),
    YuidPairType(
        ID_TYPE_OKRU,
        [ID_SOURCE_TYPE_FP, ID_SOURCE_TYPE_BARLOG, ID_SOURCE_TYPE_SOVETNIK],
        ids_per_yuid_soft_limit=1,
        ids_per_yuid_strict_limit=4,
        human_limit=1,
    ),
    # YuidPairType(ID_TYPE_FUID, [ID_SOURCE_TYPE_FP],
    #              ids_per_yuid_soft_limit=1,  # expect each yuid have only one fuid
    #              ids_per_yuid_strict_limit=3,  # for some technical problems when above is not true
    #              filter_private_mode=True),
    YuidPairType(
        ID_TYPE_BAR_UI,
        [
            ID_SOURCE_TYPE_BARLOG,
            ID_SOURCE_TYPE_EAL,
            ID_SOURCE_TYPE_PUNTO,
            ID_SOURCE_TYPE_BROWSER_MANAGER,
            ID_SOURCE_TYPE_EXTERNAL_BROWSERS,
        ],
        ids_per_yuid_soft_limit=3,  # assume 2-3 apps and plugins
        ids_per_yuid_strict_limit=5,  # for some technical problems when above is not true
        filter_private_mode=True,
    ),
    YuidPairType(
        ID_TYPE_BAR_R1,
        [ID_SOURCE_TYPE_BARLOG],
        base_weight=0.8,
        ids_per_yuid_soft_limit=2,  # assume 1-2 apps and plugins
        ids_per_yuid_strict_limit=3,  # for some technical problems when above is not true
    ),
    # the two yamoney pair types below use every default limit and weight
    YuidPairType(ID_TYPE_YAMONEY_CARD_TOKEN, [ID_SOURCE_TYPE_YAMONEY]),
    YuidPairType(ID_TYPE_YAMONEY_ACCOUNT, [ID_SOURCE_TYPE_YAMONEY]),
    YuidPairType(
        ID_TYPE_KINOPOISK_UID,
        [ID_SOURCE_TYPE_KINOPOISK],
        ids_per_yuid_soft_limit=1,  # why do you need more than one KP account
        ids_per_yuid_strict_limit=2,
    ),
]

# id_type -> yuid_pair_type
# Every entry of YUID_PAIR_TYPES_EXACT currently has a distinct id_type, so no
# entry is overwritten. The loop variable `pair` stays bound as a module-level
# name after the loop.
YUID_PAIR_TYPES_DICT = dict()
for pair in YUID_PAIR_TYPES_EXACT:
    YUID_PAIR_TYPES_DICT[pair.id_type] = pair

# id_types of the pair types that list more than one source.
# NOTE(review): "YUIR" looks like a typo for "YUID"; renaming it would change
# the name importers see, so it is left as is.
YUIR_PAIR_AGGREGATE_TYPES = [p.id_type for p in YUID_PAIR_TYPES_EXACT if p.is_aggregate()]

# ======== DEVID PAIRS ========

# mobile source types
ID_SOURCE_TYPE_ACCOUNT_MANAGER = "am"
ID_SOURCE_TYPE_PASSPORT_OAUTH = "oauth"
ID_SOURCE_TYPE_TRACK = "track"
ID_SOURCE_TYPE_REDIR = "redir"
ID_SOURCE_TYPE_SDK = "sdk"
ID_SOURCE_TYPE_YABROWSER_ANDROID = "yabrowser_android"
ID_SOURCE_TYPE_YABROWSER_IOS = "yabrowser_ios"
ID_SOURCE_TYPE_ACCESS_LOG = "access_log"
ID_SOURCE_TYPE_APPSFLYER = "appsflyer"
ID_SOURCE_TYPE_VMETRO = "vmetro"
ID_SOURCE_TYPE_POSTCLICK = "postclick"
ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID = "metrica_sockets_android"
ID_SOURCE_TYPE_METRICA_SOCKETS_IOS = "metrica_sockets_ios"
# note: the four *_YP_* values below carry a "_did_" infix that the constant names omit
ID_SOURCE_TYPE_WATCH_YP_IOS = "watch_yp_did_ios"
ID_SOURCE_TYPE_WATCH_YP_ANDROID = "watch_yp_did_android"
ID_SOURCE_TYPE_ACCESS_YP_IOS = "access_yp_did_ios"
ID_SOURCE_TYPE_ACCESS_YP_ANDROID = "access_yp_did_android"

# device match types
INDEVICE = "indev"
CROSSDEVICE = "crdev"

UA_PROFILE_MATCHES = "ua_matches"
UA_PROFILE_NOT_MATCHES = "indev_no_match"

NOT_FOUND_YUID_UA = "not_found_yuid_ua"
NOT_FOUND_DEV_INFO = "not_found_dev_info"

# every outcome constant above except UA_PROFILE_MATCHES
NO_MATCH_TYPES = [UA_PROFILE_NOT_MATCHES, NOT_FOUND_YUID_UA, NOT_FOUND_DEV_INFO]

# Fallback limits; only default_yuids_per_devid_limit is used as a
# DevidPairType constructor default.
default_yuids_per_devid_limit = 7
default_devids_per_id_limit = 5


class DevidPairType:
    """Settings record for one source of devid-yuid pairs.

    The class only stores the values it is given.
    """

    def __init__(
        self,
        source_type,
        match_types,
        base_weight=3.0,  # most of devid-yuid pairs are generated by some API
        yuids_per_devid_limit=default_yuids_per_devid_limit,
        exact=True,
        enabled=True,
        strong=False,
    ):
        """
        :param source_type: source tag of the pair; may be empty, in which case
            generated names carry no source prefix
        :param match_types: sequence of match type names (e.g. INDEVICE, CROSSDEVICE)
        :param base_weight: weight stored for this source
        :param yuids_per_devid_limit: limit stored for this source
        :param exact: stored as is
        :param enabled: stored under the attribute name ``active``
        :param strong: set to True for unbreakable links like indevice-YaBro,
            to avoid breaking them in graph_clustering.py
        """
        self.source_type = source_type
        self.match_types = match_types
        self.base_weight = base_weight
        self.yuids_per_devid_limit = yuids_per_devid_limit
        self.exact = exact
        # note the rename: constructor argument ``enabled`` -> attribute ``active``
        self.active = enabled
        self.strong = strong

    def name_per_match_type(self):
        """Yield "<source_type>_<match_type>" (or just the match type when
        source_type is empty) for each match type, in order."""
        if self.source_type:
            prefix = self.source_type + "_"
        else:
            prefix = ""
        for match_type in self.match_types:
            yield prefix + match_type


# device pair types
# Important: in descending priority order.

DEVID_PAIR_TYPES_PERFECT = [
    DevidPairType(ID_SOURCE_TYPE_YABROWSER_ANDROID, [INDEVICE], base_weight=1, strong=True),
    DevidPairType(ID_SOURCE_TYPE_SDK, [INDEVICE], base_weight=1, strong=True),
    DevidPairType(ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID, [INDEVICE], base_weight=1, strong=True),
    DevidPairType(ID_SOURCE_TYPE_METRICA_SOCKETS_IOS, [INDEVICE], base_weight=1, strong=True),
    DevidPairType(ID_SOURCE_TYPE_PASSPORT_OAUTH, [INDEVICE, CROSSDEVICE], base_weight=1),
    DevidPairType(ID_SOURCE_TYPE_ACCOUNT_MANAGER, [INDEVICE, CROSSDEVICE], base_weight=1),
    DevidPairType(ID_SOURCE_TYPE_REDIR, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_POSTCLICK, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_WATCH_LOG, [INDEVICE], base_weight=2.0),
    DevidPairType(ID_SOURCE_TYPE_ACCESS_LOG, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_TRACK, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_VMETRO, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_WATCH_YP_IOS, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_WATCH_YP_ANDROID, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_ACCESS_YP_IOS, [INDEVICE]),
    DevidPairType(ID_SOURCE_TYPE_ACCESS_YP_ANDROID, [INDEVICE]),
]

# source_type -> devid_pair_type
DEVID_PAIR_TYPES_PERFECT_DICT = {p.source_type: p for p in DEVID_PAIR_TYPES_PERFECT}

# NOTE(review): this order differs from the order of DEVID_PAIR_TYPES_PERFECT, and
# ID_SOURCE_TYPE_YABROWSER_IOS is listed here although no DevidPairType is declared
# for it above - confirm both are intended.
DEVID_PERFECT_INDEV_PRIORITY = [
    ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID,
    ID_SOURCE_TYPE_METRICA_SOCKETS_IOS,
    ID_SOURCE_TYPE_YABROWSER_ANDROID,
    ID_SOURCE_TYPE_YABROWSER_IOS,
    ID_SOURCE_TYPE_SDK,
    ID_SOURCE_TYPE_WATCH_LOG,
    ID_SOURCE_TYPE_ACCESS_LOG,
    ID_SOURCE_TYPE_TRACK,
    ID_SOURCE_TYPE_VMETRO,
    ID_SOURCE_TYPE_PASSPORT_OAUTH,
    ID_SOURCE_TYPE_ACCOUNT_MANAGER,
    ID_SOURCE_TYPE_REDIR,
    ID_SOURCE_TYPE_POSTCLICK,
    ID_SOURCE_TYPE_WATCH_YP_IOS,
    ID_SOURCE_TYPE_WATCH_YP_ANDROID,
    ID_SOURCE_TYPE_ACCESS_YP_IOS,
    ID_SOURCE_TYPE_ACCESS_YP_ANDROID,
]

# all "<source>_<match type>" names, e.g. "oauth_indev" and "oauth_crdev"
DEVID_PAIRS_NAMES_PERFECT = [name for p in DEVID_PAIR_TYPES_PERFECT for name in p.name_per_match_type()]
# alias: the very same list object, not a copy
DEVID_PAIRS_NAMES_ALL = DEVID_PAIRS_NAMES_PERFECT

# (id_type, source_type) -> name of the source group the pair belongs to
# TODO: move to pair types config
yuid_sources_groups = {
    (ID_TYPE_LOGIN, ID_SOURCE_TYPE_FP): "passport",
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_FP): "passport",
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_PASSPORT): "passport",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_PASSPORT): "passport",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_PASSPORT_DUMP): "passport",
    # mail ru is also fetched from page titles and yabro and watch-log adv
    # (the page-title key used to be listed twice; the duplicate had no effect
    # on the resulting dict and has been removed)
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_WATCH_LOG_MAILRU): "mailru",
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_BARLOG): "mailru",
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_PAGE_TITLE): "mailru",
    # ditmsk
    # (ID_TYPE_EMAIL, ID_SOURCE_TYPE_DITMSK): ID_SOURCE_TYPE_DITMSK,
    # uncomment it later, when email_hash id_type will be available
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_DITMSK): ID_SOURCE_TYPE_DITMSK,
    # vk
    (ID_TYPE_VKCOM, ID_SOURCE_TYPE_FP): "vk",
    (ID_TYPE_VKCOM, ID_SOURCE_TYPE_WATCH_LOG): "vk",
    (ID_TYPE_VKCOM, ID_SOURCE_TYPE_BARLOG): "vk",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_VK + "_" + ID_SOURCE_TYPE_FP): "vk",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_VK + "_" + ID_SOURCE_TYPE_WATCH_LOG): "vk",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_VK + "_" + ID_SOURCE_TYPE_BARLOG): "vk",
    # distribution
    (ID_TYPE_BAR_UI, ID_SOURCE_TYPE_BARLOG): "distr",
    (ID_TYPE_BAR_UI, ID_SOURCE_TYPE_EAL): "distr",
    (ID_TYPE_BAR_UI, ID_SOURCE_TYPE_PUNTO): "distr",
    (ID_TYPE_BAR_UI, ID_SOURCE_TYPE_EXTERNAL_BROWSERS): "distr",
    (ID_TYPE_BAR_R1, ID_SOURCE_TYPE_BARLOG): "distr",
    (ID_TYPE_BAR_R1, ID_SOURCE_TYPE_NEIGHBOUR_COOKIES): "distr",
    # yamoney
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_YAMONEY): "yamoney",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_YAMONEY): "yamoney",
    (ID_TYPE_YAMONEY_ACCOUNT, ID_SOURCE_TYPE_YAMONEY): "yamoney",
    (ID_TYPE_YAMONEY_CARD_TOKEN, ID_SOURCE_TYPE_YAMONEY): "yamoney",
    (ID_TYPE_YAMONEY_INTERNAL, ID_SOURCE_TYPE_YAMONEY): "yamoney",
    # tickets
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_TICKETS): "tickets",
    (ID_TYPE_PHONE, ID_SOURCE_TYPE_TICKETS): "tickets",
    # kinopoisk
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_KINOPOISK): "kinopoisk",
    (ID_TYPE_KINOPOISK_UID, ID_SOURCE_TYPE_KINOPOISK): "kinopoisk",
}

# devid-yuid source type -> source group name; only the sources listed here are grouped
d_y_source_groups = {
    # when email is taken into account in device matching
    ID_SOURCE_TYPE_ACCOUNT_MANAGER: "oauth",
    ID_SOURCE_TYPE_PASSPORT_OAUTH: "oauth",
    # redirect from app to browser are tracked in all these logs
    ID_SOURCE_TYPE_REDIR: "redir",
    ID_SOURCE_TYPE_WATCH_LOG: "redir",
    ID_SOURCE_TYPE_ACCESS_LOG: "redir",
}

# Field-name string constants.
# browser
FIELD_UA_PROFILE = "ua_profile"
FIELD_BROWSER_NAME = "browser"
FIELD_BROWSER_VERSION = "browser_version"
FIELD_BROWSER_WEBVIEW = "webview"

FIELD_UA = "ua"
FIELD_WAP_PROFILE = "wapprofile"

# socdem
FIELD_SOCDEM_SEX = "sex"
FIELD_SOCDEM_AGE = "age"
FIELD_SOCDEM_INCOME = "income"
FIELD_NEW_SOCDEM_SEX = "gender"
FIELD_NEW_SOCDEM_AGE = "age_segments"
FIELD_NEW_SOCDEM_INCOME = "income_segments"
FIELD_HEURISTIC_SEGMENTS = "heuristic_segments"
FIELD_PROBABILISTIC_SEGMENTS = "probabilistic_segments"
FIELD_INTERESTS_COMPOSITE = "interests_composite"
FIELD_EXACT_SOCDEM = "exact_socdem"

# ID types to write into dicts/yuid_ids table:
# (id_type, source_type) -> one-letter code
NEW_TO_OLD_PAIRS_MAPPING = {
    (ID_TYPE_LOGIN, ID_SOURCE_TYPE_FP): "l",
    (ID_TYPE_FUID, ID_SOURCE_TYPE_FP): "f",
    (ID_TYPE_VKCOM, ID_SOURCE_TYPE_FP): "v",
    (ID_TYPE_IP, ID_SOURCE_TYPE_FP): "i",
    (ID_TYPE_EMAIL, ID_SOURCE_TYPE_WEBVISOR): "w",
}

# vertices out params
FINAL_MERGE_LIMIT = 30

VERTICES_TYPE_EXACT = "exact"
VERTICES_TYPE_CLUSTER = "cluster"

VERTICES_TYPE_SMART_LIMITS = "_smart_limits"
VERTICES_TYPE_ONLY_ACTIVE = "_only_active"

# single entry: "exact_cluster" -> None
VERTICES_EXPERIMENTS = {VERTICES_TYPE_EXACT + "_" + VERTICES_TYPE_CLUSTER: None}

# Indevice magic constants

# This value is used to enlarge min_ts:max_ts interval to (min_ts - DELTA_TS:max_ts + DELTA_TS)
DELTA_TS = 15 * 60
# This is max time interval between start device activity and end device activity
WINDOW_TS = 5 * 60 * 60
# If we have only one candidate. How many hits from one yuid on same ip should be observed to make decision
HIT_THRESHOLD = 5
# If we have several devices as candidates, this count ratio is used to make the decision
MAX_PREV_RATIO = 3

# indevice unperfect matrixnet model
INDEVICE_UNPERFECT_MODEL = os.getenv("INDEVICE_UNPERFECT_MODEL")

EXISTING_CIDS_SRC = "prev"

# VW org email classifier model
# NOTE: because of str(), an unset CRYPTA_GRAPH_CRYPTA_HOME produces the literal
# path "None/state/extras/classifiers/org_emails.vw" rather than None or an error
VW_ORG_EMAIL_CLASSIFIER = str(CRYPTA_GRAPH_CRYPTA_HOME) + "/state/extras/classifiers/org_emails.vw"

# browsers white list for distribution plots (#cube_logs #appsplyer #distribution)
# all entries are lower-case; some contain spaces
DISTRIBUTION_BROWSERS_WHITE_LIST = [
    "mobilesafari",
    "androidbrowser",
    "chromemobile",
    "samsung internet",
    "ucbrowser",
    "operamobile",
    "mobilefirefox",
    "operamini",
    "cm browser",
    "coast",
    "firefox",
    "maxthon",
    "edge",
    "puffin",
    "sputnik",
    "amigo",
    "baidu",
    "iemobile",
    "blackberry",
]

YT_KEY_SIZE_LIMIT = 16384
METRICA_RSA_KEY_PATH = os.getenv("METRICA_RSA_KEY_PATH")
METRICA_RSA_KEY = os.getenv("METRICA_RSA_KEY")

# kept as a string (or None); no int conversion here
YT_JOB_MAX_MEMORY_BYTES = os.getenv("YT_JOB_MAX_MEMORY_BYTES")

# YQL tool locations, all derived from CRYPTA_PATH_PREFIX (default "/opt/crypta")
CRYPTA_PATH_PREFIX = os.getenv("CRYPTA_PATH_PREFIX", "/opt/crypta")
YQL_MRJOB = "{CRYPTA_PATH_PREFIX}/yql/tools/mrjob/mrjob".format(CRYPTA_PATH_PREFIX=CRYPTA_PATH_PREFIX)
YQL_UDF_RESOLVER = "{CRYPTA_PATH_PREFIX}/yql/tools/udf_resolver/udf_resolver".format(
    CRYPTA_PATH_PREFIX=CRYPTA_PATH_PREFIX
)
YQL_UDF_DIR = "{CRYPTA_PATH_PREFIX}/yql/udfs".format(CRYPTA_PATH_PREFIX=CRYPTA_PATH_PREFIX)

TVM_SECRET = os.getenv("TVM_SECRET")
TVM_API_URL = "tvm-api.yandex.net"
TVM_CRYPTA_SRC = 2000759
TVM_AUDIENCE_DST = 2000306

K_GB = 1 << 10 << 10 << 10  # 2 ** 30

# enabled only when the variable is exactly "yes"; unset means disabled
SOUP_BACKUP_ENABLE = os.getenv("SOUP_BACKUP_ENABLE") == "yes"
SOUP_BACKUP_MASTER = "hahn"
SOUP_BACKUP_SLAVE = "hahn"  # or "arnold"
SOUP_BACKUP_FOLDER = os.getenv("SOUP_BACKUP_FOLDER")
# 60 outside production, None in production
SOUP_TESTING_TTL_DAYS = 60 if CRYPTA_ENV != "production" else None

LOCAL_CLUSTERING_FOLDER = os.getenv("LOCAL_CLUSTERING_FOLDER", "/tmp")

# CRYPTR-1429 take only 1/10 of daily tables
SKIP_INPUT_DATA = True

# space-separated list turned into a set; empty set when unset
STREAMING_LOGS = set(os_get_list("STREAMING_LOGS"))
# how long we are ready to wait for streaming data from the start of the day
STREAMING_MAX_WAIT_SECONDS = 6 * 3600
# NOTE(review): presumably seconds, like the constant above - confirm
STREAMING_SLEEP_PERIOD = 10 * 60

# enabled by default: the variable must be set to something other than "yes" to disable
ENABLE_IDSTORAGE_BACKUP = os.getenv("ENABLE_IDSTORAGE_BACKUP", "yes") == "yes"
# int() raises ValueError at import time if any of these is set to a non-integer
SOUP_BACKUP_KEEP_DAYS = int(os.getenv("SOUP_BACKUP_KEEP_DAYS", "3"))
SOUP_BACKUP_KEEP_WEEKS = int(os.getenv("SOUP_BACKUP_KEEP_WEEKS", "2"))
SOUP_BACKUP_KEEP_MONTHS = int(os.getenv("SOUP_BACKUP_KEEP_MONTHS", "1"))

# boolean flags parsed by os_get_bool; the second argument is the default used when unset
FUZZY2_USE_UN = os_get_bool("FUZZY2_USE_UN", "True")
SOUP_DATES_ENABLED = os_get_bool("SOUP_DATES_ENABLED", "True")
WEBVISOR_TOLERANCE = os_get_bool("WEBVISOR_TOLERANCE", "True")
YQL_FORCE_EMBEDDED = os_get_bool("YQL_FORCE_EMBEDDED", "False")

SOUP_NORMALIZE_LAZY = os_get_bool("SOUP_NORMALIZE_LAZY", "False")
# NOTE(review): unit is not visible here (presumably an hour of the day) - confirm
WAKE_UP_TIME = 7
