import math
import re
import json
from datetime import datetime

class VideoProxima:
    """Position-discounted relevance metric for a video SERP.

    Every one of the first ``depth`` results contributes
    ``boost * rel_value / log(1 + pos)`` to the score (results at positions
    1-3 are additionally multiplied by a freshness fine).  The sum is divided
    by ``(1 + boost_value) * sum(1 / log(2 + i) for i in range(depth))``.

    ``rel_value`` is the numeric relevance mark plus, for relevant results,
    ``boost_value`` times a blend of a host bonus (share ``vh_boost``) and a
    weighted combination of quality factors (share ``1 - vh_boost``).
    """

    # Numeric value of the RELEVANT_MINUS relevance mark.
    _REL_MINUS_WEIGHT = 0.7
    # Host extraction pattern, compiled once for all calls.
    _HOST_PATTERN = re.compile('(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*')
    # Substrings of a result host that earn the ``vh_boost`` bonus.
    _VH_HOSTS = ('kinopoisk', 'frontend.vh', 'ott-widget')
    # ``text.SERVER_DESCR`` values that are exempt from the freshness fine.
    _FRESH_SOURCES = ('VIDEOQUICK', 'VIDEOQUICK_MISSPELL')
    # Player ids for which a "semi_pirate" result keeps its full weight.
    _UNFINED_SEMI_PIRATE_PLAYERS = ('ok', 'vk', 'mailru', 'mailru_new')
    # Multipliers for the colon-separated duration parts, last part first.
    _SECONDS_PER_PART = (1, 60, 3600)

    def __init__(self,
                 depth=5,
                 popularity_field = "popularity",
                 relevance_field = "video_relevance",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0,
                 vh_boost=0.1):
        """Store the metric configuration.

        depth -- number of top results that are scored.
        popularity_field -- name of the scale read as the popularity factor.
        relevance_field -- name of the scale holding the relevance mark.
        video_quality_norm_weight -- numeric value of the 'NORM' quality mark.
        *_not_porno_weight, *_porno_weight -- factor weights used when the
            query parameter ``is_porno`` is zero / non-zero.
        kernel_multiplier -- multiplier applied to the kernel weight in the
            normaliser of the factor combination (not in the numerator).
        boost_value -- largest bonus a relevant result can get on top of its
            relevance mark.
        fragment_hosts_weight, social_net_hosts_weight, pirate_hosts_weight --
            multipliers applied to results of film/series queries, see
            ``_piracy_boost``.
        freshness_always_fine, freshness_tmp_fine -- multipliers for top-3
            results, see ``_freshness_coef``.
        vh_boost -- share of the bonus given for hosts from ``_VH_HOSTS``.
        """
        self.depth = depth
        self.popularity_field = popularity_field
        self.relevance_field = relevance_field
        self.video_quality_norm_weight = video_quality_norm_weight

        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight

        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight

        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value

        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight
        # Durations are in seconds.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

        self.vh_boost = vh_boost

    def _relevance_value(self, label):
        """Map a relevance mark to a number; unknown marks raise KeyError."""
        marks = {'RELEVANT_PLUS': 1.0, 'RELEVANT_MINUS': self._REL_MINUS_WEIGHT,
                 'IRRELEVANT': 0.0, 'SOFT_404': 0.0, '_404': 0.0, 'VIRUS': 0.0}
        return marks[label]

    def _quality_value(self, label):
        """Map a video quality mark to a number; unknown marks raise KeyError."""
        marks = {'OK': 1.0, 'NORM': self.video_quality_norm_weight,
                 'BAD': 0.0, '_404': 0.0}
        return marks[label]

    def _boost_coef(self, relevance):
        """Return ``boost_value`` for relevant marks and 0.0 for any other."""
        if relevance in ('RELEVANT_PLUS', 'RELEVANT_MINUS'):
            return self.boost_value
        return 0.0

    @classmethod
    def _host_of(cls, url):
        """Extract the host part of ``url``.

        Returns an empty string when no host can be found (empty or missing
        URL) instead of failing on a missing regex match.
        """
        match = cls._HOST_PATTERN.search(url) if url else None
        return match.group('host') if match else ''

    def _video_quality_scale(self, serp_data):
        """Pick the quality scale name from the host of the SERP URL.

        Returns None when the host contains neither "yandex" nor "google".
        """
        host = self._host_of(serp_data["headers"]["cleanUrl"])
        if "yandex" in host:
            return "video_quality"
        if "google" in host:
            return "video_quality_google"
        return None

    def _factors_combination(self, res, params, video_quality_name):
        """Return the normalised weighted sum of the result's quality factors.

        Raises ValueError when the search system is unknown
        (``video_quality_name`` is None).  Returns 0.0 when the normaliser is
        zero, which is the case for the default all-zero weights.
        """
        if video_quality_name is None:
            raise ValueError("unknown search system: SERP host is neither yandex nor google")

        if int(params.get("is_porno", "0")):
            video_quality_weight = self.video_quality_porno_weight
            kernel_weight = self.kernel_porno_weight
            popularity_weight = self.popularity_porno_weight
            player_quality_weight = self.player_quality_porno_weight
        else:
            video_quality_weight = self.video_quality_not_porno_weight
            kernel_weight = self.kernel_not_porno_weight
            popularity_weight = self.popularity_not_porno_weight
            player_quality_weight = self.player_quality_not_porno_weight

        scales = res.scales
        video_quality = self._quality_value(scales.get(video_quality_name, '_404'))
        kernel = float(scales.get('video_albin_host_bundle_v2', 0.486))
        popularity = scales.get(self.popularity_field, 0)
        player_quality = scales.get('toloka_player_functionality', 0)

        weighted_sum = video_quality * video_quality_weight \
                       + kernel * kernel_weight \
                       + popularity * popularity_weight \
                       + player_quality * player_quality_weight

        # The kernel weight enters the normaliser scaled by kernel_multiplier.
        normaliser = video_quality_weight + kernel_weight * self.kernel_multiplier \
                     + popularity_weight + player_quality_weight
        if not normaliser:
            return 0.0
        return float(weighted_sum) / normaliser

    def _freshness_coef(self, params, source_freshness):
        """Return the freshness multiplier for a result.

        Results whose source is in ``_FRESH_SOURCES`` are never fined.  For
        the others ``is_always_freshness == 1`` selects
        ``freshness_always_fine``, otherwise ``is_tmp_freshness == 1`` selects
        ``freshness_tmp_fine``; without either flag the multiplier is 1.
        """
        is_always_freshness = int(params.get("is_always_freshness", "0"))
        is_tmp_freshness = int(params.get("is_tmp_freshness", "0"))

        if source_freshness in self._FRESH_SOURCES:
            return 1
        if is_always_freshness == 1:
            return self.freshness_always_fine
        if is_tmp_freshness == 1:
            return self.freshness_tmp_fine
        return 1

    def _duration_bounds(self, serp_data, is_film):
        """Return (min, max) duration in seconds of a full film/episode.

        A judged object duration other than 'unknown' gives a +-20% window;
        otherwise the film or series defaults are used.
        """
        judged = serp_data.get('judgements.video_object_duration')
        if judged and judged['name'] != 'unknown':
            object_duration = float(judged['name'])
            return object_duration * 0.8, object_duration * 1.2
        if is_film:
            return self.default_film_min_duration, self.default_film_max_duration
        return self.default_series_min_duration, self.default_series_max_duration

    def _duration_seconds(self, raw_duration):
        """Convert a colon-separated duration to seconds; empty/missing gives 0."""
        if not raw_duration:
            return 0
        parts = raw_duration.split(':')[::-1]
        return sum(int(part) * unit for part, unit in zip(parts, self._SECONDS_PER_PART))

    def _piracy_boost(self, res, min_duration, max_duration):
        """Return the multiplier applied to a result of a film/series query.

        Trailers keep 1.0.  Results shorter than ``min_duration`` are treated
        as fragments and get ``fragment_hosts_weight``.  The remaining results
        get ``social_net_hosts_weight`` or ``pirate_hosts_weight`` depending
        on the ``player_stivenson_type`` scale, or 1.0.
        """
        scales = res.scales
        piracy = scales.get("player_stivenson_type")
        duration = self._duration_seconds(scales.get("text.videoDuration"))

        if scales.get("trailer_toloka", "OTHER") == "TRAILER":
            return 1.0

        # Anything inside the window or longer than it counts as full content.
        is_content = (min_duration <= duration <= max_duration) or duration > max_duration
        if not is_content:
            return self.fragment_hosts_weight

        # NOTE(review): the "social_net" weight is applied when the player id
        # is NOT one of the listed ids -- confirm that this is intended.
        if piracy == "semi_pirate" \
                and scales.get("text.playerId") not in self._UNFINED_SEMI_PIRATE_PLAYERS:
            return self.social_net_hosts_weight
        if piracy == "pirate":
            return self.pirate_hosts_weight
        return 1.0

    def value(self, metric_params):
        """Compute the metric for one SERP.

        ``metric_params`` must provide ``results`` (a sequence of objects with
        ``pos`` and a ``scales`` mapping) and ``serp_data`` (a mapping with
        ``headers.cleanUrl`` and optional query parameters / judgements).

        Raises ValueError when a result has to be scored for a SERP whose
        host is neither yandex nor google, and KeyError for unknown
        relevance or quality marks.
        """
        serp_data = metric_params.serp_data
        results = metric_params.results[:self.depth]

        max_dcg = (1.0 + self.boost_value) * sum(1.0 / math.log(2 + pos) for pos in range(self.depth))

        params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))

        is_film = int(params.get("is_film", "0"))
        is_serial = int(params.get("is_serial", "0"))
        need_piracy_fine = bool(is_film or is_serial)

        min_duration = max_duration = None
        if need_piracy_fine:
            min_duration, max_duration = self._duration_bounds(serp_data, is_film)

        # Depends on the SERP only, so it is resolved once for all results.
        video_quality_name = self._video_quality_scale(serp_data)

        dcg = 0
        for res in results:
            pos_index = res.pos
            assert (pos_index >= 1)

            res_host = self._host_of(res.scales.get("componentUrl", {}).get("pageUrl", ""))
            is_vh = int(any(marker in res_host for marker in self._VH_HOSTS))

            relevance = res.scales.get(self.relevance_field, '_404')
            rel_numeric = self._relevance_value(relevance)
            my_clickadd = self._factors_combination(res, params, video_quality_name)
            rel_value = rel_numeric + self._boost_coef(relevance) * (
                self.vh_boost * is_vh + (1 - self.vh_boost) * my_clickadd)

            boost = 1.0
            if need_piracy_fine:
                boost = self._piracy_boost(res, min_duration, max_duration)

            fresh_fine = self._freshness_coef(params, res.scales.get('text.SERVER_DESCR', ''))

            # The freshness fine only affects the first three positions.
            if pos_index < 4:
                dcg += fresh_fine * boost * rel_value / math.log(1 + pos_index)
            else:
                dcg += boost * rel_value / math.log(1 + pos_index)

        return dcg / max_dcg

class VideoProximaVHboost:
    """Position-discounted relevance metric with a query-dependent host bonus.

    Every one of the first ``depth`` results contributes
    ``boost * rel_value / log(1 + pos)`` to the score (results at positions
    1-3 are additionally multiplied by a freshness fine).  The sum is divided
    by ``(1 + boost_value) * sum(1 / log(2 + i) for i in range(depth))``.

    ``rel_value`` is the numeric ``video_relevance`` mark plus, for relevant
    results, ``boost_value`` times a blend of a host bonus and a weighted
    combination of quality factors.  The share of the host bonus is
    ``vh_film_boost`` for film/series queries and ``vh_boost`` otherwise.
    """

    # Numeric value of the RELEVANT_MINUS relevance mark.
    _REL_MINUS_WEIGHT = 0.7
    # Name of the scale holding the relevance mark.
    _RELEVANCE_SCALE = "video_relevance"
    # Host extraction pattern, compiled once for all calls.
    _HOST_PATTERN = re.compile('(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*')
    # Substrings of a result host that earn the host bonus.
    _VH_HOSTS = ('kinopoisk', 'frontend.vh', 'ott-widget')
    # ``text.SERVER_DESCR`` values that are exempt from the freshness fine.
    _FRESH_SOURCES = ('VIDEOQUICK', 'VIDEOQUICK_MISSPELL')
    # Player ids for which a "semi_pirate" result keeps its full weight.
    _UNFINED_SEMI_PIRATE_PLAYERS = ('ok', 'vk', 'mailru', 'mailru_new')
    # Multipliers for the colon-separated duration parts, last part first.
    _SECONDS_PER_PART = (1, 60, 3600)

    def __init__(self,
                 depth=5,
                 popularity_field = "popularity",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0,
                 vh_boost=0.1,
                 vh_film_boost=0.2
                 ):
        """Store the metric configuration.

        depth -- number of top results that are scored.
        popularity_field -- name of the scale read as the popularity factor.
        video_quality_norm_weight -- numeric value of the 'NORM' quality mark.
        *_not_porno_weight, *_porno_weight -- factor weights used when the
            query parameter ``is_porno`` is zero / non-zero.
        kernel_multiplier -- multiplier applied to the kernel weight in the
            normaliser of the factor combination (not in the numerator).
        boost_value -- largest bonus a relevant result can get on top of its
            relevance mark.
        fragment_hosts_weight, social_net_hosts_weight, pirate_hosts_weight --
            multipliers applied to results of film/series queries, see
            ``_piracy_boost``.
        freshness_always_fine, freshness_tmp_fine -- multipliers for top-3
            results, see ``_freshness_coef``.
        vh_boost -- share of the host bonus for ordinary queries.
        vh_film_boost -- share of the host bonus for film/series queries.
        """
        self.depth = depth
        self.popularity_field = popularity_field
        self.video_quality_norm_weight = video_quality_norm_weight

        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight

        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight

        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value

        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight
        # Durations are in seconds.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

        self.vh_boost = vh_boost
        self.vh_film_boost = vh_film_boost

    def _relevance_value(self, label):
        """Map a relevance mark to a number; unknown marks raise KeyError."""
        marks = {'RELEVANT_PLUS': 1.0, 'RELEVANT_MINUS': self._REL_MINUS_WEIGHT,
                 'IRRELEVANT': 0.0, 'SOFT_404': 0.0, '_404': 0.0, 'VIRUS': 0.0}
        return marks[label]

    def _quality_value(self, label):
        """Map a video quality mark to a number; unknown marks raise KeyError."""
        marks = {'OK': 1.0, 'NORM': self.video_quality_norm_weight,
                 'BAD': 0.0, '_404': 0.0}
        return marks[label]

    def _boost_coef(self, relevance):
        """Return ``boost_value`` for relevant marks and 0.0 for any other."""
        if relevance in ('RELEVANT_PLUS', 'RELEVANT_MINUS'):
            return self.boost_value
        return 0.0

    @classmethod
    def _host_of(cls, url):
        """Extract the host part of ``url``.

        Returns an empty string when no host can be found (empty or missing
        URL) instead of failing on a missing regex match.
        """
        match = cls._HOST_PATTERN.search(url) if url else None
        return match.group('host') if match else ''

    def _video_quality_scale(self, serp_data):
        """Pick the quality scale name from the host of the SERP URL.

        Returns None when the host contains neither "yandex" nor "google".
        """
        host = self._host_of(serp_data["headers"]["cleanUrl"])
        if "yandex" in host:
            return "video_quality"
        if "google" in host:
            return "video_quality_google"
        return None

    def _factors_combination(self, res, params, video_quality_name):
        """Return the normalised weighted sum of the result's quality factors.

        Raises ValueError when the search system is unknown
        (``video_quality_name`` is None).  Returns 0.0 when the normaliser is
        zero, which is the case for the default all-zero weights.
        """
        if video_quality_name is None:
            raise ValueError("unknown search system: SERP host is neither yandex nor google")

        if int(params.get("is_porno", "0")):
            video_quality_weight = self.video_quality_porno_weight
            kernel_weight = self.kernel_porno_weight
            popularity_weight = self.popularity_porno_weight
            player_quality_weight = self.player_quality_porno_weight
        else:
            video_quality_weight = self.video_quality_not_porno_weight
            kernel_weight = self.kernel_not_porno_weight
            popularity_weight = self.popularity_not_porno_weight
            player_quality_weight = self.player_quality_not_porno_weight

        scales = res.scales
        video_quality = self._quality_value(scales.get(video_quality_name, '_404'))
        kernel = float(scales.get('video_albin_host_bundle_v2', 0.486))
        popularity = scales.get(self.popularity_field, 0)
        player_quality = scales.get('toloka_player_functionality', 0)

        weighted_sum = video_quality * video_quality_weight \
                       + kernel * kernel_weight \
                       + popularity * popularity_weight \
                       + player_quality * player_quality_weight

        # The kernel weight enters the normaliser scaled by kernel_multiplier.
        normaliser = video_quality_weight + kernel_weight * self.kernel_multiplier \
                     + popularity_weight + player_quality_weight
        if not normaliser:
            return 0.0
        return float(weighted_sum) / normaliser

    def _freshness_coef(self, params, source_freshness):
        """Return the freshness multiplier for a result.

        Results whose source is in ``_FRESH_SOURCES`` are never fined.  For
        the others ``is_always_freshness == 1`` selects
        ``freshness_always_fine``, otherwise ``is_tmp_freshness == 1`` selects
        ``freshness_tmp_fine``; without either flag the multiplier is 1.
        """
        is_always_freshness = int(params.get("is_always_freshness", "0"))
        is_tmp_freshness = int(params.get("is_tmp_freshness", "0"))

        if source_freshness in self._FRESH_SOURCES:
            return 1
        if is_always_freshness == 1:
            return self.freshness_always_fine
        if is_tmp_freshness == 1:
            return self.freshness_tmp_fine
        return 1

    def _duration_bounds(self, serp_data, is_film):
        """Return (min, max) duration in seconds of a full film/episode.

        A judged object duration other than 'unknown' gives a +-20% window;
        otherwise the film or series defaults are used.
        """
        judged = serp_data.get('judgements.video_object_duration')
        if judged and judged['name'] != 'unknown':
            object_duration = float(judged['name'])
            return object_duration * 0.8, object_duration * 1.2
        if is_film:
            return self.default_film_min_duration, self.default_film_max_duration
        return self.default_series_min_duration, self.default_series_max_duration

    def _duration_seconds(self, raw_duration):
        """Convert a colon-separated duration to seconds; empty/missing gives 0."""
        if not raw_duration:
            return 0
        parts = raw_duration.split(':')[::-1]
        return sum(int(part) * unit for part, unit in zip(parts, self._SECONDS_PER_PART))

    def _piracy_boost(self, res, min_duration, max_duration):
        """Return the multiplier applied to a result of a film/series query.

        Trailers keep 1.0.  Results shorter than ``min_duration`` are treated
        as fragments and get ``fragment_hosts_weight``.  The remaining results
        get ``social_net_hosts_weight`` or ``pirate_hosts_weight`` depending
        on the ``player_stivenson_type`` scale, or 1.0.
        """
        scales = res.scales
        piracy = scales.get("player_stivenson_type")
        duration = self._duration_seconds(scales.get("text.videoDuration"))

        if scales.get("trailer_toloka", "OTHER") == "TRAILER":
            return 1.0

        # Anything inside the window or longer than it counts as full content.
        is_content = (min_duration <= duration <= max_duration) or duration > max_duration
        if not is_content:
            return self.fragment_hosts_weight

        # NOTE(review): the "social_net" weight is applied when the player id
        # is NOT one of the listed ids -- confirm that this is intended.
        if piracy == "semi_pirate" \
                and scales.get("text.playerId") not in self._UNFINED_SEMI_PIRATE_PLAYERS:
            return self.social_net_hosts_weight
        if piracy == "pirate":
            return self.pirate_hosts_weight
        return 1.0

    def value(self, metric_params):
        """Compute the metric for one SERP.

        ``metric_params`` must provide ``results`` (a sequence of objects with
        ``pos`` and a ``scales`` mapping) and ``serp_data`` (a mapping with
        ``headers.cleanUrl`` and optional query parameters / judgements).

        Raises ValueError when a result has to be scored for a SERP whose
        host is neither yandex nor google, and KeyError for unknown
        relevance or quality marks.
        """
        serp_data = metric_params.serp_data
        results = metric_params.results[:self.depth]

        max_dcg = (1.0 + self.boost_value) * sum(1.0 / math.log(2 + pos) for pos in range(self.depth))

        params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))

        is_film = int(params.get("is_film", "0"))
        is_serial = int(params.get("is_serial", "0"))
        need_piracy_fine = bool(is_film or is_serial)

        # Film and series queries use their own share for the host bonus.
        vh_add = self.vh_film_boost if need_piracy_fine else self.vh_boost

        min_duration = max_duration = None
        if need_piracy_fine:
            min_duration, max_duration = self._duration_bounds(serp_data, is_film)

        # Depends on the SERP only, so it is resolved once for all results.
        video_quality_name = self._video_quality_scale(serp_data)

        dcg = 0
        for res in results:
            pos_index = res.pos
            assert (pos_index >= 1)

            res_host = self._host_of(res.scales.get("componentUrl", {}).get("pageUrl", ""))
            is_vh = int(any(marker in res_host for marker in self._VH_HOSTS))

            relevance = res.scales.get(self._RELEVANCE_SCALE, '_404')
            rel_numeric = self._relevance_value(relevance)
            my_clickadd = self._factors_combination(res, params, video_quality_name)
            rel_value = rel_numeric + self._boost_coef(relevance) * (
                vh_add * is_vh + (1 - vh_add) * my_clickadd)

            boost = 1.0
            if need_piracy_fine:
                boost = self._piracy_boost(res, min_duration, max_duration)

            fresh_fine = self._freshness_coef(params, res.scales.get('text.SERVER_DESCR', ''))

            # The freshness fine only affects the first three positions.
            if pos_index < 4:
                dcg += fresh_fine * boost * rel_value / math.log(1 + pos_index)
            else:
                dcg += boost * rel_value / math.log(1 + pos_index)

        return dcg / max_dcg


class VideoProximaAA:
    """Position-discounted relevance metric with duplicate and player terms.

    Every one of the first ``depth`` results contributes
    ``dups_fine * boost * rel_value / log(1 + pos)`` to the score (results at
    positions 1-3 are additionally multiplied by a freshness fine).  The sum
    is divided by ``(1 + boost_value) * sum(1 / log(2 + i) for i in range(depth))``.

    ``rel_value`` is the numeric ``video_relevance`` mark plus, for relevant
    results, ``boost_value`` times a blend of a player term derived from the
    ``player_prerolls_share`` scale (share ``player_boost``) and a weighted
    combination of quality factors (share ``1 - player_boost``).
    """

    # Numeric value of the RELEVANT_MINUS relevance mark.
    _REL_MINUS_WEIGHT = 0.7
    # Name of the scale holding the relevance mark.
    _RELEVANCE_SCALE = "video_relevance"
    # Host extraction pattern, compiled once for all calls.
    _HOST_PATTERN = re.compile('(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*')
    # ``text.SERVER_DESCR`` values that are exempt from the freshness fine.
    _FRESH_SOURCES = ('VIDEOQUICK', 'VIDEOQUICK_MISSPELL')
    # Player ids for which a "semi_pirate" result keeps its full weight.
    _UNFINED_SEMI_PIRATE_PLAYERS = ('ok', 'vk', 'mailru', 'mailru_new')
    # Multipliers for the colon-separated duration parts, last part first.
    _SECONDS_PER_PART = (1, 60, 3600)

    def __init__(self,
                 depth=5,
                 popularity_field = "popularity",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0,
                 dups_fine=0.5,
                 with_dups_fine=0,
                 player_boost=0.1,
                 preroll_middle = 0.25
                 ):
        """Store the metric configuration.

        depth -- number of top results that are scored.
        popularity_field -- name of the scale read as the popularity factor.
        video_quality_norm_weight -- numeric value of the 'NORM' quality mark.
        *_not_porno_weight, *_porno_weight -- factor weights used when the
            query parameter ``is_porno`` is zero / non-zero.
        kernel_multiplier -- multiplier applied to the kernel weight in the
            normaliser of the factor combination (not in the numerator).
        boost_value -- largest bonus a relevant result can get on top of its
            relevance mark.
        fragment_hosts_weight, social_net_hosts_weight, pirate_hosts_weight --
            multipliers applied to results of film/series queries, see
            ``_piracy_boost``.
        freshness_always_fine, freshness_tmp_fine -- multipliers for top-3
            results, see ``_freshness_coef``.
        dups_fine -- multiplier applied once more for every further result
            that shares the same positive ``video_dups`` value.
        with_dups_fine -- enables the duplicate fine when truthy.
        player_boost -- share of the player term in the bonus.
        preroll_middle -- preroll share at which the player term equals 0.5.
        """
        self.depth = depth
        self.popularity_field = popularity_field
        self.video_quality_norm_weight = video_quality_norm_weight
        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight
        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight
        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value
        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight
        # Durations are in seconds.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

        self.dups_fine = dups_fine
        self.with_dups_fine = with_dups_fine

        self.player_boost = player_boost
        self.preroll_middle = preroll_middle

    def _relevance_value(self, label):
        """Map a relevance mark to a number; unknown marks raise KeyError."""
        marks = {'RELEVANT_PLUS': 1.0, 'RELEVANT_MINUS': self._REL_MINUS_WEIGHT,
                 'IRRELEVANT': 0.0, 'SOFT_404': 0.0, '_404': 0.0, 'VIRUS': 0.0}
        return marks[label]

    def _quality_value(self, label):
        """Map a video quality mark to a number; unknown marks raise KeyError."""
        marks = {'OK': 1.0, 'NORM': self.video_quality_norm_weight,
                 'BAD': 0.0, '_404': 0.0}
        return marks[label]

    def _boost_coef(self, relevance):
        """Return ``boost_value`` for relevant marks and 0.0 for any other."""
        if relevance in ('RELEVANT_PLUS', 'RELEVANT_MINUS'):
            return self.boost_value
        return 0.0

    @classmethod
    def _host_of(cls, url):
        """Extract the host part of ``url``.

        Returns an empty string when no host can be found (empty or missing
        URL) instead of failing on a missing regex match.
        """
        match = cls._HOST_PATTERN.search(url) if url else None
        return match.group('host') if match else ''

    def _video_quality_scale(self, serp_data):
        """Pick the quality scale name from the host of the SERP URL.

        Returns None when the host contains neither "yandex" nor "google".
        """
        host = self._host_of(serp_data["headers"]["cleanUrl"])
        if "yandex" in host:
            return "video_quality"
        if "google" in host:
            return "video_quality_google"
        return None

    def _factors_combination(self, res, params, video_quality_name):
        """Return the normalised weighted sum of the result's quality factors.

        Raises ValueError when the search system is unknown
        (``video_quality_name`` is None).  Returns 0.0 when the normaliser is
        zero, which is the case for the default all-zero weights.
        """
        if video_quality_name is None:
            raise ValueError("unknown search system: SERP host is neither yandex nor google")

        if int(params.get("is_porno", "0")):
            video_quality_weight = self.video_quality_porno_weight
            kernel_weight = self.kernel_porno_weight
            popularity_weight = self.popularity_porno_weight
            player_quality_weight = self.player_quality_porno_weight
        else:
            video_quality_weight = self.video_quality_not_porno_weight
            kernel_weight = self.kernel_not_porno_weight
            popularity_weight = self.popularity_not_porno_weight
            player_quality_weight = self.player_quality_not_porno_weight

        scales = res.scales
        video_quality = self._quality_value(scales.get(video_quality_name, '_404'))
        kernel = float(scales.get('video_albin_host_bundle_v2', 0.486))
        popularity = scales.get(self.popularity_field, 0)
        player_quality = scales.get('toloka_player_functionality', 0)

        # NOTE: an override that forced popularity and kernel to 1.0 for the
        # hosts 'kinopoisk', 'frontend.vh' and 'ott-widget' used to be kept
        # here in disabled form; it is not applied.

        weighted_sum = video_quality * video_quality_weight \
                       + kernel * kernel_weight \
                       + popularity * popularity_weight \
                       + player_quality * player_quality_weight

        # The kernel weight enters the normaliser scaled by kernel_multiplier.
        normaliser = video_quality_weight + kernel_weight * self.kernel_multiplier \
                     + popularity_weight + player_quality_weight
        if not normaliser:
            return 0.0
        return float(weighted_sum) / normaliser

    def _freshness_coef(self, params, source_freshness):
        """Return the freshness multiplier for a result.

        Results whose source is in ``_FRESH_SOURCES`` are never fined.  For
        the others ``is_always_freshness == 1`` selects
        ``freshness_always_fine``, otherwise ``is_tmp_freshness == 1`` selects
        ``freshness_tmp_fine``; without either flag the multiplier is 1.
        """
        is_always_freshness = int(params.get("is_always_freshness", "0"))
        is_tmp_freshness = int(params.get("is_tmp_freshness", "0"))

        if source_freshness in self._FRESH_SOURCES:
            return 1
        if is_always_freshness == 1:
            return self.freshness_always_fine
        if is_tmp_freshness == 1:
            return self.freshness_tmp_fine
        return 1

    def _player_term(self, prerolls_share):
        """Logistic term that falls from 1 to 0 as the preroll share grows.

        Equals 0.5 when ``prerolls_share`` is ``preroll_middle``.
        """
        return 1.0 / (1.0 + math.exp(30 * (prerolls_share - self.preroll_middle)))

    def _duration_bounds(self, serp_data, is_film):
        """Return (min, max) duration in seconds of a full film/episode.

        A judged object duration other than 'unknown' gives a +-20% window;
        otherwise the film or series defaults are used.
        """
        judged = serp_data.get('judgements.video_object_duration')
        if judged and judged['name'] != 'unknown':
            object_duration = float(judged['name'])
            return object_duration * 0.8, object_duration * 1.2
        if is_film:
            return self.default_film_min_duration, self.default_film_max_duration
        return self.default_series_min_duration, self.default_series_max_duration

    def _duration_seconds(self, raw_duration):
        """Convert a colon-separated duration to seconds; empty/missing gives 0."""
        if not raw_duration:
            return 0
        parts = raw_duration.split(':')[::-1]
        return sum(int(part) * unit for part, unit in zip(parts, self._SECONDS_PER_PART))

    def _piracy_boost(self, res, min_duration, max_duration):
        """Return the multiplier applied to a result of a film/series query.

        Trailers keep 1.0.  Results shorter than ``min_duration`` are treated
        as fragments and get ``fragment_hosts_weight``.  The remaining results
        get ``social_net_hosts_weight`` or ``pirate_hosts_weight`` depending
        on the ``player_stivenson_type`` scale, or 1.0.
        """
        scales = res.scales
        piracy = scales.get("player_stivenson_type")
        duration = self._duration_seconds(scales.get("text.videoDuration"))

        if scales.get("trailer_toloka", "OTHER") == "TRAILER":
            return 1.0

        # Anything inside the window or longer than it counts as full content.
        is_content = (min_duration <= duration <= max_duration) or duration > max_duration
        if not is_content:
            return self.fragment_hosts_weight

        # NOTE(review): the "social_net" weight is applied when the player id
        # is NOT one of the listed ids -- confirm that this is intended.
        if piracy == "semi_pirate" \
                and scales.get("text.playerId") not in self._UNFINED_SEMI_PIRATE_PLAYERS:
            return self.social_net_hosts_weight
        if piracy == "pirate":
            return self.pirate_hosts_weight
        return 1.0

    def value(self, metric_params):
        """Compute the metric for one SERP.

        ``metric_params`` must provide ``results`` (a sequence of objects with
        ``pos`` and a ``scales`` mapping) and ``serp_data`` (a mapping with
        ``headers.cleanUrl`` and optional query parameters / judgements).

        Raises ValueError when a result has to be scored for a SERP whose
        host is neither yandex nor google, and KeyError for unknown
        relevance or quality marks.
        """
        serp_data = metric_params.serp_data
        results = metric_params.results[:self.depth]
        max_dcg = (1.0 + self.boost_value) * sum(1.0 / math.log(2 + pos) for pos in range(self.depth))
        params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))

        is_film = int(params.get("is_film", "0"))
        is_serial = int(params.get("is_serial", "0"))
        need_piracy_fine = bool(is_film or is_serial)

        min_duration = max_duration = None
        if need_piracy_fine:
            min_duration, max_duration = self._duration_bounds(serp_data, is_film)

        # Depends on the SERP only, so it is resolved once for all results.
        video_quality_name = self._video_quality_scale(serp_data)

        dcg = 0
        # Last fine applied per ``video_dups`` value; each further duplicate
        # is fined ``self.dups_fine`` times harder than the previous one.
        fines_by_dup = {}

        for res in results:
            pos_index = res.pos
            assert (pos_index >= 1)

            dup_multiplier = 1
            if self.with_dups_fine:
                dup_pos = int(res.scales.get("video_dups", "-1"))
                if dup_pos > 0:
                    dup_multiplier = fines_by_dup.get(dup_pos, 1) * self.dups_fine
                    fines_by_dup[dup_pos] = dup_multiplier

            relevance = res.scales.get(self._RELEVANCE_SCALE, '_404')
            rel_numeric = self._relevance_value(relevance)
            my_clickadd = self._factors_combination(res, params, video_quality_name)

            prerolls_share = res.scales.get("player_prerolls_share", 1.0)
            rel_value = rel_numeric + self._boost_coef(relevance) * (
                self.player_boost * self._player_term(prerolls_share)
                + (1 - self.player_boost) * my_clickadd)

            boost = 1.0
            if need_piracy_fine:
                boost = self._piracy_boost(res, min_duration, max_duration)

            fresh_fine = self._freshness_coef(params, res.scales.get('text.SERVER_DESCR', ''))

            # The freshness fine only affects the first three positions.
            if pos_index < 4:
                dcg += dup_multiplier * fresh_fine * boost * rel_value / math.log(1 + pos_index)
            else:
                dcg += dup_multiplier * boost * rel_value / math.log(1 + pos_index)

        return dcg / max_dcg


class VideoProximaVH:
    """DCG-style metric for video SERPs with a content-id check on position 1.

    ``value`` scores the first ``depth`` results. Every result contributes
    ``rel_value / ln(1 + pos)`` multiplied by a set of fines (duplicates,
    freshness, piracy/fragment, content-id mismatch); the sum is divided by
    ``(1 + boost_value) * sum(1 / ln(2 + i) for i in range(depth))``.

    Every constructor argument is stored on the instance under the same name.
    """

    def __init__(self,
                 depth=5,
                 popularity_field="popularity",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0,
                 dups_fine=0.5,
                 with_dups_fine=0,
                 player_boost=0.1,
                 preroll_middle=0.25,
                 custom_component_only=None
                 ):
        self.depth = depth
        self.popularity_field = popularity_field
        self.video_quality_norm_weight = video_quality_norm_weight

        # Click-add component weights; the "porno" set is used when the query
        # params carry a truthy ``is_porno`` flag.
        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight
        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight

        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value

        # Multipliers applied to film/serial results depending on piracy type
        # and duration class.
        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight

        # Default duration bounds, in seconds.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

        self.dups_fine = dups_fine
        self.with_dups_fine = with_dups_fine

        self.player_boost = player_boost
        self.preroll_middle = preroll_middle

        self.custom_component_only = custom_component_only

    def value(self, metric_params):
        """Return the normalised DCG of ``metric_params.results[:self.depth]``.

        ``metric_params`` must provide ``results`` (objects exposing ``pos``
        and a ``scales`` mapping) and ``serp_data`` (a mapping that contains
        ``["headers"]["cleanUrl"]``).

        Raises:
            ValueError: results are present but the SERP host contains neither
                "yandex" nor "google".
            KeyError: a relevance or video-quality label is not a known grade.
        """
        REL_MINUS_WEIGHT = 0.7
        BOOST_VALUE = self.boost_value
        serp_data = metric_params.serp_data

        relevance_gain = {'RELEVANT_PLUS': 1.0, 'RELEVANT_MINUS': REL_MINUS_WEIGHT, 'IRRELEVANT': 0.0,
                          'SOFT_404': 0.0, '_404': 0.0, 'VIRUS': 0.0}
        quality_gain = {'OK': 1.0, 'NORM': self.video_quality_norm_weight, 'BAD': 0.0, '_404': 0.0}
        relevance_boost = {'RELEVANT_PLUS': BOOST_VALUE,
                           'RELEVANT_MINUS': BOOST_VALUE,
                           'IRRELEVANT': 0.0}
        fresh_sources = ('VIDEOQUICK', 'VIDEOQUICK_MISSPELL')
        semi_pirate_exempt_players = ("ok", "vk", "mailru", "mailru_new")

        def get_host(url):
            # An empty URL does not match the pattern at all; report "no host"
            # instead of failing on ``None.group``.
            m = re.search(r'(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*', url)
            return m.group('host') if m else ''

        def get_factor_names(data):
            host = get_host(data["headers"]["cleanUrl"])
            if "yandex" in host:
                return "video_quality", True
            elif "google" in host:
                return "video_quality_google", False
            else:
                return "unknown_system", None

        def need_player_boost(prerolls_share, preroll_middle):
            # Logistic curve: close to 1 below ``preroll_middle``, close to 0 above.
            return 1.0 / (1.0 + math.exp(30 * (prerolls_share - preroll_middle)))

        results = metric_params.results[:self.depth]
        max_dcg = (1.0 + BOOST_VALUE) * sum([1.0 / math.log(2 + pos) for pos in range(self.depth)])
        params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))
        updated_content_ids = json.loads(serp_data.get("judgements.video_content_ids", {}).get("name", "[]"))

        # Piracy / fragment fines only apply to film and serial queries.
        is_film = int(params.get("is_film", "0"))
        is_serial = int(params.get("is_serial", "0"))
        need_piracy_fine = bool(is_film or is_serial)
        min_duration = None
        if need_piracy_fine:
            judged_duration = serp_data.get('judgements.video_object_duration')
            if judged_duration and judged_duration['name'] != 'unknown':
                min_duration = float(judged_duration['name']) * 0.8
            elif is_film:
                min_duration = self.default_film_min_duration
            else:
                min_duration = self.default_series_min_duration

        video_quality_name, _ = get_factor_names(serp_data)

        # The weight set depends only on the query, so pick it once.
        if int(params.get("is_porno", "0")):
            video_quality_weight = self.video_quality_porno_weight
            kernel_weight = self.kernel_porno_weight
            popularity_weight = self.popularity_porno_weight
            player_quality_weight = self.player_quality_porno_weight
        else:
            video_quality_weight = self.video_quality_not_porno_weight
            kernel_weight = self.kernel_not_porno_weight
            popularity_weight = self.popularity_not_porno_weight
            player_quality_weight = self.player_quality_not_porno_weight
        weights_total = (video_quality_weight + kernel_weight * self.kernel_multiplier
                         + popularity_weight + player_quality_weight)

        # Fine for results whose source is not one of the fresh ones.
        if int(params.get("is_always_freshness", "0")) == 1:
            stale_source_fine = self.freshness_always_fine
        elif int(params.get("is_tmp_freshness", "0")) == 1:
            stale_source_fine = self.freshness_tmp_fine
        else:
            stale_source_fine = 1

        def get_factors_combination(scales):
            """Weighted, normalised mix of quality/kernel/popularity/player factors."""
            if video_quality_name == "unknown_system":
                raise ValueError("cannot pick the video quality scale: "
                                 "SERP host is neither yandex nor google")
            video_quality = quality_gain[scales.get(video_quality_name, '_404')]
            kernel = float(scales.get('video_albin_host_bundle_v2', 0.486))
            popularity = scales.get(self.popularity_field, 0)
            player_quality = scales.get('toloka_player_functionality', 0)

            single = self.custom_component_only
            if single == 'popularity':
                my_clickadd = popularity * popularity_weight
            elif single == 'video_quality':
                my_clickadd = video_quality * video_quality_weight
            elif single == 'kernel':
                my_clickadd = kernel * kernel_weight
            elif single == 'player_quality':
                my_clickadd = player_quality * player_quality_weight
            else:
                my_clickadd = video_quality * video_quality_weight \
                              + kernel * kernel_weight \
                              + popularity * popularity_weight \
                              + player_quality * player_quality_weight
            if not weights_total:
                # All weights are zero (the constructor defaults): there is
                # nothing to normalise by, so the component contributes nothing.
                return 0.0
            return float(my_clickadd) / weights_total

        dcg = 0
        dups_fines = {}

        for res in results:
            scales = res.scales
            pos_index = res.pos
            assert (pos_index >= 1)

            # Every further duplicate of the same original is fined harder:
            # the running fine is multiplied by ``self.dups_fine`` each time.
            dup_penalty = 1
            if self.with_dups_fine:
                dup_pos = int(scales.get("video_dups", "-1"))
                if dup_pos > 0:
                    dup_penalty = dups_fines.get(dup_pos, 1) * self.dups_fine
                    dups_fines[dup_pos] = dup_penalty

            res_url = scales.get("componentUrl", {}).get("pageUrl", "")

            relevance = scales.get("video_relevance", '_404')
            rel_numeric = relevance_gain[relevance]
            boost_coef = relevance_boost.get(relevance, 0.0)
            my_clickadd = get_factors_combination(scales)

            if self.custom_component_only is not None:
                rel_value = boost_coef * (1 - self.player_boost) * my_clickadd
            else:
                prerolls_share = scales.get("player_prerolls_share", 1.0)
                rel_value = rel_numeric + boost_coef * (
                    self.player_boost * need_player_boost(prerolls_share, self.preroll_middle)
                    + (1 - self.player_boost) * my_clickadd)

            boost = 1.0
            if need_piracy_fine:
                raw_duration = scales.get("text.videoDuration")
                if not raw_duration:
                    duration = 0
                else:
                    # "[[H:]M:]S" -> seconds, reading the fields right to left.
                    duration = sum(int(part) * mult
                                   for part, mult in zip(raw_duration.split(':')[::-1], (1, 60, 3600)))
                is_trailer = scales.get("trailer_toloka", "OTHER") == "TRAILER"
                if not is_trailer:
                    if duration >= min_duration:
                        # Long enough to be the full content: fine by piracy type.
                        piracy = scales.get("player_stivenson_type")
                        player_id = scales.get("text.playerId")
                        if piracy == "semi_pirate" and player_id not in semi_pirate_exempt_players:
                            boost = self.social_net_hosts_weight
                        elif piracy == "pirate":
                            boost = self.pirate_hosts_weight
                    else:
                        boost = self.fragment_hosts_weight

            if scales.get('text.SERVER_DESCR', '') in fresh_sources:
                fresh_fine = 1
            else:
                fresh_fine = stale_source_fine

            # The top result is zeroed when its URL tail is not one of the
            # judged content ids (only when such ids are supplied).
            vh_fine = 1.0
            if pos_index == 1 and len(updated_content_ids) > 0:
                current_content_id = res_url.split('/')[-1]
                if current_content_id != "" and current_content_id not in updated_content_ids:
                    vh_fine = 0.0

            # The freshness fine only affects the first three positions.
            if pos_index < 4:
                dcg += vh_fine * dup_penalty * fresh_fine * boost * rel_value / math.log(1 + pos_index)
            else:
                dcg += vh_fine * dup_penalty * boost * rel_value / math.log(1 + pos_index)

        return dcg / max_dcg


class VideoProximaClean:
    """DCG-style metric for video SERPs with configurable scale names and an
    optional actuality (publication age) fine.

    ``value`` scores the first ``depth`` results. Every result contributes
    ``rel_value / ln(1 + pos)`` multiplied by a set of fines (duplicates,
    freshness, actuality, piracy/fragment, content-id mismatch); the sum is
    divided by ``(1 + boost_value) * sum(1 / ln(2 + i) for i in range(depth))``.

    Every constructor argument is stored on the instance under the same name.
    """

    def __init__(self,
                 depth=5,
                 popularity_field="popularity",
                 relevance_field="video_relevance",
                 video_dups_field="video_dups",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0,
                 dups_fine=0.5,
                 with_dups_fine=0,
                 player_boost=0.1,
                 preroll_middle=0.25,
                 custom_component_only=None,
                 consider_actuality=False,
                 actuality_no_fine_age_days=30,
                 actuality_max_fine_age_days=1095,
                 actuality_max_fine=0.5
                 ):
        self.depth = depth
        self.popularity_field = popularity_field
        self.relevance_field = relevance_field
        self.video_dups_field = video_dups_field
        self.video_quality_norm_weight = video_quality_norm_weight

        # Click-add component weights; the "porno" set is used when the query
        # params carry a truthy ``is_porno`` flag.
        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight
        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight

        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value

        # Multipliers applied to film/serial results depending on piracy type
        # and duration class.
        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight

        # Default duration bounds, in seconds.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

        self.dups_fine = dups_fine
        self.with_dups_fine = with_dups_fine

        self.player_boost = player_boost
        self.preroll_middle = preroll_middle

        self.custom_component_only = custom_component_only

        self.consider_actuality = consider_actuality
        self.actuality_no_fine_age_days = actuality_no_fine_age_days
        self.actuality_max_fine_age_days = actuality_max_fine_age_days
        self.actuality_max_fine = actuality_max_fine

    def value(self, metric_params):
        """Return the normalised DCG of ``metric_params.results[:self.depth]``.

        ``metric_params`` must provide ``results`` (objects exposing ``pos``
        and a ``scales`` mapping) and ``serp_data`` (a mapping that contains
        ``["headers"]["cleanUrl"]``; ``["headers"]["timestamp"]`` is read as
        milliseconds when present).

        Raises:
            ValueError: results are present but the SERP host contains neither
                "yandex" nor "google".
            KeyError: a relevance or video-quality label is not a known grade.
        """
        REL_MINUS_WEIGHT = 0.7
        BOOST_VALUE = self.boost_value
        serp_data = metric_params.serp_data

        relevance_gain = {
            'RELEVANT_PLUS': 1.0,
            'RELEVANT_PLUS_NO_PLAYER': 0.5,
            'RELEVANT_MINUS': REL_MINUS_WEIGHT,
            'RELEVANT_MINUS_NO_PLAYER': REL_MINUS_WEIGHT / 2,
            'IRRELEVANT': 0.0,
            'IRRELEVANT_NO_PLAYER': 0.0,
            'SOFT_404': 0.0,
            'SOFT_404_NO_PLAYER': 0.0,
            '_404': 0.0,
            '_404_NO_PLAYER': 0.0,
            'VIRUS': 0.0,
            'VIRUS_NO_PLAYER': 0.0,
        }
        quality_gain = {'OK': 1.0, 'NORM': self.video_quality_norm_weight, 'BAD': 0.0, '_404': 0.0}
        # Only the grades listed here get the click-add boost; every other
        # label (including the *_NO_PLAYER ones) gets none.
        relevance_boost = {'RELEVANT_PLUS': BOOST_VALUE,
                           'RELEVANT_MINUS': BOOST_VALUE,
                           'IRRELEVANT': 0.0}
        fresh_sources = ('VIDEOQUICK', 'VIDEOQUICK_MISSPELL')
        semi_pirate_exempt_players = ("ok", "vk", "mailru", "mailru_new")

        def get_host(url):
            # An empty URL does not match the pattern at all; report "no host"
            # instead of failing on ``None.group``.
            m = re.search(r'(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*', url)
            return m.group('host') if m else ''

        def get_factor_names(data):
            host = get_host(data["headers"]["cleanUrl"])
            if "yandex" in host:
                return "video_quality", True
            elif "google" in host:
                return "video_quality_google", False
            else:
                return "unknown_system", None

        def need_player_boost(prerolls_share, preroll_middle):
            # Logistic curve: close to 1 below ``preroll_middle``, close to 0 above.
            return 1.0 / (1.0 + math.exp(30 * (prerolls_share - preroll_middle)))

        results = metric_params.results[:self.depth]
        max_dcg = (1.0 + BOOST_VALUE) * sum([1.0 / math.log(2 + pos) for pos in range(self.depth)])
        params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))
        updated_content_ids = json.loads(serp_data.get("judgements.video_content_ids", {}).get("name", "[]"))

        actuality_is_needed = self.consider_actuality and \
            serp_data.get("serp_query_param.video_query_actuality", "no_need_actuality") == "need_actuality"
        # The header timestamp is in milliseconds; the "now" fallback is
        # already in seconds and must not be scaled down.
        header_ts_ms = serp_data.get("headers", {}).get("timestamp")
        if header_ts_ms is None:
            serp_download_ts = datetime.now().timestamp()
        else:
            serp_download_ts = header_ts_ms / 1000

        def actuality(published_date):
            """Multiplier in [actuality_max_fine, 1] that decreases with video age.

            No fine up to ``actuality_no_fine_age_days``; ``actuality_max_fine``
            past ``actuality_max_fine_age_days``; linear in between so that the
            coefficient is continuous at both boundaries.
            """
            if not (actuality_is_needed and published_date):
                return 1
            downloaded = datetime.fromtimestamp(serp_download_ts).date()
            published = datetime.strptime(published_date, "%Y-%m-%dT%H:%M:%S%z").date()
            days_passed = (downloaded - published).days
            no_fine_age = self.actuality_no_fine_age_days
            max_fine_age = self.actuality_max_fine_age_days
            if days_passed > max_fine_age:
                return self.actuality_max_fine
            if days_passed > no_fine_age:
                ramp = float(days_passed - no_fine_age) / (max_fine_age - no_fine_age)
                return 1 - (1 - self.actuality_max_fine) * ramp
            return 1

        # Piracy / fragment fines only apply to film and serial queries.
        is_film = int(params.get("is_film", "0"))
        is_serial = int(params.get("is_serial", "0"))
        need_piracy_fine = bool(is_film or is_serial)
        min_duration = None
        if need_piracy_fine:
            judged_duration = serp_data.get('judgements.video_object_duration')
            if judged_duration and judged_duration['name'] != 'unknown':
                min_duration = float(judged_duration['name']) * 0.8
            elif is_film:
                min_duration = self.default_film_min_duration
            else:
                min_duration = self.default_series_min_duration

        video_quality_name, _ = get_factor_names(serp_data)

        # The weight set depends only on the query, so pick it once.
        if int(params.get("is_porno", "0")):
            video_quality_weight = self.video_quality_porno_weight
            kernel_weight = self.kernel_porno_weight
            popularity_weight = self.popularity_porno_weight
            player_quality_weight = self.player_quality_porno_weight
        else:
            video_quality_weight = self.video_quality_not_porno_weight
            kernel_weight = self.kernel_not_porno_weight
            popularity_weight = self.popularity_not_porno_weight
            player_quality_weight = self.player_quality_not_porno_weight
        weights_total = (video_quality_weight + kernel_weight * self.kernel_multiplier
                         + popularity_weight + player_quality_weight)

        # Fine for results whose source is not one of the fresh ones.
        if int(params.get("is_always_freshness", "0")) == 1:
            stale_source_fine = self.freshness_always_fine
        elif int(params.get("is_tmp_freshness", "0")) == 1:
            stale_source_fine = self.freshness_tmp_fine
        else:
            stale_source_fine = 1

        def get_factors_combination(scales, res_url):
            """Weighted, normalised mix of quality/kernel/popularity/player factors."""
            if video_quality_name == "unknown_system":
                raise ValueError("cannot pick the video quality scale: "
                                 "SERP host is neither yandex nor google")
            video_quality = quality_gain[scales.get(video_quality_name, '_404')]
            kernel_bundle = json.loads(scales.get('proxima_business_kernel_2019_10', '{}'))
            kernel = kernel_bundle.get('biz_kernel', 0.43) + 0.5

            popularity = scales.get(self.popularity_field, 0)
            if 'frontend.vh' in get_host(res_url):
                # Results on this host use the predicted popularity scale instead.
                popularity = scales.get('popularity_prediction', 0)
            player_quality = scales.get('toloka_player_functionality', 0)

            single = self.custom_component_only
            if single == 'popularity':
                my_clickadd = popularity * popularity_weight
            elif single == 'video_quality':
                my_clickadd = video_quality * video_quality_weight
            elif single == 'kernel':
                my_clickadd = kernel * kernel_weight
            elif single == 'player_quality':
                my_clickadd = player_quality * player_quality_weight
            else:
                my_clickadd = video_quality * video_quality_weight \
                              + kernel * kernel_weight \
                              + popularity * popularity_weight \
                              + player_quality * player_quality_weight
            if not weights_total:
                # All weights are zero (the constructor defaults): there is
                # nothing to normalise by, so the component contributes nothing.
                return 0.0
            return float(my_clickadd) / weights_total

        dcg = 0
        dups_fines = {}

        for res in results:
            scales = res.scales
            pos_index = res.pos
            assert (pos_index >= 1)

            # Every further duplicate of the same original is fined harder:
            # the running fine is multiplied by ``self.dups_fine`` each time.
            dup_penalty = 1
            if self.with_dups_fine:
                dup_pos = int(scales.get(self.video_dups_field, "-1"))
                if dup_pos > 0:
                    dup_penalty = dups_fines.get(dup_pos, 1) * self.dups_fine
                    dups_fines[dup_pos] = dup_penalty

            res_url = scales.get("componentUrl", {}).get("pageUrl", "")

            relevance = scales.get(self.relevance_field, '_404')
            rel_numeric = relevance_gain[relevance]
            boost_coef = relevance_boost.get(relevance, 0.0)
            my_clickadd = get_factors_combination(scales, res_url)

            if self.custom_component_only is not None:
                rel_value = boost_coef * (1 - self.player_boost) * my_clickadd
            else:
                prerolls_share = scales.get("player_prerolls_share", 1.0)
                rel_value = rel_numeric + boost_coef * (
                    self.player_boost * need_player_boost(prerolls_share, self.preroll_middle)
                    + (1 - self.player_boost) * my_clickadd)

            boost = 1.0
            if need_piracy_fine:
                raw_duration = scales.get("text.videoDuration")
                if not raw_duration:
                    duration = 0
                else:
                    # "[[H:]M:]S" -> seconds, reading the fields right to left.
                    duration = sum(int(part) * mult
                                   for part, mult in zip(raw_duration.split(':')[::-1], (1, 60, 3600)))
                is_trailer = scales.get("trailer_toloka", "OTHER") == "TRAILER"
                if not is_trailer:
                    if duration >= min_duration:
                        # Long enough to be the full content: fine by piracy type.
                        piracy = scales.get("player_stivenson_type")
                        player_id = scales.get("text.playerId")
                        if piracy == "semi_pirate" and player_id not in semi_pirate_exempt_players:
                            boost = self.social_net_hosts_weight
                        elif piracy == "pirate":
                            boost = self.pirate_hosts_weight
                    else:
                        boost = self.fragment_hosts_weight

            if scales.get('text.SERVER_DESCR', '') in fresh_sources:
                fresh_fine = 1
            else:
                fresh_fine = stale_source_fine
            actuality_fine = actuality(scales.get("text.publishedDate"))

            # The top result is zeroed when its URL tail is not one of the
            # judged content ids (only when such ids are supplied).
            vh_fine = 1.0
            if pos_index == 1 and len(updated_content_ids) > 0:
                current_content_id = res_url.split('/')[-1]
                if current_content_id != "" and current_content_id not in updated_content_ids:
                    vh_fine = 0.0

            # The freshness fine only affects the first three positions.
            if pos_index < 4:
                dcg += vh_fine * dup_penalty * fresh_fine * actuality_fine * boost * rel_value / math.log(1 + pos_index)
            else:
                dcg += vh_fine * dup_penalty * actuality_fine * boost * rel_value / math.log(1 + pos_index)

        return dcg / max_dcg


class VideoProximaCleanNormedMyClickadd:
    """Normalised "my clickadd" component of the video Proxima metric.

    For every SERP position the metric computes a weighted combination of
    four factors read from the result's scales (video quality, business
    kernel, popularity, player quality) and divides it by a normalising sum
    of the weights.  The per-position values are then summed over the first
    ``depth`` positions and divided by ``depth``.

    Two weight sets exist ("porno" / "not porno"); the one to use is picked
    per query from the ``is_porno`` key of the query's custom parameters.

    Most constructor arguments are simply stored on the instance.  Within
    this class only ``depth``, ``popularity_field``,
    ``video_quality_norm_weight``, the eight ``*_porno_weight`` /
    ``*_not_porno_weight`` values, ``kernel_multiplier`` and
    ``custom_component_only`` are read by the methods below.
    """

    def __init__(self,
                 depth=5,
                 popularity_field="popularity",
                 relevance_field="video_relevance",
                 video_dups_field="video_dups",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0,
                 dups_fine=0.5,
                 with_dups_fine=0,
                 player_boost=0.1,
                 preroll_middle=0.25,
                 custom_component_only=None,
                 consider_actuality=False,
                 actuality_no_fine_age_days=30,
                 actuality_max_fine_age_days=1095,
                 actuality_max_fine=0.5):
        """Store the metric configuration.

        ``custom_component_only`` may be ``"popularity"``,
        ``"video_quality"``, ``"kernel"`` or ``"player_quality"`` to use a
        single factor in the numerator; any other value (including the
        default ``None``) uses the full weighted sum.
        """
        self.depth = depth
        self.popularity_field = popularity_field
        self.relevance_field = relevance_field
        self.video_dups_field = video_dups_field
        self.video_quality_norm_weight = video_quality_norm_weight

        # Factor weights for queries that are not flagged ``is_porno``.
        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight

        # Factor weights for queries flagged ``is_porno``.
        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight

        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value

        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight

        # Duration bounds, in seconds.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

        self.dups_fine = dups_fine
        self.with_dups_fine = with_dups_fine

        self.player_boost = player_boost
        self.preroll_middle = preroll_middle

        self.custom_component_only = custom_component_only

        self.consider_actuality = consider_actuality
        self.actuality_no_fine_age_days = actuality_no_fine_age_days
        self.actuality_max_fine_age_days = actuality_max_fine_age_days
        self.actuality_max_fine = actuality_max_fine

        # Both are filled in by ``precompute`` for the SERP being scored.
        self.params = None
        self.video_quality_name = None

    def _get_host(self, url):
        """Return the host part of ``url``.

        An optional ``http...://`` prefix is skipped and the host is the
        following run of characters up to the first ``:``, ``/`` or space.
        Returns an empty string when ``url`` is empty/``None`` or contains no
        host, so that callers which default a missing URL to ``""`` do not
        crash.
        """
        if not url:
            return ""
        p = "(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*"
        m = re.search(p, url)
        if m is None:
            # E.g. a URL consisting only of separators such as "/" or ":".
            return ""
        return m.group('host')

    def _get_factor_names(self, serp_data):
        """Pick the video-quality scale name from the SERP's search engine.

        Returns a ``(scale_name, flag)`` pair:
        ``("video_quality", True)`` when the SERP host contains "yandex",
        ``("video_quality_google", False)`` when it contains "google", and
        ``("unknown_system", None)`` otherwise.
        """
        host = self._get_host(serp_data["headers"]["cleanUrl"])
        if "yandex" in host:
            return "video_quality", True
        if "google" in host:
            return "video_quality_google", False
        return "unknown_system", None

    def precompute(self, metric_params):
        """Parse per-query parameters and resolve the quality scale name.

        Reads the JSON stored under ``serp_query_param.custom_param`` of
        ``metric_params.serp_data`` (an empty object when absent) into
        ``self.params`` and sets ``self.video_quality_name``.

        Raises:
            Exception: if the SERP host is neither Yandex nor Google.
        """
        serp_data = metric_params.serp_data
        self.params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))
        self.video_quality_name, _ = self._get_factor_names(serp_data)
        if self.video_quality_name == "unknown_system":
            raise Exception(
                "Cannot determine search system from SERP url %r"
                % (serp_data["headers"]["cleanUrl"],))

    def value_by_position(self, position_metric_params):
        """Return the normalised factor combination for one SERP position.

        ``precompute`` must have been called first so that ``self.params``
        and ``self.video_quality_name`` are set.

        Raises:
            KeyError: if the quality label is not one of the known marks.
            ZeroDivisionError: if the selected weight set yields a zero
                normaliser (this is the case for the default, all-zero
                weights).
        """
        def num_value(param_name, label, middle_mark_weight):
            """Map an assessor label to a number for the given scale kind."""
            if param_name == "rel":
                v = {
                    "RELEVANT_PLUS": 1.0,
                    "RELEVANT_PLUS_NO_PLAYER": 0.5,
                    "RELEVANT_MINUS": middle_mark_weight,
                    "RELEVANT_MINUS_NO_PLAYER": middle_mark_weight/2,
                    "IRRELEVANT": 0.0,
                    "IRRELEVANT_NO_PLAYER": 0.0,
                    "SOFT_404": 0.0,
                    "SOFT_404_NO_PLAYER": 0.0,
                    "_404": 0.0,
                    "_404_NO_PLAYER": 0.0,
                    "VIRUS": 0.0,
                    "VIRUS_NO_PLAYER": 0.0
                }
            elif param_name == "quality":
                v = {"OK": 1.0, "NORM": middle_mark_weight, "BAD": 0.0, "_404": 0.0}
            else:
                raise ValueError("Unknown scale kind: %r" % (param_name,))
            return v[label]

        def get_factors_combination(res, params, popularity_field, custom_component_only):
            """Combine the factors of ``res`` and normalise by the weights."""
            is_porno = int(params.get("is_porno", "0"))
            if is_porno:
                video_quality_weight = self.video_quality_porno_weight
                kernel_weight = self.kernel_porno_weight
                popularity_weight = self.popularity_porno_weight
                player_quality_weight = self.player_quality_porno_weight
            else:
                video_quality_weight = self.video_quality_not_porno_weight
                kernel_weight = self.kernel_not_porno_weight
                popularity_weight = self.popularity_not_porno_weight
                player_quality_weight = self.player_quality_not_porno_weight

            # A missing quality mark is scored like "_404", i.e. 0.0.
            video_quality = num_value("quality",
                                      res.scales.get(self.video_quality_name, "_404"),
                                      self.video_quality_norm_weight)

            # The kernel scale is a JSON string; the raw value is shifted by
            # 0.5 and 0.43 is used when "biz_kernel" is absent.
            kernel_bundle = json.loads(res.scales.get("proxima_business_kernel_2019_10", "{}"))
            kernel = kernel_bundle.get("biz_kernel", 0.43) + 0.5

            res_url = res.scales.get("componentUrl", {}).get("pageUrl", "")
            res_host = self._get_host(res_url)
            popularity = res.scales.get(popularity_field, 0)
            if "frontend.vh" in res_host:
                # Results on "frontend.vh" hosts use the predicted popularity.
                popularity = res.scales.get("popularity_prediction", 0)
            player_quality = res.scales.get("toloka_player_functionality", 0)

            # Disabled override, kept for reference:
            # vh_hosts = ["kinopoisk", "frontend.vh", "ott-widget"]
            # if any(x in res_host for x in vh_hosts):
            #     popularity = 1.0
            #     kernel = 1.0

            if custom_component_only == "popularity":
                my_clickadd = popularity * popularity_weight
            elif custom_component_only == "video_quality":
                my_clickadd = video_quality * video_quality_weight
            elif custom_component_only == "kernel":
                my_clickadd = kernel * kernel_weight
            elif custom_component_only == "player_quality":
                my_clickadd = player_quality * player_quality_weight
            else:
                my_clickadd = video_quality * video_quality_weight \
                              + kernel * kernel_weight \
                              + popularity * popularity_weight \
                              + player_quality * player_quality_weight

            # The normaliser always covers all four weights (the kernel one
            # scaled by ``kernel_multiplier``), even in single-component mode.
            norm_weight = video_quality_weight \
                          + kernel_weight * self.kernel_multiplier \
                          + popularity_weight \
                          + player_quality_weight
            return float(my_clickadd) / norm_weight

        return get_factors_combination(
            position_metric_params.result,
            self.params,
            self.popularity_field,
            self.custom_component_only)

    def aggregate_by_position(self, agg_metric_params):
        """Sum the first ``depth`` position values and divide by ``depth``.

        The divisor is always ``depth``, even when fewer positions exist.
        """
        top_values = agg_metric_params.pos_metric_values[:self.depth]
        return sum(x.value for x in top_values) / self.depth
