import math
import re
import json

class VideoProxima:
    """Position-discounted, normalised relevance metric for a video SERP.

    For each of the first ``depth`` results, ``value`` takes the numeric
    relevance label, adds a boost proportional to a weighted combination of
    per-result factors, multiplies by a piracy/fragment penalty and (in the
    top three positions) a freshness penalty, discounts the result by
    ``log(1 + position)`` and finally divides the sum by the score of a SERP
    in which every result is worth ``1 + boost_value``.

    The code only uses syntax that is valid on both Python 2 and Python 3.
    """

    # Numeric value of a RELEVANT_MINUS judgement (RELEVANT_PLUS is 1.0).
    _REL_MINUS_WEIGHT = 0.7
    # Kernel factor used when a result has no 'video_albin_host_bundle_v2' scale.
    _DEFAULT_KERNEL = 0.486
    # Values of 'text.SERVER_DESCR' that are exempt from the freshness penalty.
    _FRESH_SOURCES = ('VIDEOQUICK', 'VIDEOQUICK_MISSPELL')
    # Seconds per component of an "H:MM:SS" string, read right to left.
    _DURATION_UNITS = (1, 60, 3600)
    # Optional scheme, then the host up to the first ':', '/' or space.
    _HOST_RE = re.compile(r'(?:http.*://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*')

    def __init__(self, depth=5,
                 popularity_field="popularity",
                 video_quality_norm_weight=0.5,
                 video_quality_not_porno_weight=0,
                 kernel_not_porno_weight=0,
                 popularity_not_porno_weight=0,
                 player_quality_not_porno_weight=0,
                 binary_relevance_not_porno_weight=0,
                 video_quality_porno_weight=0,
                 kernel_porno_weight=0,
                 popularity_porno_weight=0,
                 player_quality_porno_weight=0,
                 binary_relevance_porno_weight=0,
                 kernel_multiplier=0.5,
                 boost_value=0.3,
                 fragment_hosts_weight=0.99,
                 social_net_hosts_weight=0.5,
                 pirate_hosts_weight=1.0,
                 freshness_always_fine=0.5,
                 freshness_tmp_fine=0):
        """Store the metric configuration.

        Args:
            depth: number of top results that are scored.
            popularity_field: name of the scale read as the popularity factor.
            video_quality_norm_weight: numeric value of the 'NORM' quality label.
            *_not_porno_weight: factor weights used when the query's
                ``is_porno`` custom parameter is absent or zero.
            *_porno_weight: factor weights used when ``is_porno`` is non-zero.
            kernel_multiplier: scales the kernel weight in the normalising
                denominator only (the numerator uses the plain kernel weight).
            boost_value: largest boost added on top of a RELEVANT_PLUS or
                RELEVANT_MINUS label.
            fragment_hosts_weight: multiplier for results shorter than the
                minimum content duration on film/series queries.
            social_net_hosts_weight: multiplier for full-length results whose
                'player_stivenson_type' scale is 'semi_pirate'.
            pirate_hosts_weight: multiplier for full-length results whose
                'player_stivenson_type' scale is 'pirate'.
            freshness_always_fine: multiplier applied when the query has
                ``is_always_freshness`` set and the result is not from a
                fresh source.
            freshness_tmp_fine: same, for ``is_tmp_freshness``.
        """
        self.depth = depth
        self.popularity_field = popularity_field
        self.video_quality_norm_weight = video_quality_norm_weight

        self.video_quality_not_porno_weight = video_quality_not_porno_weight
        self.kernel_not_porno_weight = kernel_not_porno_weight
        self.popularity_not_porno_weight = popularity_not_porno_weight
        self.player_quality_not_porno_weight = player_quality_not_porno_weight
        self.binary_relevance_not_porno_weight = binary_relevance_not_porno_weight

        self.video_quality_porno_weight = video_quality_porno_weight
        self.kernel_porno_weight = kernel_porno_weight
        self.popularity_porno_weight = popularity_porno_weight
        self.player_quality_porno_weight = player_quality_porno_weight
        self.binary_relevance_porno_weight = binary_relevance_porno_weight

        self.kernel_multiplier = kernel_multiplier
        self.boost_value = boost_value

        self.fragment_hosts_weight = fragment_hosts_weight
        self.social_net_hosts_weight = social_net_hosts_weight
        self.pirate_hosts_weight = pirate_hosts_weight
        # Duration limits in seconds. The fragment limits are not read by
        # this class; they are kept because they are public attributes.
        self.default_fragment_min_duration = 3 * 60
        self.default_fragment_max_duration = 5 * 60
        self.default_film_min_duration = 60 * 60
        self.default_film_max_duration = 3 * 60 * 60
        self.default_series_min_duration = 5 * 60
        self.default_series_max_duration = 60 * 60

        self.freshness_always_fine = freshness_always_fine
        self.freshness_tmp_fine = freshness_tmp_fine

    def _label_value(self, param_name, label, middle_mark_weight):
        """Map a 'rel' or 'quality' judgement label to a number.

        Raises KeyError for a label that is not in the table and ValueError
        for an unknown ``param_name``.
        """
        if param_name == 'rel':
            table = {'RELEVANT_PLUS': 1.0, 'RELEVANT_MINUS': middle_mark_weight,
                     'IRRELEVANT': 0.0, 'SOFT_404': 0.0, '_404': 0.0, 'VIRUS': 0.0}
        elif param_name == 'quality':
            table = {'OK': 1.0, 'NORM': middle_mark_weight, 'BAD': 0.0, '_404': 0.0}
        else:
            raise ValueError("unknown judgement kind: %r" % (param_name,))
        return table[label]

    def _boost_coef(self, relevance):
        """Return the boost ceiling for a relevance label (0.0 unless relevant)."""
        if relevance in ('RELEVANT_PLUS', 'RELEVANT_MINUS'):
            return self.boost_value
        return 0.0

    def _get_host(self, url):
        """Extract the host part of ``url``; '' when nothing host-like is found."""
        match = self._HOST_RE.search(url)
        return match.group('host') if match else ''

    def _factor_names(self, serp_data):
        """Pick the video-quality scale name from the SERP's own host.

        Returns ``(scale_name, is_yandex)``; ``("unknown_system", None)``
        when the host contains neither "yandex" nor "google".
        """
        host = self._get_host(serp_data["headers"]["cleanUrl"])
        if "yandex" in host:
            return "video_quality", True
        if "google" in host:
            return "video_quality_google", False
        return "unknown_system", None

    def _factors_combination(self, res, video_quality_name, params):
        """Return the weighted, normalised combination of a result's factors.

        Raises ValueError when the search system could not be identified.
        Returns 0.0 when the normalising denominator is zero (for instance
        with the constructor's default all-zero weights), so that an
        unconfigured metric adds no boost instead of dividing by zero.
        """
        if video_quality_name == "unknown_system":
            raise ValueError(
                "cannot choose a video quality scale: "
                "SERP host is neither yandex nor google")

        if int(params.get("is_porno", "0")):
            weights = (self.video_quality_porno_weight,
                       self.kernel_porno_weight,
                       self.popularity_porno_weight,
                       self.player_quality_porno_weight,
                       self.binary_relevance_porno_weight)
        else:
            weights = (self.video_quality_not_porno_weight,
                       self.kernel_not_porno_weight,
                       self.popularity_not_porno_weight,
                       self.player_quality_not_porno_weight,
                       self.binary_relevance_not_porno_weight)
        (video_quality_weight, kernel_weight, popularity_weight,
         player_quality_weight, binary_relevance_weight) = weights

        scales = res.scales
        video_quality = self._label_value(
            'quality', scales.get(video_quality_name, '_404'),
            self.video_quality_norm_weight)
        kernel = float(scales.get('video_albin_host_bundle_v2', self._DEFAULT_KERNEL))
        popularity = scales.get(self.popularity_field, 0)
        player_quality = scales.get('toloka_player_functionality', 0)
        # A bool; it multiplies as 0 or 1.
        binary_relevance = scales.get('video_relevance_bin', 'IRREL') == 'REL'

        weighted_sum = (video_quality * video_quality_weight
                        + kernel * kernel_weight
                        + popularity * popularity_weight
                        + player_quality * player_quality_weight
                        + binary_relevance * binary_relevance_weight)

        # The kernel weight is scaled by kernel_multiplier here but not in
        # the numerator above; that asymmetry is preserved from the original.
        normaliser = (video_quality_weight
                      + kernel_weight * self.kernel_multiplier
                      + popularity_weight
                      + player_quality_weight
                      + binary_relevance_weight)
        if normaliser == 0:
            return 0.0
        return float(weighted_sum) / normaliser

    def _freshness_coef(self, params, source_freshness):
        """Return the freshness multiplier for a result.

        Results whose source is in ``_FRESH_SOURCES`` are never penalised;
        ``is_always_freshness`` takes precedence over ``is_tmp_freshness``.
        """
        is_always_freshness = int(params.get("is_always_freshness", "0"))
        is_tmp_freshness = int(params.get("is_tmp_freshness", "0"))

        if source_freshness in self._FRESH_SOURCES:
            return 1
        if is_always_freshness == 1:
            return self.freshness_always_fine
        if is_tmp_freshness == 1:
            return self.freshness_tmp_fine
        return 1

    def _duration_bounds(self, serp_data, is_film):
        """Return ``(min, max)`` content duration in seconds for the query.

        Uses the judged object duration +/- 20% when it is present and not
        'unknown'; otherwise the film or series defaults.
        """
        judged = serp_data.get('judgements.video_object_duration')
        if judged and judged['name'] != 'unknown':
            object_duration = float(judged['name'])
            return object_duration * 0.8, object_duration * 1.2
        if is_film:
            return self.default_film_min_duration, self.default_film_max_duration
        return self.default_series_min_duration, self.default_series_max_duration

    def _piracy_boost(self, scales, min_duration, max_duration):
        """Return the piracy/fragment multiplier for one result's scales."""
        piracy = scales.get("player_stivenson_type")

        duration_text = scales.get("text.videoDuration")
        if not duration_text:
            duration = 0
        else:
            # "H:MM:SS" / "MM:SS" / "SS": pair components with units from the right.
            duration = sum(int(part) * unit
                           for part, unit in zip(duration_text.split(':')[::-1],
                                                 self._DURATION_UNITS))

        if scales.get("trailer_toloka", "OTHER") == "TRAILER":
            return 1.0

        # Same truth table as "(min <= d <= max) or d > max": there is no
        # effective upper bound on what counts as full-length content.
        if duration >= min_duration or duration > max_duration:
            if piracy == "semi_pirate":
                return self.social_net_hosts_weight
            if piracy == "pirate":
                return self.pirate_hosts_weight
            return 1.0

        return self.fragment_hosts_weight

    def value(self, metric_params):
        """Compute the metric for one SERP.

        Args:
            metric_params: object exposing ``serp_data`` (a mapping with at
                least ``headers.cleanUrl`` and ``query.text``) and
                ``results`` (a sliceable sequence of objects with ``pos``
                and a ``scales`` mapping).

        Returns:
            The discounted gain divided by the best achievable gain for
            ``depth`` results; 0.0 when ``depth`` leaves nothing to score.

        Raises:
            ValueError: a result is scored but the SERP host is neither
                yandex nor google.
            AssertionError: a result position is below 1.
            KeyError: a relevance or quality label is not recognised.

        Progress lines are written to stdout, as in the original.
        """
        serp_data = metric_params.serp_data
        results = metric_params.results[:self.depth]

        max_dcg = (1.0 + self.boost_value) * sum(
            1.0 / math.log(2 + pos) for pos in range(self.depth))

        params = json.loads(serp_data.get("serp_query_param.custom_param", "{}"))

        is_film = int(params.get("is_film", "0"))
        is_serial = int(params.get("is_serial", "0"))
        need_piracy_fine = bool(is_film or is_serial)
        if need_piracy_fine:
            min_duration, max_duration = self._duration_bounds(serp_data, is_film)

        print("query is %s" % (serp_data.get("query")["text"]))

        # The host is parsed once per SERP rather than once per result.
        video_quality_name, _ = self._factor_names(serp_data)

        dcg = 0
        for res in results:
            pos_index = res.pos

            print("pos is %d" % pos_index)

            # Raised explicitly so the check survives "python -O"; position 0
            # would otherwise divide by log(1) == 0 below.
            if not pos_index >= 1:
                raise AssertionError(
                    "result position must be >= 1, got %r" % (pos_index,))

            relevance = res.scales.get("video_relevance", '_404')
            rel_numeric = self._label_value('rel', relevance, self._REL_MINUS_WEIGHT)
            rel_value = rel_numeric + self._boost_coef(relevance) * \
                self._factors_combination(res, video_quality_name, params)

            boost = 1.0
            if need_piracy_fine:
                boost = self._piracy_boost(res.scales, min_duration, max_duration)
            print('old boost is %f' % (boost))

            fresh_fine = self._freshness_coef(
                params, res.scales.get('text.SERVER_DESCR', ''))

            # The freshness penalty only applies to the first three positions.
            if pos_index < 4:
                dcg += fresh_fine * boost * rel_value / math.log(1 + pos_index)
                print("has fresh fine")
            else:
                dcg += boost * rel_value / math.log(1 + pos_index)
                print("no fresh fine")

            print("new_rel_value is %f" % (rel_value))
            print("new_dcg is %f" % (fresh_fine * boost * rel_value))

        if max_dcg == 0:
            return 0.0
        return dcg / max_dcg
