import math
import re
import json
import datetime
import time

class DuplicatesShare:
    """Share of the top ``depth`` results that duplicate another result.

    Two results are duplicates when they come from the same host, have equal
    titles and, if both durations are known, the durations differ by at most
    ``DURATION_TOLERANCE`` relative to the shorter one. Missing titles are
    read as ``None`` and therefore compare equal to each other.
    """

    # Maximum relative difference between two known durations (measured
    # against the shorter one) for them to still count as the same video.
    DURATION_TOLERANCE = 0.1

    # Optional http(s) scheme followed by the host. The scheme part must not
    # be greedy: URLs may embed another URL in their query string and the
    # host has to come from the outer one.
    _HOST_RE = re.compile(r'(?:https?://)?(?P<host>[^:/ ]+)')

    def __init__(self, depth=5):
        self.depth = depth

    @classmethod
    def _get_host(cls, url):
        """Return the host part of ``url`` or '' when none can be found."""
        if not url:
            return ''
        match = cls._HOST_RE.search(url)
        return match.group('host') if match else ''

    @staticmethod
    def _parse_duration(duration):
        """Convert a duration string into seconds; 0 means "unknown".

        The format is chosen by the string length: two characters are plain
        seconds, five characters are ``MM:SS`` and anything longer is
        ``H:MM:SS``, optionally prefixed with ``N day, `` / ``N days, ``.
        Empty or missing values and strings of any other length yield 0.

        Raises:
            ValueError: the string has a supported length but does not parse.
        """
        if not duration:
            return 0
        size = len(duration)
        if size == 2:
            return int(duration)
        if size == 5:
            clock = time.strptime(duration, '%M:%S')
            return clock.tm_min * 60 + clock.tm_sec
        if size > 5:
            days = 0
            clock_text = duration
            if 'day' in duration:
                # Splitting on ' day' handles both "1 day" and "2 days".
                day_part, _, rest = duration.partition(', ')
                days = int(day_part.split(' day')[0])
                clock_text = rest.strip()
            clock = time.strptime(clock_text, '%H:%M:%S')
            return (days * 86400 + clock.tm_hour * 3600
                    + clock.tm_min * 60 + clock.tm_sec)
        # Lengths 1, 3 and 4 match none of the supported formats.
        return 0

    @classmethod
    def _describe(cls, result):
        """Return the ``(host, title, seconds)`` triple used for comparison."""
        scales = result.scales
        component_url = scales.get("componentUrl") or {}
        host = cls._get_host(component_url.get("pageUrl"))
        title = scales.get("text.title")
        seconds = cls._parse_duration(scales.get("text.videoDuration"))
        return host, title, seconds

    @classmethod
    def _is_duplicate(cls, first, second):
        """Tell whether two ``(host, title, seconds)`` triples match."""
        host_1, title_1, seconds_1 = first
        host_2, title_2, seconds_2 = second
        # A result without a recognizable host cannot be matched to anything.
        if not host_1 or host_1 != host_2 or title_1 != title_2:
            return False
        if seconds_1 and seconds_2:
            shorter = min(seconds_1, seconds_2)
            difference = float(abs(seconds_1 - seconds_2))
            return difference / shorter <= cls.DURATION_TOLERANCE
        # At least one duration is unknown: host and title decide alone.
        return True

    def value(self, metric_params):
        """Return the fraction of inspected results that have a duplicate.

        Only the first ``self.depth`` entries of ``metric_params.results`` are
        inspected. Returns -1 when there are no results, otherwise a float in
        the range [0, 1].
        """
        results = metric_params.results[:self.depth]

        if len(results) == 0:
            return -1

        # Parse every result once instead of once per compared pair.
        described = [self._describe(result) for result in results]

        duplicated = 0
        for index, current in enumerate(described):
            has_duplicate = any(
                self._is_duplicate(current, other)
                for position, other in enumerate(described)
                if position != index
            )
            if has_duplicate:
                duplicated += 1

        return float(duplicated) / len(results)


class VhDuplicatesShare:
    """Share of ``VH_HOST`` results in the top ``depth`` that are duplicated.

    Only results whose page URL host equals ``VH_HOST`` are counted. Two of
    them are duplicates when their titles are equal and, if both durations
    are known, the durations differ by at most ``DURATION_TOLERANCE``
    relative to the shorter one. Missing titles are read as ``None`` and
    therefore compare equal to each other.
    """

    # Host a result must come from to take part in the metric.
    VH_HOST = 'frontend.vh.yandex.ru'

    # Maximum relative difference between two known durations (measured
    # against the shorter one) for them to still count as the same video.
    DURATION_TOLERANCE = 0.1

    # Optional http(s) scheme followed by the host. The scheme part must not
    # be greedy: URLs may embed another URL in their query string and the
    # host has to come from the outer one.
    _HOST_RE = re.compile(r'(?:https?://)?(?P<host>[^:/ ]+)')

    def __init__(self, depth=5):
        self.depth = depth

    @classmethod
    def _get_host(cls, url):
        """Return the host part of ``url`` or '' when none can be found."""
        if not url:
            return ''
        match = cls._HOST_RE.search(url)
        return match.group('host') if match else ''

    @staticmethod
    def _parse_duration(duration):
        """Convert a duration string into seconds; 0 means "unknown".

        The format is chosen by the string length: two characters are plain
        seconds, five characters are ``MM:SS`` and anything longer is
        ``H:MM:SS``, optionally prefixed with ``N day, `` / ``N days, ``.
        Empty or missing values and strings of any other length yield 0.

        Raises:
            ValueError: the string has a supported length but does not parse.
        """
        if not duration:
            return 0
        size = len(duration)
        if size == 2:
            return int(duration)
        if size == 5:
            clock = time.strptime(duration, '%M:%S')
            return clock.tm_min * 60 + clock.tm_sec
        if size > 5:
            days = 0
            clock_text = duration
            if 'day' in duration:
                # Splitting on ' day' handles both "1 day" and "2 days".
                day_part, _, rest = duration.partition(', ')
                days = int(day_part.split(' day')[0])
                clock_text = rest.strip()
            clock = time.strptime(clock_text, '%H:%M:%S')
            return (days * 86400 + clock.tm_hour * 3600
                    + clock.tm_min * 60 + clock.tm_sec)
        # Lengths 1, 3 and 4 match none of the supported formats.
        return 0

    @classmethod
    def _same_video(cls, first, second):
        """Tell whether two ``(title, seconds)`` pairs describe one video."""
        title_1, seconds_1 = first
        title_2, seconds_2 = second
        if title_1 != title_2:
            return False
        if seconds_1 and seconds_2:
            shorter = min(seconds_1, seconds_2)
            difference = float(abs(seconds_1 - seconds_2))
            return difference / shorter <= cls.DURATION_TOLERANCE
        # At least one duration is unknown: the title decides alone.
        return True

    def value(self, metric_params):
        """Return the fraction of ``VH_HOST`` results that have a duplicate.

        Only the first ``self.depth`` entries of ``metric_params.results`` are
        inspected. Returns -1 when there are no results at all, 0 when none
        of them comes from ``VH_HOST``, otherwise a float in the range [0, 1].
        """
        results = metric_params.results[:self.depth]

        if len(results) == 0:
            return -1

        # A duplicate must share the host, so only VH_HOST results can ever
        # match each other; the remaining results are left out entirely.
        entries = []
        for result in results:
            scales = result.scales
            component_url = scales.get("componentUrl") or {}
            if self._get_host(component_url.get("pageUrl")) != self.VH_HOST:
                continue
            title = scales.get("text.title")
            seconds = self._parse_duration(scales.get("text.videoDuration"))
            entries.append((title, seconds))

        total = len(entries)
        if not total:
            return 0

        duplicated = 0
        for index, current in enumerate(entries):
            has_duplicate = any(
                self._same_video(current, other)
                for position, other in enumerate(entries)
                if position != index
            )
            if has_duplicate:
                duplicated += 1

        return float(duplicated) / total
