#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict


def jaccard_index(set_1, set_2):
    """Return the Jaccard similarity coefficient of two sets.

    The coefficient is ``|A & B| / |A | B|``: 0.0 when the sets share no
    element, 1.0 when they are equal.

    :param set_1: first set
    :param set_2: second set
    :return: similarity as a float in the range [0.0, 1.0]
    """
    n = len(set_1.intersection(set_2))
    # |A | B| computed by inclusion-exclusion, so no union set is built.
    union_size = len(set_1) + len(set_2) - n
    if union_size == 0:
        # Both sets are empty. The ratio is undefined (0 / 0); two empty
        # sets are equal, so report them as fully similar rather than
        # raising ZeroDivisionError.
        return 1.0
    # float() keeps true division even under Python 2 integer semantics.
    return n / float(union_size)


def path_sim_metric(paths):
    """Build a table of pairwise Jaccard similarities between paths.

    Every path is split on ``'/'`` and treated as the set of its segments;
    segment order and repetition inside a path are therefore ignored.

    :param paths: iterable of path strings
    :return: ``defaultdict(list)`` mapping each distinct path to a list of
        floats, where element ``i`` is the Jaccard index between that path
        and the ``i``-th entry of ``paths``
    """
    # Snapshot the input so it can be traversed more than once.
    paths = list(paths)
    # Split every path once up front instead of once per compared pair.
    segment_sets = [set(path.split('/')) for path in paths]

    metric = defaultdict(list)
    for path, segments in zip(paths, segment_sets):
        # A repeated path would produce the same row again; keep the first.
        if path in metric:
            continue
        metric[path] = [
            jaccard_index(segments, other) for other in segment_sets
        ]

    return metric


def filter_similar_paths(paths, threshold=0.5):
    """Remove paths that are too similar to a path that is being kept.

    Paths are visited in iteration order. For every path that has not been
    removed yet, each other path whose Jaccard similarity to it (as given
    by ``path_sim_metric``) is strictly greater than ``threshold`` is
    removed. Of two similar paths, the one visited first therefore
    survives; for an unordered collection such as a set, which one that is
    depends on the set's iteration order.

    :param paths: mutable collection of path strings supporting iteration,
        ``in`` and ``remove`` (for example a set or a list). It is modified
        in place.
    :param threshold: similarity above which two paths count as similar
    :return: the same ``paths`` object, after the removals
    """
    valuable_paths = paths
    # Fixed snapshot: the indices of the similarity rows refer to this
    # list, so it must not change while ``valuable_paths`` shrinks.
    candidates = list(paths)
    metric = path_sim_metric(candidates)

    for path_num, path in enumerate(candidates):
        # Already discarded as similar to an earlier path.
        if path not in valuable_paths:
            continue

        # enumerate() keeps each value paired with its own index on every
        # branch, including when a removal below fails.
        for value_num, value in enumerate(metric[path]):
            if value_num == path_num or value <= threshold:
                continue

            sim = candidates[value_num]
            # An equal string at another index is a duplicate entry of the
            # path being kept, not a different path to drop.
            if sim == path:
                continue

            try:
                valuable_paths.remove(sim)
            except (KeyError, ValueError):
                # Already removed as similar to an earlier path. Sets
                # raise KeyError here, lists raise ValueError.
                pass

    return valuable_paths
