# -*- coding: utf-8 -*-

import logging

# Names exported by ``from <module> import *``; only the analyser class is public.
__all__ = ['Analyser']


class Analyser(object):
    """
        Callable object that compares two parsed result lists.

        Each list item is expected to be a dict; items are compared through
        ``self.key``, which by default reads the ``'url'`` entry.

        __call__: (parsed1, parsed2) -->
        {
            'edit_dist': Levenshtein distance between the two key sequences,
            'unmatched_count': number of items of the (padded) first list
                               whose key does not occur in the second list,
            'displaced_count': abs(len(displaced) - unmatched_count),
            'docs': (padded1, padded2),
            'displaced': [i0, i1, ...]  # positions where the keys differ
        }

        The shorter list is padded with ``{'header': '', 'url': ''}`` items so
        both have equal length.  Padding is applied to copies; the lists
        passed by the caller are left untouched.
    """

    def __init__(self):
        # Comparison key: two documents are considered equal when their
        # 'url' entries are equal.
        self.key = lambda x: x['url']

    def __call__(self, seq1, seq2):
        """
            Compare two result lists and return the statistics dict described
            in the class docstring.

            A warning is logged when either input holds fewer than 10 items.
        """
        len1 = len(seq1)
        len2 = len(seq2)
        if len1 < 10 or len2 < 10:
            logging.warning(
                'Unexpected SERP length: {0} and {1}'.format(len1, len2))

        # Work on copies so the caller's lists are not modified, and build a
        # distinct placeholder dict per padded slot (a repeated list literal
        # would make every slot share one dict object).
        total = max(len1, len2)
        seq1 = list(seq1)
        seq2 = list(seq2)
        seq1.extend({'header': '', 'url': ''} for _ in range(total - len1))
        seq2.extend({'header': '', 'url': ''} for _ in range(total - len2))

        keys1 = [self.key(doc) for doc in seq1]
        keys2 = [self.key(doc) for doc in seq2]

        # Set membership keeps the unmatched scan linear.
        known_keys = set(keys2)
        unmatched_count = sum(1 for k in keys1 if k not in known_keys)

        # Compare over the padded length so that the tail is inspected no
        # matter which of the two inputs was the shorter one.
        displaced = [
            pos for pos, (k1, k2) in enumerate(zip(keys1, keys2)) if k1 != k2
        ]

        return {
            'edit_dist': Analyser.__levenshtein_dist(seq1, seq2, key=self.key),
            'unmatched_count': unmatched_count,
            'displaced_count': abs(len(displaced) - unmatched_count),
            'docs': (seq1, seq2),
            'displaced': displaced
        }

    @staticmethod
    def __levenshtein_dist(seq1, seq2, key=lambda x: x):
        """
            Return the Levenshtein (insert / delete / substitute) distance
            between ``seq1`` and ``seq2``; elements are compared via ``key``.
        """
        # Evaluate the key once per element instead of once per cell.
        keys1 = [key(item) for item in seq1]
        keys2 = [key(item) for item in seq2]

        # Only the previous row of the DP table is needed for the next one.
        previous = list(range(len(keys2) + 1))
        for i, k1 in enumerate(keys1, 1):
            current = [i]
            for j, k2 in enumerate(keys2, 1):
                if k1 == k2:
                    current.append(previous[j - 1])
                else:
                    current.append(
                        min(previous[j], current[j - 1], previous[j - 1]) + 1)
            previous = current
        return previous[-1]
