#!/usr/bin/python
# coding=utf-8

__author__ = 'shiryaev'

class BitArray(object):
    """Fixed-length array of bits packed eight per byte into a bytearray.

    Bit ``key`` lives in byte ``key // 8`` at bit offset ``key % 8``
    (least-significant bit first).  All bits start out as 0.
    """

    def __init__(self, n):
        """Allocate storage for ``n`` bits, rounding up to whole bytes."""
        self.n = n
        full_bytes, spare_bits = divmod(n, 8)
        n_bytes = full_bytes + (1 if spare_bits > 0 else 0)
        self.ba = bytearray(b'\x00' * n_bytes)

    def __setitem__(self, key, value):
        """Set bit ``key`` to ``value``.

        Raises KeyError when ``key`` is not a plain int in ``[0, n)`` and
        ValueError when ``value`` is neither 0 nor 1.
        """
        if type(key) is not int or key < 0 or key >= self.n:
            raise KeyError()
        if value not in (0, 1):
            raise ValueError()
        byte_idx, bit_idx = divmod(key, 8)
        mask = 1 << bit_idx
        # Clear the target bit first, then OR in the new value.
        self.ba[byte_idx] &= ~mask
        self.ba[byte_idx] |= value * mask

    def __getitem__(self, key):
        """Return bit ``key`` as 0 or 1.

        Raises KeyError when ``key`` is not a plain int in ``[0, n)``.
        """
        if type(key) is not int or key < 0 or key >= self.n:
            raise KeyError()
        byte_idx, bit_idx = divmod(key, 8)
        return (self.ba[byte_idx] >> bit_idx) & 1


class MyBloomFilter(object):
    """Bloom filter for strings stored in a BitArray of ``n`` bits.

    Every item is mapped to ``k`` bit positions: for hash number ``i`` the
    position is ``hash(str(i) + item) % n`` using the builtin ``hash``.
    Adding an item sets all of its positions; a membership test reports
    True only when all of them are set.  An item that was added is
    therefore always reported as present, while an item that was never
    added may still be reported as present (a false positive).

    NOTE: the builtin ``hash`` of a string can be salted per process (this
    is the default on Python 3), so the bit positions are only meaningful
    inside the process that filled the filter.
    """

    def __init__(self, n, k):
        """Create an empty filter with ``n`` bits and ``k`` hash functions."""
        self.n = n
        self.n_hashes = k
        self.bitarray = BitArray(n)

    def _positions(self, s):
        """Yield the ``n_hashes`` bit positions that item ``s`` maps to."""
        # range() rather than xrange() so the class runs on Python 2 and 3.
        for i in range(self.n_hashes):
            yield hash(str(i) + s) % self.n

    def add(self, s):
        """Insert string ``s`` by setting each of its bit positions to 1."""
        for pos in self._positions(s):
            self.bitarray[pos] = 1

    def has(self, s):
        """Return True if ``s`` may have been added, False if it was not."""
        # The generator lets all() stop at the first unset bit.
        return all(self.bitarray[pos] != 0 for pos in self._positions(s))

    def __contains__(self, s):
        """Support ``s in bloom``; same result as ``has(s)``."""
        return self.has(s)

    def add_set(self, iter):
        """Insert every string produced by the iterable ``iter``."""
        # The parameter name shadows the builtin but is kept so existing
        # keyword callers continue to work.
        for s in iter:
            self.add(s)


if __name__ == "__main__":
    # Empirical self-check: for several filter sizes ``n`` and hash counts
    # ``k``, insert 1000 random numeric strings, confirm every one of them is
    # reported as present, and measure how many of 10000 independently drawn
    # strings are (falsely) reported as present.
    from random import randint

    for n in (1000, 10000, 100000, 1000000):
        for k in range(1, 10):
            n_samples = 1000
            positive = [str(randint(1, 100000000000000000))
                        for _ in range(n_samples)]
            negative = [str(randint(1, 100000000000000000))
                        for _ in range(10 * n_samples)]
            bloom = MyBloomFilter(n, k)
            bloom.add_set(positive)
            # Every inserted item must be found again.
            check = all(pos in bloom for pos in positive)
            n_errors = sum(1 for neg in negative if neg in bloom)
            # Single-argument print(...) is valid on Python 2 and Python 3.
            print("[n=%d, k=%d, samples=%d] : %s %0.2f%% false positive" % (
                n, k, n_samples,
                "OK" if check else "Failed",
                100. * n_errors / len(negative)))
