Bloom Filter in F#

A Bloom filter, conceived by Burton Howard Bloom in 1970,[1] is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positive retrieval results are possible, but false negatives are not; i.e. a query returns either “inside set (may be wrong)” or “definitely not in set”. Read more about it on http://en.wikipedia.org/wiki/Bloom_filter

 
//http://en.literateprograms.org/Hash_function_comparison_%28C%2C_sh%29
module BloomFilter =
   let SDBMHash (v:string) : uint32 =
        let mutable h = 0u
        for i = 0 to v.Length - 1  do
            h  <-  ((uint32)(v.Chars (i)))+(h <<< 6)+(h <<< 16)-h  
        h    

    // SAX: http://www.eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
   let SAXHash (v: string): uint32 =
        let mutable h = 0u
        for i = 0 to v.Length - 1 do
            h <-  h ^^^ (h <<< 5) + (h >>> 2) + (uint32)(v.Chars (i)) 
        h    
                
   let GetHash (v:string):(uint32 * uint32) = 
        (SAXHash v , SDBMHash v)
                 
    // double hashing: http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf Section 5.1
   type Bloom(filterSize: float32) = 
        let blockSize = (int32) 100000000  //100 MB Block
        let p = 0.001f //false positive tolerance
        let l2 = float32 (log 2.0)
        //From  http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
        let m = (-filterSize * log p) / float32 (l2 * l2)
        let mEff = float32 2.0 ** ceil(float32 (log m) / l2)
        let b = mEff / 8.0F

        (*  for constant 50
            Collissions %:14.907256
            Existing Check-  SucccessCtr % : 100
            Non existing Check - SucccessCtr % : 99
            changing this to 30 drop the accuracy to 21%
        *)

        let numBlocks = (int)(b / float32 blockSize) + 50 //Magical constant to increase numBlocks to increase the accuracy of BF, for 100 Million objects it is 99% accurate.
                
        let XAxis (v:uint32) :uint32 = v % (uint32) numBlocks
        let YAxis (v:uint32) :uint32 = v % (uint32) blockSize 
            
        let mutable collisionCounter = 0u
     
        let mutable filterBits = [| for i in 0u ..(uint32)numBlocks ->  Array.zeroCreate blockSize |]
        
        let Check (a,b) = 
            let byteVal = (byte) (1 <<< (int)(a % 8u))
            if (((filterBits.[int (XAxis a)]).[int (YAxis a)] &&& byteVal ) = 0uy ) then
                true
            else
                let byteVal = (byte)(1 <<< (int)(b % 8u))
                if ((filterBits.[int (XAxis b)]).[int (YAxis b)] &&& byteVal = 0uy)  then
                    true //Certainly not in Bloom filter 100% sure
                else
                    false //may be :(

 
        let Insert (v: uint32) =
            let mutable bCollision = false
            let mutable o = (filterBits.[int(XAxis v)]).[int(YAxis v)] 
            if ((byte) o) <> 0uy then 
                bCollision <- true
            let byteVal = (byte) (1 <<< (int)(v % 8u))
            o <- o ||| byteVal
            (filterBits.[int (XAxis v)]).SetValue (o, int(YAxis v))
            bCollision

        member this.Collisions = collisionCounter
            
        member this.AddValue (v:string)  =
            let a,b = GetHash v 
            if (Insert a && Insert b) then
                            collisionCounter <- collisionCounter + 1u 
                        
                                 
        ///<summary> return true, definatly not in Bloom filter. returns false for may be  </summary>
        member this.CheckValue (v:string) =
            GetHash v |> Check
                
        member this.BloomSize = 
            blockSize * numBlocks 

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s