C++ Program to Implement Bloom Filter

This C++ Program demonstrates operations on Bloom Filter.

Here is source code of the C++ Program to demonstrate Bloom Filter. The C++ program is successfully compiled and run on a Linux system. The program output is also shown below.

  1. /*
  2.  * C++ Program to Implement Bloom Filter
  3.  */
  4. #include <iostream>
  5. #include <cstdio>
  6. #include <cstdlib>
  7. #include <cstring>
  8. using namespace std;
  9.  
  10. #define POLYNOMIAL 0x04C11DB7L  // Standard CRC-32 polynomial
  11. #define M 32   // Number of bits in the Bloom filter
  12. #define K 4   // Number of bits set per mapping in filter
  13. typedef unsigned short int word16;
  14. typedef unsigned int word32;
  15.  
  16. static word32 CrcTable[256];       // Table of 8-bit CRC32 remainders
  17. char BFilter[M / 8];      // Bloom filter array of M/8 bytes
  18. word32 NumBytes;            // Number of bytes in Bloom filter
  19.  
  20. void gen_crc_table(void);
  21. word32 update_crc(word32 crc_accum, char *data_ptr, word32 data_size);
  22. void mapBloom(word32 hash);
  23. int testBloom(word32 hash);
  24. /*
  25.  * Main Function
  26.  */
  27. int main()
  28. {
  29.     FILE *fp1;
  30.     FILE *fp2;
  31.     char inFile1[256];
  32.     char inFile2[256];
  33.     char inString[1024];
  34.     word32 crc32;
  35.     int retCode;
  36.     word32 i;
  37.     cout<<"------------------------------------------ "<<endl;
  38.     cout<<"-  Program to implement a general Bloom filter\n";
  39.     cout<<"------------------------------------------ "<<endl;
  40.     // Determine number of bytes in Bloom Filter
  41.     NumBytes = M / 8;
  42.     if ((M % 8) != 0)
  43.     {
  44.         cout<<"*** ERROR - M value must be divisible by 8 \n";
  45.         exit(1);
  46.     }
  47.     // Clear the Bloom filter
  48.     for (i = 0; i < NumBytes; i++)
  49.         BFilter[i] = 0x00;
  50.     // Initialize the CRC32 table
  51.     gen_crc_table();
  52.     cout<<"File name of input list to add to filter ===========> ";
  53.     cin>>inFile1;
  54.     fp1 = fopen(inFile1, "r");
  55.     if (fp1 == NULL)
  56.     {
  57.         cout<<"ERROR in opening input file #1 ("<<inFile1<<") *** \n";
  58.         exit(1);
  59.     }
  60.     cout<<"File name of input list to check for match =========> ";
  61.     cin>>inFile2;
  62.     fp2 = fopen(inFile2, "r");
  63.     if (fp2 == NULL)
  64.     {
  65.         cout<<"ERROR in opening input file #1 ("<<inFile2<<") *** \n";
  66.         exit(1);
  67.     }
  68.  
  69.     // Read input file #1 and map each string to Bloom filter
  70.     while (1)
  71.     {
  72.         fgets(inString, 1024, fp1);
  73.         if (feof(fp1))
  74.             break;
  75.         for (i = 0; i < K; i++)
  76.         {
  77.             crc32 = update_crc(i, inString, strlen(inString));
  78.             mapBloom(crc32);
  79.         }
  80.     }
  81.     fclose(fp1);
  82.  
  83.     // Output the Bloom filter
  84.     cout<<"-------------------------------------------------------- \n";
  85.     cout<<"Bloom filter is (M = "<<M<<" bits and K = "<<K<<" mappings)... \n";
  86.     for (i = 0; i < NumBytes; i++)
  87.         printf("%2d", i);
  88.     cout<<endl;
  89.     for (i = 0; i < NumBytes; i++)
  90.         printf("%02X", BFilter[i]);
  91.     cout<<endl;
  92.  
  93.     // Output results header
  94.     cout<<"-----------------------------------------------------\n";
  95.     cout<<"Matching strings are... \n";
  96.  
  97.     while(1)
  98.     {
  99.         fgets(inString, 1024, fp2);
  100.         if (feof(fp2))
  101.             break;
  102.         for (i = 0; i < K; i++)
  103.         {
  104.             crc32 = update_crc(i, inString, strlen(inString));
  105.             retCode = testBloom(crc32);
  106.             if (retCode == 0)
  107. 	           break;
  108.     }
  109.     if (retCode == 1)
  110.         cout<<inString;
  111.   }
  112.   fclose(fp2);
  113. }
  114.  
  115. /*
  116.  * Function to initialize CRC32 table
  117.  */
  118. void gen_crc_table(void)
  119. {
  120.     register word32 crc_accum;
  121.     register word16 i, j;
  122.     // Initialize the CRC32 8-bit look-up table
  123.     for (i = 0; i < 256; i++)
  124.     {
  125.         crc_accum = ((word32) i << 24);
  126.         for (j = 0; j < 8; j++)
  127.         {
  128.             if (crc_accum & 0x80000000L)
  129.                 crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
  130.             else
  131.                 crc_accum = (crc_accum << 1);
  132.         }
  133.         CrcTable[i] = crc_accum;
  134.     }
  135. }
  136. /*
  137.  * Function to generate CRC32
  138.  */
  139. word32 update_crc(word32 crc_accum, char *data_blk_ptr, word32 data_blk_size)
  140. {
  141.     register word32 i, j;
  142.     for (j = 0; j < data_blk_size; j++)
  143.     {
  144.         i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xFF;
  145.         crc_accum = (crc_accum << 8) ^ CrcTable[i];
  146.     }
  147.     crc_accum = ~crc_accum;
  148.  
  149.     return crc_accum;
  150. }
  151. /*
  152.  * Function to map hash into Bloom filter
  153.  */
  154. void mapBloom(word32 hash)
  155. {
  156.     int tempInt;
  157.     int bitNum;
  158.     int byteNum;
  159.     unsigned char mapBit;
  160.     tempInt = hash % M;
  161.     byteNum = tempInt / 8;
  162.     bitNum = tempInt % 8;
  163.  
  164.     mapBit = 0x80;
  165.     mapBit = mapBit >> bitNum;
  166.  
  167.     // Map the bit into the Bloom filter
  168.     BFilter[byteNum] = BFilter[byteNum] | mapBit;
  169. }
  170. /*
  171.  * Function to test for a Bloom filter match
  172.  */
  173. int testBloom(word32 hash)
  174. {
  175.     int tempInt;
  176.     int bitNum;
  177.     int byteNum;
  178.     unsigned char testBit;
  179.     int retCode;
  180.     tempInt = hash % M;
  181.     byteNum = tempInt / 8;
  182.     bitNum = tempInt % 8;
  183.  
  184.     testBit = 0x80;
  185.     testBit = testBit >> bitNum;
  186.     if (BFilter[byteNum] & testBit)
  187.         retCode = 1;
  188.     else
  189.         retCode = 0;
  190.     return retCode;
  191. }

$ g++ BloomFilter.cpp
$ a.out
-----------in1.txt------------
apple pie
big box
cats and dogs
ditto
easy does it
fun and games
-----------------------------
 
----------in2.txt------------
apple pie
bigger box
cats and dogs and mice
ditto
easy does it for now
fun and games and what else
-----------------------------
 
------------------------------------------ 
-  Program to implement a general Bloom filter
------------------------------------------ 
File name of input list to add to filter ===========> in1.txt
File name of input list to check for match =========> in2.txt
-------------------------------------------------------- 
Bloom filter is (M = 32 bits and K = 4 mappings)... 
 0 1 2 3
FFFFFFB6FFFFFFAEFFFFFF876C
-----------------------------------------------------
Matching strings are... 
apple pie
ditto
 
 
------------------
(program exited with code: 0)
Press return to continue

Sanfoundry Global Education & Learning Series – 1000 C++ Programs.

advertisement
advertisement
If you wish to look at all C++ Programming examples, go to C++ Programs.

If you find any mistake above, kindly email to [email protected]

advertisement
advertisement
Subscribe to our Newsletters (Subject-wise). Participate in the Sanfoundry Certification contest to get free Certificate of Merit. Join our social networks below and stay updated with latest contests, videos, internships and jobs!

Youtube | Telegram | LinkedIn | Instagram | Facebook | Twitter | Pinterest
Manish Bhojasia - Founder & CTO at Sanfoundry
Manish Bhojasia, a technology veteran with 20+ years @ Cisco & Wipro, is Founder and CTO at Sanfoundry. He lives in Bangalore, and focuses on development of Linux Kernel, SAN Technologies, Advanced C, Data Structures & Alogrithms. Stay connected with him at LinkedIn.

Subscribe to his free Masterclasses at Youtube & discussions at Telegram SanfoundryClasses.