C++ Program to Implement CountMinSketch

«
»
This C++ Program demonstrates the implementation of Count Min Sketch.

Here is source code of the C++ Program to demonstrate Count Min Sketch. The C++ program is successfully compiled and run on a Linux system. The program output is also shown below.

  1. /*
  2.  * C++ Program to Implement Count Min Sketch
  3.  */
  4. # include <iostream>
  5. # include <cmath>
  6. # include <map>
  7. # include <cstdlib>
  8. # include <ctime>
  9. # include <limits>
  10. # include <cstring>
  11. # define LONG_PRIME 32993
  12. # define MIN(a,b) (a < b ? a : b)
  13. # define ui unsigned int
  14. using namespace std;
  15.  
  16. /*
  17.  * Class Declaration
  18.  */
  19. class CountMinSketch
  20. {
  21.     private:
  22.         ui w,d;
  23.         float eps;
  24.         float gamma;
  25.         ui aj, bj;
  26.         ui total;
  27.         int **C;
  28.         int **hashes;
  29.         void genajbj(int **hashes, int i);
  30.     public:
  31.         /*
  32.          * Constructor
  33.          */
  34.         CountMinSketch(float ep, float gamm)
  35.         {
  36.             if (!(0.009 <= ep && ep < 1))
  37.             {
  38.                 cout << "eps must be in this range: [0.01, 1)" << endl;
  39.                 exit(1);
  40.             }
  41.             else if (!(0 < gamm && gamm < 1))
  42.             {
  43.                 cout << "gamma must be in this range: (0,1)" << endl;
  44.                 exit(1);
  45.             }
  46.             eps = ep;
  47.             gamma = gamm;
  48.             w = ceil(exp(1) / eps);
  49.             d = ceil(log(1 / gamma));
  50.             total = 0;
  51.             C = new int *[d];
  52.             ui i, j;
  53.             for (i = 0; i < d; i++)
  54.             {
  55.                 C[i] = new int[w];
  56.                 for (j = 0; j < w; j++)
  57.                 {
  58.                     C[i][j] = 0;
  59.                 }
  60.             }
  61.             srand(time(NULL));
  62.             hashes = new int* [d];
  63.             for (i = 0; i < d; i++)
  64.             {
  65.                 hashes[i] = new int[2];
  66.                 genajbj(hashes, i);
  67.             }
  68.         }
  69.         /*
  70.          * Destructor
  71.          */
  72.         ~CountMinSketch()
  73.         {
  74.             ui i;
  75.             for (i = 0; i < d; i++)
  76.             {
  77.                 delete[] C[i];
  78.             }
  79.             delete[] C;
  80.             for (i = 0; i < d; i++)
  81.             {
  82.                 delete[] hashes[i];
  83.             }
  84.             delete[] hashes;
  85.         }
  86.         /*
  87.          * Update Int Value
  88.          */
  89.         void update(int item, int c)
  90.         {
  91.             total = total + c;
  92.             ui hashval = NULL;
  93.             for (ui j = 0; j < d; j++)
  94.             {
  95.                 hashval = (hashes[j][0] * item + hashes[j][1]) % w;
  96.                 C[j][hashval] = C[j][hashval] + c;
  97.             }
  98.         }
  99.  
  100.         /*
  101.          * Update Str Data
  102.          */
  103.         void update(const char *str, int c)
  104.         {
  105.             int hashval = hashstr(str);
  106.             update(hashval, c);
  107.         }
  108.         /*
  109.          * Estimate Min Value
  110.          */
  111.         ui estimate(int item)
  112.         {
  113.             int minval = numeric_limits<int>::max();
  114.             unsigned int hashval = NULL;
  115.             for (unsigned int j = 0; j < d; j++)
  116.             {
  117.                 hashval = (hashes[j][0] * item + hashes[j][1]) % w;
  118.                 minval = MIN(minval, C[j][hashval]);
  119.             }
  120.             return minval;
  121.         }
  122.         /*
  123.          * Estimate Min Str Value
  124.          */
  125.         ui estimate(const char *str)
  126.         {
  127.             int hashval = hashstr(str);
  128.             return estimate(hashval);
  129.         }
  130.         /*
  131.          * Total Count 
  132.          */
  133.         ui totalcount()
  134.         {
  135.             return total;
  136.         }
  137.  
  138.         /*
  139.          * Hashing String to Int
  140.          */        
  141.         ui hashstr(const char *str)
  142.         {
  143.             unsigned long hash = 5381;
  144.             int c;
  145.             while (c = *str++)
  146.             {
  147.                 hash = ((hash << 5) + hash) + c;
  148.             }
  149.             return hash;
  150.         }
  151. };
  152.  
  153. /*
  154. * Generate Random Hashes
  155. */
  156. void CountMinSketch::genajbj(int** hashes, int i)
  157. {
  158.     hashes[i][0] = int(float(rand())*float(LONG_PRIME)/float(RAND_MAX) + 1);
  159.     hashes[i][1] = int(float(rand())*float(LONG_PRIME)/float(RAND_MAX) + 1);
  160. }
  161. /*
  162. * Main Contains menu
  163. */
  164. int main()
  165. {
  166.     const char *ar_str[] = { "hello", "some", "one", "hello", "alice",
  167.                              "one", "lady", "let", "us", "lady",
  168.                              "alice", "in", "us", "wonderland", "lady",
  169.                              "lady", "some", "hello", "none", "pie" };
  170.     CountMinSketch c(0.01, 0.1);
  171.     ui i, total = 0, count;
  172.     map<const char *, int> mapitems;
  173.     map<const char *, int>::const_iterator it;
  174.     for (i = 0; i < 20; i++)
  175.     {
  176.         if ((it = mapitems.find(ar_str[i])) != mapitems.end())
  177.         {
  178.             mapitems[ar_str[i]] += i;
  179.         }
  180.         else
  181.         {
  182.             mapitems[ar_str[i]] = i;
  183.         }
  184.         c.update(ar_str[i], i);
  185.         total = total + i;
  186.  
  187.     }
  188.     // 1. test for items in ar_str
  189.     for (it = mapitems.begin(); it != mapitems.end(); it++)
  190.     {
  191.         if (c.estimate(it->first) != mapitems[it->first])
  192.         {
  193.             cout << "Incorrect count for " << it->first << ":->error: ";
  194.             count = abs(int(c.estimate(it->first)-mapitems[it->first]));
  195.             cout << count << endl;
  196.         }
  197.         else
  198.         {
  199.             cout << "Correct count for '" << it->first;
  200.             cout << "' :-->count: " << c.estimate(it->first) << endl;
  201.         }
  202.     }
  203.     cout << "c.totalcount()==total ? ";
  204.     cout << (c.totalcount() == total ? "True" : "False") << endl;
  205.     cout << "Sketch Total: " << c.totalcount() << endl;
  206.  
  207.     // 2. test for items not in ar_str
  208.     cout << "Testing for strings not in ar_str..." << endl;
  209.     const char *arn_str[] = {"sanfoundry", "hello", "linux", "us"};
  210.     for (i = 0 ; i < 4; i++)
  211.     {
  212.         if (!c.estimate(arn_str[i]))
  213.             cout<<arn_str[i] << " not found in string array"<<endl;
  214.         else
  215.             cout<<arn_str[i] << " found in string array"<<endl;
  216.     }
  217.     return 0;
  218. }

advertisement
$ g++ countmin.cpp
$ a.out
Correct count for 'hello' :-->count: 20
Correct count for 'us' :-->count: 20
Correct count for 'some' :-->count: 17
Correct count for 'one' :-->count: 7
Correct count for 'alice' :-->count: 14
Correct count for 'lady' :-->count: 44
Correct count for 'let' :-->count: 7
Correct count for 'in' :-->count: 11
Correct count for 'wonderland' :-->count: 13
Correct count for 'none' :-->count: 18
Correct count for 'pie' :-->count: 19
c.totalcount()==total ? True
Sketch Total: 190
Testing for strings not in ar_str...
sanfoundry not found in string array
hello found in string array
linux not found in string array
us found in string array
 
------------------
(program exited with code: 0)
Press return to continue

Sanfoundry Global Education & Learning Series – 1000 C++ Programs.

advertisement
If you wish to look at all C++ Programming examples, go to C++ Programs.

advertisement
advertisement
advertisement
Manish Bhojasia, a technology veteran with 20+ years @ Cisco & Wipro, is Founder and CTO at Sanfoundry. He is Linux Kernel Developer & SAN Architect and is passionate about competency developments in these areas. He lives in Bangalore and delivers focused training sessions to IT professionals in Linux Kernel, Linux Debugging, Linux Device Drivers, Linux Networking, Linux Storage, Advanced C Programming, SAN Storage Technologies, SCSI Internals & Storage Protocols such as iSCSI & Fiber Channel. Stay connected with him @ LinkedIn