C++ Program to Implement Suffix Tree

This C++ Program demonstrates the implementation of Suffix Tree.

Here is source code of the C++ Program to demonstrate the implementation of Suffix Tree. The C++ program is successfully compiled and run on a Linux system. The program output is also shown below.

  1. /*
  2.  * C++ Program to Implement Suffix Tree
  3.  */
  4. #include <iostream>
  5. #include <cstdlib>
  6. #include <cstring>
  7. #include <string>
  8.  
  9. using namespace std;
  10. typedef unsigned char byte;
  11. /* 
  12.  * SuffixNode Declaration
  13.  */
  14. class SuffixNode 
  15. {
  16.     public:
  17.         int depth, begin, end;
  18.         SuffixNode **children;
  19.         SuffixNode *parent, *suffixLink;  
  20.         /* 
  21.          * Constructor 
  22.          */
  23.         SuffixNode(int begin, int end, int depth, SuffixNode *parent) 
  24.         {
  25.             children = new SuffixNode* [38];
  26.             this->begin = begin;
  27.             this->end = end;
  28.             this->parent = parent;
  29.             this->depth = depth;
  30.         }
  31.  
  32.         bool contains(int d)
  33.         {
  34.             return depth <= d && d < depth + (end - begin);
  35.         }    
  36. };
  37. string alphabet; 
  38. int alphabetSize;
  39. int lcsLength;
  40. int lcsBeginIndex;
  41.  
  42. /* 
  43.  * Class SuffixTree Declaration
  44.  */
  45. class SuffixTree 
  46. {
  47.     public:
  48.         /* 
  49.          * Funtion to build suffix tree for given text 
  50.          */
  51.         SuffixNode *buildSuffixTree(string s) 
  52.         {
  53.             int n = s.length();
  54.             int *a = new int[n];
  55.             for (int i = 0; i < n; i++) 
  56.             {
  57.                 a[i] = alphabet.find(s.at(i));
  58.             }
  59.             SuffixNode *root = new SuffixNode(0, 0, 0, NULL);
  60.             SuffixNode *cn = root;
  61.             root->suffixLink = root;
  62.             SuffixNode *needsSuffixLink = NULL;
  63.             int lastRule = 0;
  64.             int j = 0;
  65.             for (int i = -1; i < n - 1; i++) 
  66.             {
  67.                 int cur = a[i + 1]; 
  68.                 for (; j <= i + 1; j++) 
  69.                 {
  70.                     int curDepth = i + 1 - j;
  71.                     if (lastRule != 3) 
  72.                     {
  73.                         if (cn->suffixLink != NULL) 
  74.                             cn = cn->suffixLink;
  75.                         else
  76.                             cn = cn->parent->suffixLink;
  77.                         int k = j + cn->depth;
  78.                         while (curDepth > 0 && !cn->contains(curDepth - 1)) 
  79.                         {
  80.                             k += cn->end - cn->begin;
  81.                             cn = cn->children[a[k]];
  82.                         }
  83.                     }
  84.                     if (!cn->contains(curDepth)) 
  85.                     { 
  86.                         if (needsSuffixLink != NULL) 
  87.                         {
  88.                             needsSuffixLink->suffixLink = cn;
  89.                             needsSuffixLink = NULL;
  90.                         }
  91.                         if (cn->children[cur] == NULL) 
  92.                         {
  93.                             cn->children[cur] = new SuffixNode(i + 1, n, curDepth, cn);
  94.                             lastRule = 2;
  95.                         }
  96.                         else 
  97.                         {    
  98.                             cn = cn->children[cur];
  99.                             lastRule = 3;
  100.                             break;
  101.                         }
  102.                     }
  103.                     else 
  104.                     { 
  105.                         int end = cn->begin + curDepth - cn->depth;
  106.                         if (a[end] != cur) 
  107.                         { 
  108.                             SuffixNode *newn = new SuffixNode(cn->begin, end, cn->depth, cn->parent);
  109.                             newn->children[cur] = new SuffixNode(i + 1, n, curDepth, newn);
  110.                             newn->children[a[end]] = cn;
  111.                             cn->parent->children[a[cn->begin]] = newn;
  112.                             if (needsSuffixLink != NULL) 
  113.                                 needsSuffixLink->suffixLink = newn;
  114.                             cn->begin = end;
  115.                             cn->depth = curDepth;
  116.                             cn->parent = newn;
  117.                             cn = needsSuffixLink = newn;
  118.                             lastRule = 2;
  119.                         } 
  120.                         else if (cn->end != n || (cn->begin - cn->depth) < j) 
  121.                         {
  122.                             lastRule = 3;
  123.                             break;
  124.                         }
  125.                         else
  126.                             lastRule = 1;                      
  127.                     }
  128.                 }
  129.             }
  130.             root->suffixLink = NULL;
  131.             return root;
  132.         }        
  133.         /* 
  134.          * Funtion to find longest common substring 
  135.          */
  136.         int findLCS(SuffixNode *node, int i1, int i2) 
  137.         {
  138.             if (node->begin <= i1 && i1 < node->end) 
  139.                 return 1;
  140.             if (node->begin <= i2 && i2 < node->end) 
  141.                 return 2;
  142.             int mask = 0;
  143.             for (int f = 0; f < alphabetSize; f++)
  144.             { 
  145.                 if (node->children[f] != NULL) 
  146.                 {   
  147.                     mask |= findLCS(node->children[f], i1, i2);
  148.                 }
  149.             }
  150.             if (mask == 3)
  151.             {
  152.                 int curLength = node->depth + node->end - node->begin;
  153.                 if (lcsLength < curLength) 
  154.                 {
  155.                     lcsLength = curLength;
  156.                     lcsBeginIndex = node->begin;
  157.                 }
  158.             }
  159.             return mask;
  160.         }
  161.  
  162.         /* 
  163.          * Funtion to find longest common substring 
  164.          */
  165.         void findLCS(string s1, string s2)
  166.         {
  167.             string x = "-";
  168.             string y = "#";
  169.             string ns1 = s1;
  170.             string ns2 = s2;
  171.             string s = s1.append(x.append(s2.append(y)));
  172.             SuffixNode *root = buildSuffixTree(s);
  173.             lcsLength = 0;
  174.             lcsBeginIndex = 0;               
  175.             findLCS(root, ns1.length(), ns1.length() + ns2.length() + 1);
  176.             bool chk = lcsLength > 0;
  177.             if (chk)
  178.             {
  179.                 cout<<"\nLongest substring is "<<s.substr(lcsBeginIndex , lcsLength);
  180.                 cout<<endl; 
  181.             }
  182.             else
  183.             {
  184.                 cout<<"No substring found"<<endl;
  185.             }
  186.         }
  187. };
  188.  
  189. /* 
  190.  * Main 
  191. */
  192. int main()
  193. {
  194.     alphabet = "abcdefghijklmnopqrstuvwxyz1234567890-#";
  195.     alphabetSize = alphabet.length();
  196.     string s1,s2;
  197.     cout<<"Finding longest common substring using suffix trees\n";
  198.     cout<<"Enter 1st String: ";
  199.     cin>>s1; 
  200.     cout<<"Enter 2nd String: ";
  201.     cin>>s2;
  202.     SuffixTree st;
  203.     st.findLCS(s1, s2);         
  204.     return 0;
  205. }

$ g++ suffixtree.cpp
$ a.out
 
Finding longest common substring using suffix trees
Enter 1st String: sanfoundry
Enter 2nd String: founder
 
Longest substring is found
 
 
------------------
(program exited with code: 1)
Press return to continue

Sanfoundry Global Education & Learning Series – 1000 C++ Programs.

advertisement
advertisement
If you wish to look at all C++ Programming examples, go to C++ Programs.

If you find any mistake above, kindly email to [email protected]

advertisement
advertisement
Subscribe to our Newsletters (Subject-wise). Participate in the Sanfoundry Certification contest to get free Certificate of Merit. Join our social networks below and stay updated with latest contests, videos, internships and jobs!

Youtube | Telegram | LinkedIn | Instagram | Facebook | Twitter | Pinterest
Manish Bhojasia - Founder & CTO at Sanfoundry
Manish Bhojasia, a technology veteran with 20+ years @ Cisco & Wipro, is Founder and CTO at Sanfoundry. He lives in Bangalore, and focuses on development of Linux Kernel, SAN Technologies, Advanced C, Data Structures & Alogrithms. Stay connected with him at LinkedIn.

Subscribe to his free Masterclasses at Youtube & discussions at Telegram SanfoundryClasses.