|
| 1 | +/* |
| 2 | + * Given a pattern string (P) and a large text string (T), write a function search(P, T) which returns all the occurrences of P in T. |
| 3 | + * Example: T => "AABAACAADAABAAABAA" |
| 4 | + * P => "AABA" |
| 5 | + * Output: 0, 9, 13 (all indices of T at which the pattern string P starts to match). |
| 6 | + * |
| 7 | + * Approach: |
| 8 | + * Lets say size of T ==> N |
| 9 | + * Size of P ==> M. |
| 10 | + * Lets have a hash function --> hash. |
| 11 | + * Step 1 :We will calculate hash of Pattern P, lets say it is p |
| 12 | + * Step 2 : Then we will calculate hash of text portion from T[0-->M-1]. lets say t(0) |
| 13 | + * Step 3: if ( p == t(0) ), compare the characters one by one (different strings can share a hash); if they all match, add the index to the list of occurrences. |
| 14 | + * Step 4: Go back to step 2 and calculate t(1), i.e. the hash of T[1-->M], from t(0) in O(1). |
| 15 | + * |
| 16 | + * The question remains: how do we calculate t(1) from t(0) in O(1)? We do it using Horner's rule: |
| 17 | + * H[m] = X[m] + 10 (X[m-1] + 10 (X[m-2] + ... + 10 (X[2] + 10 X[1]) ...)) --> here 10 is the number of characters in the alphabet (decimal digits) |
| 18 | + * |
| 19 | + * By Induction, we can calculate |
| 20 | + * t(s+1) = 10 ( t(s) - 10^(m-1) * T[s] ) + T[s+m] |
| 21 | + * |
| 22 | + * so for example |
| 23 | + * T = "123456", and m = 3 |
| 24 | + * T(0) = 123 |
| 25 | + * T(1) = 10 * ( 123 - 100 * 1) + 4 = 234 |
| 26 | + * |
| 27 | + * In our case the number of characters in the alphabet is 256. |
| 28 | + * Therefore t(s+1) = 256 ( t(s) - 256^(m-1) * T[s] ) + T[s+m] |
| 29 | + * |
| 30 | + * alphabet = 256; |
| 31 | + * In our program we will precalculate 256^(m-1), reduced modulo a prime q so that hash values stay small: |
| 32 | + * h = pow(256, m-1) % q |
| 33 | + * |
| 34 | + */ |
| 35 | + |
| 36 | + |
#include <iostream>
#include <string>
#include <vector>
| 39 | + |
| 40 | + |
// Radix of the rolling hash: the number of distinct values one character
// (treated as an unsigned byte) can take.
const int alphabet = 256;
// Prime modulus of the rolling hash. It is large enough that two different
// windows rarely share a hash, and small enough that every intermediate
// product below (at most alphabet * q) fits comfortably in a 64-bit integer.
const int q = 1000000007;


/**
 * Rabin-Karp search: finds every position at which `pattern` occurs in `text`.
 *
 * A polynomial hash (radix `alphabet`, modulus `q`) of the pattern is compared
 * with the hash of each text window of the same length. The window hash is
 * rolled forward in O(1) per step, and a window is compared character by
 * character only when its hash equals the pattern hash.
 *
 * @param pattern  string to look for
 * @param text     string to search in
 * @return         start indices of all (possibly overlapping) occurrences, in
 *                 increasing order. An empty pattern matches at every index
 *                 0..text.size(). A pattern longer than the text yields an
 *                 empty result. Indices are narrowed to int, so texts longer
 *                 than INT_MAX are not supported.
 */
std::vector<int> search(const std::string& pattern, const std::string& text)
{
    typedef std::string::size_type size_type;

    const size_type M = pattern.size();
    const size_type N = text.size();
    std::vector<int> indices; // start index of every match found so far

    // A pattern longer than the text cannot occur; bailing out here also keeps
    // the initial hash loop from reading past the end of the text.
    if ( M > N ) {
        return indices;
    }

    // The empty pattern matches at every position, including one past the end.
    if ( M == 0 ) {
        for ( size_type i = 0; i <= N; ++i ) {
            indices.push_back(static_cast<int>(i));
        }
        return indices;
    }

    const long long radix = alphabet;
    const long long mod = q;

    // high = radix^(M-1) % mod: the weight of the leading character of a window.
    long long high = 1;
    for ( size_type i = 1; i < M; ++i ) {
        high = (high * radix) % mod;
    }

    // Hash of the pattern and of the first text window. Characters are read as
    // unsigned char so that bytes >= 0x80 never produce negative hash values.
    long long p = 0;
    long long t = 0;
    for ( size_type i = 0; i < M; ++i ) {
        p = (radix * p + static_cast<unsigned char>(pattern[i])) % mod;
        t = (radix * t + static_cast<unsigned char>(text[i])) % mod;
    }

    const size_type last = N - M; // start index of the final window
    for ( size_type i = 0; i <= last; ++i ) {
        // Equal hashes only suggest a match; confirm it on the characters.
        if ( p == t && text.compare(i, M, pattern) == 0 ) {
            indices.push_back(static_cast<int>(i));
        }

        // Roll the hash to the next window on EVERY step, whether or not the
        // hashes were equal; otherwise a stale hash would be compared from
        // here on.
        if ( i < last ) {
            const long long outgoing = static_cast<unsigned char>(text[i]);
            const long long incoming = static_cast<unsigned char>(text[i + M]);
            // Adding mod before subtracting keeps the value non-negative.
            t = (t + mod - (outgoing * high) % mod) % mod;
            t = (t * radix + incoming) % mod;
        }
    }
    return indices;
}
| 96 | + |
/**
 * Writes a one-line report of a search result to standard output.
 *
 * @param indices  positions at which the pattern was found (may be empty)
 * @param pattern  the pattern that was searched for
 * @param text     the text that was searched
 *
 * With no positions the line says the pattern does not occur; otherwise it
 * lists every position, each followed by a single space.
 */
void printIndices(std::vector<int> indices,
                  const std::string pattern,
                  const std::string text)
{
    // Nothing found: report that and stop.
    if ( indices.empty() ) {
        std::cout << "\"" << pattern << "\" does not occur in \"" << text << "\"" << std::endl;
        return;
    }

    std::cout << "\"" << pattern << "\" occurs in \"" << text << "\" at following position(s):";
    for ( std::vector<int>::const_iterator pos = indices.begin(); pos != indices.end(); ++pos ) {
        std::cout << *pos << " ";
    }
    std::cout << std::endl;
}
| 111 | + |
| 112 | +int main() |
| 113 | +{ |
| 114 | + std::string txt1("AABAACAADAABAAABAA"); |
| 115 | + std::string pat1("AABA"); |
| 116 | + std::string txt2("Hello World Hello World , All is great in World"); |
| 117 | + std::string pat2("World"); |
| 118 | + std::string txt3("GEEKS FOR GEEKS"); |
| 119 | + std::string pat3("GEEKS"); |
| 120 | + |
| 121 | + std::vector<int> indices1 = search(pat1, txt1); |
| 122 | + printIndices(indices1, pat1, txt1); |
| 123 | + std::vector<int> indices2 = search(pat2, txt2); |
| 124 | + printIndices(indices2, pat2, txt2); |
| 125 | + std::vector<int> indices3 = search(pat3, txt3); |
| 126 | + printIndices(indices3, pat3, txt3); |
| 127 | + return 0; |
| 128 | +} |
0 commit comments