Skip to content

Commit f2bd96e

Browse files
committed
Day-20 Robin Karp algorithm
1 parent 5b715f6 commit f2bd96e

File tree

1 file changed

+128
-0
lines changed

1 file changed

+128
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*
2+
* Given a pattern string (P) and a large text string (T), write a function search(P, T) which returns all the occurrences of P in T.
3+
* example : T => "AABAACAADAABAAABAA".
4+
* P => "AABA"
5+
* Output : 0, 9, 13 (all indices of T at which the pattern string P starts to match).
6+
*
7+
* Approach:
8+
* Lets say size of T ==> N
9+
* Size of P ==> M.
10+
* Lets have a hash function --> hash.
11+
* Step 1 :We will calculate hash of Pattern P, lets say it is p
12+
* Step 2 : Then we will calculate hash of text portion from T[0-->M-1]. lets say t(0)
13+
* Step 3: if ( p == t(0) ), compare the characters one by one (equal hashes may still be a collision); if they all match, add the index to the list of occurrences.
14+
* Step 4: Go back to step 2, and calculate t(1) i.e hash of T[1-->M] using t(0) in O(1).
15+
*
16+
* The question remains, how do we calculate t(1) from t(0) in O(1), we do it using Horner's rule
17+
* H[m] = X[m] + 10 (X[m-1] + 10 (X[m-2] + ... + 10 (X[2] + 10 X[1]) ... )) --> the 10 is the number of characters in the alphabet
18+
*
19+
* By Induction, we can calculate
20+
* t(s+1) = 10 ( t(s) - 10^(m-1) * T[s] ) + T[s+m]
21+
*
22+
* so for example
23+
* T = "123456", and m = 3
24+
* T(0) = 123
25+
* T(1) = 10 * ( 123 - 100 * 1) + 4 = 234
26+
*
27+
* So in our case number of character can be 256
28+
* Therefore t(s+1) = 256 ( t(s) - 256 ^ (m-1) * T[s] ) + T[s+m]
29+
*
30+
* alphabet = 256;
31+
* In our program we will precalculate 256 ^ (m-1), reduced modulo the prime q;
32+
* h = pow(256, m-1) % q
33+
*
34+
*/
35+
36+
37+
#include <iostream>
#include <string>
#include <vector>
39+
40+
41+
// Radix of the polynomial rolling hash: the number of distinct values a
// single byte can take.
constexpr int alphabet = 256;
// Prime modulus that keeps the hash values small. 101 is a small prime, so
// hash collisions are frequent; every hash hit is therefore confirmed by a
// character comparison before it is reported.
constexpr int q = 101;

/*
 * Rabin-Karp search: returns every index of `text` at which `pattern` occurs.
 *
 * pattern : the string to look for.
 * text    : the string to search in.
 * returns : the starting indices of all (possibly overlapping) occurrences,
 *           in increasing order. An empty pattern matches at every position
 *           0..text.size(); a pattern longer than the text never matches.
 *
 * A window of pattern.size() characters slides over the text. Its hash is
 * updated from the previous window's hash, and only windows whose hash equals
 * the pattern's hash are compared character by character.
 */
std::vector<int> search(const std::string& pattern, const std::string& text)
{
    const int M = static_cast<int>(pattern.size());
    const int N = static_cast<int>(text.size());
    std::vector<int> indices; // all indices of text where pattern matched.

    // A pattern longer than the text cannot occur; return before the initial
    // hash loop would read past the end of the text.
    if ( M > N ) {
        return indices;
    }

    // The rolling update below is meaningless for a zero-length window, so the
    // empty pattern is handled directly: it matches at every position.
    if ( M == 0 ) {
        for ( int i = 0; i <= N; ++i ) {
            indices.push_back(i);
        }
        return indices;
    }

    // Hash characters as unsigned bytes. With a signed char, bytes >= 0x80
    // would make the pattern hash negative while the rolled text hash is
    // normalised to be non-negative, and real matches would be missed.
    const auto byteAt = [](const std::string& s, int pos) {
        return static_cast<int>(static_cast<unsigned char>(s[pos]));
    };

    // h = pow(alphabet, M-1) % q : weight of the window's leading character.
    int h = 1;
    for ( int i = 0; i < M - 1; ++i ) {
        h = (alphabet * h) % q;
    }

    // Initial hash values of the pattern and of the first text window.
    int p = 0;
    int t = 0;
    for ( int i = 0; i < M; ++i ) {
        p = ( alphabet * p + byteAt(pattern, i) ) % q;
        t = ( alphabet * t + byteAt(text, i) ) % q;
    }

    // Slide the window over the text.
    for ( int i = 0; i <= N - M; ++i ) {
        // Equal hashes only make a match possible; confirm it on the characters.
        if ( p == t && text.compare(i, M, pattern) == 0 ) {
            indices.push_back(i);
        }

        // Always roll the hash to the next window (also after a hash hit), so
        // later windows keep being filtered by hash instead of falling back
        // to a character comparison at every position.
        if ( i < N - M ) {
            t = ( alphabet * ( t - byteAt(text, i) * h ) + byteAt(text, i + M) ) % q;
            // The subtraction can make the remainder negative.
            if ( t < 0 ) {
                t += q;
            }
        }
    }
    return indices;
}
96+
97+
// Writes a one-line report of a search result to standard output.
//
// indices : positions at which the pattern was found (may be empty).
// pattern : the pattern that was searched for; echoed in quotes.
// text    : the text that was searched; echoed in quotes.
//
// An empty `indices` produces a "does not occur" line; otherwise the
// positions are listed on one line, each followed by a single space.
void printIndices(std::vector<int> indices,
                  const std::string pattern,
                  const std::string text)
{
    if ( indices.empty() ) {
        std::cout << "\"" << pattern << "\" does not occur in \"" << text << "\"" << std::endl;
        return;
    }

    std::cout << "\"" << pattern << "\" occurs in \"" << text << "\" at following position(s):";
    for ( std::vector<int>::const_iterator pos = indices.begin(); pos != indices.end(); ++pos ) {
        std::cout << *pos << " ";
    }
    std::cout << std::endl;
}
111+
112+
int main()
113+
{
114+
std::string txt1("AABAACAADAABAAABAA");
115+
std::string pat1("AABA");
116+
std::string txt2("Hello World Hello World , All is great in World");
117+
std::string pat2("World");
118+
std::string txt3("GEEKS FOR GEEKS");
119+
std::string pat3("GEEKS");
120+
121+
std::vector<int> indices1 = search(pat1, txt1);
122+
printIndices(indices1, pat1, txt1);
123+
std::vector<int> indices2 = search(pat2, txt2);
124+
printIndices(indices2, pat2, txt2);
125+
std::vector<int> indices3 = search(pat3, txt3);
126+
printIndices(indices3, pat3, txt3);
127+
return 0;
128+
}

0 commit comments

Comments
 (0)