Merge pull request #199 from Jatin86400/Z-algorithm

prateekiiest · web-flow · commit 67d46584ac81 · 2017-12-14T13:47:58.000+05:30
added Z-algorithm
diff --git a/Competitive Coding/Strings/String Search/Z-algorithm/Readme.md b/Competitive Coding/Strings/String Search/Z-algorithm/Readme.md
@@ -0,0 +1,37 @@
+# Z Algorithm
+
+This algorithm finds all occurrences of a pattern in a text in linear time. If the text has length n and the pattern has length m, the total time taken is O(m + n), with linear space complexity. Both the time and the space complexity are the same as for the KMP algorithm, but this algorithm is simpler to understand.
+
+In this algorithm, we construct a Z array.
+
+# What is Z array?
+
+For a string str[0..n-1], the Z array has the same length as the string. An element Z[i] of the Z array stores the length of the longest substring starting at str[i] that is also a prefix of str[0..n-1]. The first entry of the Z array is meaningless, because the complete string is always a prefix of itself.
+Example:
+Index            0   1   2   3   4   5   6   7   8   9  10  11 
+Text             a   a   b   c   a   a   b   x   a   a   a   z
+Z values         X   1   0   0   3   1   0   0   2   2   1   0 
+
+# How to construct Z array?
+
+The idea is to maintain an interval [L, R] which is the interval with max R
+such that [L,R] is prefix substring (substring which is also prefix). 
+
+Steps for maintaining this interval are as follows – 
+
+1) If i > R then there is no prefix substring that starts before i and 
+   ends after i, so we reset L and R and compute new [L,R] by comparing 
+   str[0..] to str[i..] and get Z[i] (= R-L+1). 
+
+2) If i <= R then let K = i-L. Now Z[i] >= min(Z[K], R-i+1), because
+   str[i..] matches str[K..] for at least R-i+1 characters (they are in
+   the [L,R] interval, which we know is a prefix substring).
+   Now two sub cases arise – 
+      a) If Z[K] < R-i+1 then the prefix substring starting at str[i] cannot
+         be longer than Z[K] (otherwise Z[K] would be larger), so Z[i] = Z[K]
+         and the interval [L,R] remains the same.
+      b) If Z[K] >= R-i+1 then it may be possible to extend the [L,R] interval,
+         so we set L to i, continue matching from str[R+1] onwards to get the
+         new R, then update the interval [L,R] and calculate Z[i] (= R-L+1).
+
+The algorithm runs in linear time because we never compare a character at a position less than R, and every successful match increases R by one, so there are at most n successful comparisons (n being the length of the string). A mismatch happens at most once for each i (it is what stops R from growing), which adds at most n more comparisons, so the overall complexity is linear.
diff --git a/Competitive Coding/Strings/String Search/Z-algorithm/z-algorithm.cpp b/Competitive Coding/Strings/String Search/Z-algorithm/z-algorithm.cpp
@@ -0,0 +1,84 @@
+#include<bits/stdc++.h>
+using namespace std;
+#define fastio do { ios_base::sync_with_stdio(false); cin.tie(0); cout.tie(0); } while (0) // one statement, so `if (x) fastio;` is safe
+#define md 1000000007 // 10^9 + 7
+#define ll long long int
+#define vi vector<int>
+#define vll vector<ll> // was vector<i64>, but no i64 type is defined anywhere in this file
+#define pb push_back
+#define all(c) (c).begin(),(c).end() // expands to the begin/end iterator pair of container c
+template <typename T> T max2(const T &a, const T &b) { if (a < b) return b; return a; } // larger of the two; a when neither is smaller
+template <typename T> T min2(const T &a, const T &b) { if (a > b) return b; return a; } // smaller of the two; a when neither is larger
+template <typename T> T max3(const T &a, const T &b, const T &c) { const T bc = max2(b, c); return max2(a, bc); } // largest of three values
+template <typename T> T min3(const T &a, const T &b, const T &c) { const T bc = min2(b, c); return min2(a, bc); } // smallest of three values
+template <typename T> T gcd(const T a, const T b) { T x = a, y = b; while (y) { const T r = x % y; x = y; y = r; } return x; } // Euclid's algorithm, iterative form
+template <typename T> T lcm(const T a, const T b) { const T g = gcd<T>(a, b); return a / g * b; } // divides before multiplying to keep the intermediate small
+template <typename T> T mod(const T &a, const T &b) { if (a < b) return a; return a % b; } // a itself when a < b, otherwise a % b
+using pi = pair<ll, ll>; // pair whose two members both have the type the ll macro expands to
+// Computes the Z-array of s: z[i] is the length of the longest substring starting at
+// s[i] that is also a prefix of s. z[0] is left at 0 because the whole string trivially
+// matches itself and the value is never consulted.
+//
+// [lo, hi) is the match window with the largest hi seen so far, i.e. s[lo..hi) equals
+// s[0..hi-lo). Every successful comparison in the while loop pushes hi forward, so the
+// total work is linear in the length of s.
+static std::vector<int> z_function(const std::string &s)
+{
+    const int len = static_cast<int>(s.size());
+    std::vector<int> z(len, 0);
+    int lo = 0;
+    int hi = 0;
+    for (int i = 1; i < len; ++i)
+    {
+        // Inside the window, position i mirrors position i - lo of the prefix, so its
+        // Z value is known up to the window edge.
+        if (i < hi)
+            z[i] = std::min(hi - i, z[i - lo]);
+
+        // Extend by direct comparison past whatever the window already guarantees.
+        while (i + z[i] < len && s[z[i]] == s[i + z[i]])
+            ++z[i];
+
+        // Keep the window that reaches furthest to the right.
+        if (i + z[i] > hi)
+        {
+            lo = i;
+            hi = i + z[i];
+        }
+    }
+    return z;
+}
+
+// Reads the text (first line) and the pattern (second line) from stdin and prints, one
+// per line, the 1-based position in the text of every occurrence of the pattern.
+// Overlapping occurrences are all reported; an empty pattern prints nothing.
+int main()
+{
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(nullptr);
+
+    std::string txt;
+    std::string pat;
+    std::getline(std::cin, txt); // getline keeps embedded spaces, unlike operator>>
+    std::getline(std::cin, pat);
+
+    const int pat_len = static_cast<int>(pat.length());
+    if (pat_len == 0)
+        return 0;
+
+    // The pattern is placed directly in front of the text. No separator character is
+    // used: a Z value of at least pat_len at an index inside the text part already means
+    // the whole pattern matches there, so the text may contain any character.
+    const std::string str = pat + txt;
+    const int len = static_cast<int>(str.length());
+    const std::vector<int> z_val = z_function(str);
+
+    for (int i = pat_len; i < len; ++i)
+    {
+        // i - pat_len is the 0-based offset in txt; the +1 makes the printed
+        // position 1-based.
+        if (z_val[i] >= pat_len)
+            std::cout << i - pat_len + 1 << '\n';
+    }
+    return 0;
+}