Docs
/
DSA Advanced
Chapter 13
13 — String Algorithms
KMP (Knuth-Morris-Pratt)
Pattern matching in O(n + m) using failure function (LPS array).
function kmpSearch(text: string, pattern: string): number[] {
const lps = buildLPS(pattern);
const matches: number[] = [];
let j = 0;
for (let i = 0; i < text.length; i++) {
while (j > 0 && text[i] !== pattern[j]) j = lps[j - 1];
if (text[i] === pattern[j]) j++;
if (j === pattern.length) {
matches.push(i - j + 1);
j = lps[j - 1];
}
}
return matches;
}
function buildLPS(pattern: string): number[] {
const lps = new Array(pattern.length).fill(0);
let len = 0, i = 1;
while (i < pattern.length) {
if (pattern[i] === pattern[len]) {
lps[i++] = ++len;
} else if (len > 0) {
len = lps[len - 1]; // Don't increment i
} else {
lps[i++] = 0;
}
}
return lps;
}
// "ABABCABAB" → LPS: [0,0,1,2,0,1,2,3,4]
Rabin-Karp (Rolling Hash)
function rabinKarp(text: string, pattern: string): number[] {
const MOD = 1e9 + 7;
const BASE = 31;
const n = text.length, m = pattern.length;
if (m > n) return [];
// Compute pattern hash and first window hash
let patHash = 0, winHash = 0, power = 1;
for (let i = 0; i < m; i++) {
patHash = (patHash * BASE + pattern.charCodeAt(i)) % MOD;
winHash = (winHash * BASE + text.charCodeAt(i)) % MOD;
if (i > 0) power = (power * BASE) % MOD;
}
const matches: number[] = [];
for (let i = 0; i <= n - m; i++) {
if (winHash === patHash && text.substring(i, i + m) === pattern) {
matches.push(i);
}
if (i < n - m) {
winHash = ((winHash - text.charCodeAt(i) * power % MOD + MOD) * BASE + text.charCodeAt(i + m)) % MOD;
}
}
return matches;
}
Z-Algorithm
z[i] = length of longest substring starting at i that matches a prefix of the string.
function zFunction(s: string): number[] {
const n = s.length;
const z = new Array(n).fill(0);
let l = 0, r = 0;
for (let i = 1; i < n; i++) {
if (i < r) z[i] = Math.min(r - i, z[i - l]);
while (i + z[i] < n && s[z[i]] === s[i + z[i]]) z[i]++;
if (i + z[i] > r) { l = i; r = i + z[i]; }
}
return z;
}
// Pattern matching with Z: concat pattern + "$" + text
function zSearch(text: string, pattern: string): number[] {
const s = pattern + '$' + text;
const z = zFunction(s);
const matches: number[] = [];
for (let i = pattern.length + 1; i < s.length; i++) {
if (z[i] === pattern.length) matches.push(i - pattern.length - 1);
}
return matches;
}
Suffix Array
Sorted array of all suffixes — enables efficient substring search.
// Build suffix array in O(n log²n)
function buildSuffixArray(s: string): number[] {
const n = s.length;
let sa = Array.from({ length: n }, (_, i) => i);
let rank = Array.from(s, ch => ch.charCodeAt(0));
let tmp = new Array(n);
for (let k = 1; k < n; k <<= 1) {
const r = [...rank];
const cmp = (a: number, b: number) => {
if (r[a] !== r[b]) return r[a] - r[b];
const ra = a + k < n ? r[a + k] : -1;
const rb = b + k < n ? r[b + k] : -1;
return ra - rb;
};
sa.sort(cmp);
tmp[sa[0]] = 0;
for (let i = 1; i < n; i++) {
tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) < 0 ? 1 : 0);
}
rank = [...tmp];
}
return sa;
}
Rolling Hash
class RollingHash {
private hash = 0;
private power = 1;
private readonly BASE = 31;
private readonly MOD = 1e9 + 7;
private chars: number[] = [];
add(ch: string) {
this.hash = (this.hash * this.BASE + ch.charCodeAt(0)) % this.MOD;
this.chars.push(ch.charCodeAt(0));
if (this.chars.length > 1) this.power = (this.power * this.BASE) % this.MOD;
}
removeFirst() {
this.hash = ((this.hash - this.chars[0] * this.power % this.MOD) + this.MOD) % this.MOD;
this.chars.shift();
if (this.chars.length > 0) this.power = (this.power / this.BASE); // Use modular inverse in practice
}
value(): number { return this.hash; }
}
Use cases: substring matching, longest duplicate substring, plagiarism detection.
Comparison
| Algorithm | Time | Use Case |
|---|---|---|
| KMP | O(n + m) | Single pattern match |
| Rabin-Karp | O(n + m) avg | Multiple pattern match, plagiarism |
| Z-Algorithm | O(n + m) | Pattern match, period of string |
| Suffix Array | O(n log²n) build | All substring queries |
| Rolling Hash | O(1) per slide | Sliding window on strings |
Key Takeaways
- KMP: build LPS array to skip redundant comparisons — O(n + m) guaranteed
- Rabin-Karp: rolling hash for O(1) window slide; verify on hash match
- Z-Algorithm: z[i] = prefix match length at position i; pattern match via concatenation
- Suffix Array: sorted suffixes enable binary search on substrings
- Choose by use case: single pattern → KMP, multiple patterns → Rabin-Karp, all substrings → Suffix Array