Docs
/
DSA Advanced
Chapter 13

13 — String Algorithms

KMP (Knuth-Morris-Pratt)

Pattern matching in O(n + m) using failure function (LPS array).

function kmpSearch(text: string, pattern: string): number[] {
  const lps = buildLPS(pattern);
  const matches: number[] = [];
  let j = 0;
  
  for (let i = 0; i < text.length; i++) {
    while (j > 0 && text[i] !== pattern[j]) j = lps[j - 1];
    if (text[i] === pattern[j]) j++;
    if (j === pattern.length) {
      matches.push(i - j + 1);
      j = lps[j - 1];
    }
  }
  return matches;
}

function buildLPS(pattern: string): number[] {
  const lps = new Array(pattern.length).fill(0);
  let len = 0, i = 1;
  
  while (i < pattern.length) {
    if (pattern[i] === pattern[len]) {
      lps[i++] = ++len;
    } else if (len > 0) {
      len = lps[len - 1]; // Don't increment i
    } else {
      lps[i++] = 0;
    }
  }
  return lps;
}

// "ABABCABAB" → LPS: [0,0,1,2,0,1,2,3,4]

Rabin-Karp (Rolling Hash)

function rabinKarp(text: string, pattern: string): number[] {
  const MOD = 1e9 + 7;
  const BASE = 31;
  const n = text.length, m = pattern.length;
  if (m > n) return [];
  
  // Compute pattern hash and first window hash
  let patHash = 0, winHash = 0, power = 1;
  for (let i = 0; i < m; i++) {
    patHash = (patHash * BASE + pattern.charCodeAt(i)) % MOD;
    winHash = (winHash * BASE + text.charCodeAt(i)) % MOD;
    if (i > 0) power = (power * BASE) % MOD;
  }
  
  const matches: number[] = [];
  for (let i = 0; i <= n - m; i++) {
    if (winHash === patHash && text.substring(i, i + m) === pattern) {
      matches.push(i);
    }
    if (i < n - m) {
      winHash = ((winHash - text.charCodeAt(i) * power % MOD + MOD) * BASE + text.charCodeAt(i + m)) % MOD;
    }
  }
  return matches;
}

Z-Algorithm

z[i] = length of longest substring starting at i that matches a prefix of the string.

function zFunction(s: string): number[] {
  const n = s.length;
  const z = new Array(n).fill(0);
  let l = 0, r = 0;
  
  for (let i = 1; i < n; i++) {
    if (i < r) z[i] = Math.min(r - i, z[i - l]);
    
    while (i + z[i] < n && s[z[i]] === s[i + z[i]]) z[i]++;
    
    if (i + z[i] > r) { l = i; r = i + z[i]; }
  }
  return z;
}

// Pattern matching with Z: concat pattern + "$" + text
function zSearch(text: string, pattern: string): number[] {
  const s = pattern + '$' + text;
  const z = zFunction(s);
  const matches: number[] = [];
  for (let i = pattern.length + 1; i < s.length; i++) {
    if (z[i] === pattern.length) matches.push(i - pattern.length - 1);
  }
  return matches;
}

Suffix Array

Sorted array of all suffixes — enables efficient substring search.

// Build suffix array in O(n log²n)
function buildSuffixArray(s: string): number[] {
  const n = s.length;
  let sa = Array.from({ length: n }, (_, i) => i);
  let rank = Array.from(s, ch => ch.charCodeAt(0));
  let tmp = new Array(n);
  
  for (let k = 1; k < n; k <<= 1) {
    const r = [...rank];
    const cmp = (a: number, b: number) => {
      if (r[a] !== r[b]) return r[a] - r[b];
      const ra = a + k < n ? r[a + k] : -1;
      const rb = b + k < n ? r[b + k] : -1;
      return ra - rb;
    };
    sa.sort(cmp);
    
    tmp[sa[0]] = 0;
    for (let i = 1; i < n; i++) {
      tmp[sa[i]] = tmp[sa[i - 1]] + (cmp(sa[i - 1], sa[i]) < 0 ? 1 : 0);
    }
    rank = [...tmp];
  }
  return sa;
}

Rolling Hash

class RollingHash {
  private hash = 0;
  private power = 1;
  private readonly BASE = 31;
  private readonly MOD = 1e9 + 7;
  private chars: number[] = [];
  
  add(ch: string) {
    this.hash = (this.hash * this.BASE + ch.charCodeAt(0)) % this.MOD;
    this.chars.push(ch.charCodeAt(0));
    if (this.chars.length > 1) this.power = (this.power * this.BASE) % this.MOD;
  }
  
  removeFirst() {
    this.hash = ((this.hash - this.chars[0] * this.power % this.MOD) + this.MOD) % this.MOD;
    this.chars.shift();
    if (this.chars.length > 0) this.power = (this.power / this.BASE); // Use modular inverse in practice
  }
  
  value(): number { return this.hash; }
}

Use cases: substring matching, longest duplicate substring, plagiarism detection.


Comparison

AlgorithmTimeUse Case
KMPO(n + m)Single pattern match
Rabin-KarpO(n + m) avgMultiple pattern match, plagiarism
Z-AlgorithmO(n + m)Pattern match, period of string
Suffix ArrayO(n log²n) buildAll substring queries
Rolling HashO(1) per slideSliding window on strings

Key Takeaways

  • KMP: build LPS array to skip redundant comparisons — O(n + m) guaranteed
  • Rabin-Karp: rolling hash for O(1) window slide; verify on hash match
  • Z-Algorithm: z[i] = prefix match length at position i; pattern match via concatenation
  • Suffix Array: sorted suffixes enable binary search on substrings
  • Choose by use case: single pattern → KMP, multiple patterns → Rabin-Karp, all substrings → Suffix Array