Skip to content

Commit dfaa495

Browse files
divyanshsaxena002Divyansh SaxenaDenizAltunkapan
authored
Refactor KMP and RabinKarp: Improve Reusability and Test Coverage (#7250)
* first commit * Running KMPTest and RabinKarpTest with fixed formatting * now build failed error resolved * now build failed error resolved 2 --------- Co-authored-by: Divyansh Saxena <[email protected]> Co-authored-by: Deniz Altunkapan <[email protected]>
1 parent f3fd9ca commit dfaa495

File tree

4 files changed

+119
-45
lines changed

4 files changed

+119
-45
lines changed

src/main/java/com/thealgorithms/strings/KMP.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package com.thealgorithms.strings;
22

3+
import java.util.ArrayList;
4+
import java.util.List;
5+
36
/**
47
* Implementation of the Knuth–Morris–Pratt string-matching algorithm. Usage: call
58
* {@link #kmpMatcher(String, String)}, which returns the match positions as a list.
@@ -8,16 +11,19 @@ public final class KMP {
811
private KMP() {
912
}
1013

11-
// a working example
12-
13-
public static void main(String[] args) {
14-
final String haystack = "AAAAABAAABA"; // This is the full string
15-
final String needle = "AAAA"; // This is the substring that we want to find
16-
kmpMatcher(haystack, needle);
17-
}
14+
/**
15+
* Finds every starting index in {@code haystack} at which {@code needle} occurs.
16+
*
17+
* @param haystack The text to be searched
18+
* @param needle The pattern to be searched for
19+
* @return A list of 0-based starting indices where the pattern is found (overlapping matches included); empty if either argument is null or the pattern is empty
20+
*/
21+
public static List<Integer> kmpMatcher(final String haystack, final String needle) {
22+
List<Integer> occurrences = new ArrayList<>();
23+
if (haystack == null || needle == null || needle.isEmpty()) {
24+
return occurrences;
25+
}
1826

19-
// find the starting index in string haystack[] that matches the search word P[]
20-
public static void kmpMatcher(final String haystack, final String needle) {
2127
final int m = haystack.length();
2228
final int n = needle.length();
2329
final int[] pi = computePrefixFunction(needle);
@@ -32,10 +38,11 @@ public static void kmpMatcher(final String haystack, final String needle) {
3238
}
3339

3440
if (q == n) {
35-
System.out.println("Pattern starts: " + (i + 1 - n));
41+
occurrences.add(i + 1 - n);
3642
q = pi[q - 1];
3743
}
3844
}
45+
return occurrences;
3946
}
4047

4148
// return the prefix function
Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,30 @@
11
package com.thealgorithms.strings;
22

3-
import java.util.Scanner;
3+
import java.util.ArrayList;
4+
import java.util.List;
45

56
/**
67
* @author Prateek Kumar Oraon (https://github.com/prateekKrOraon)
78
*
8-
An implementation of Rabin-Karp string matching algorithm
9-
Program will simply end if there is no match
9+
* An implementation of Rabin-Karp string matching algorithm
10+
* Returns an empty list if there is no match
1011
*/
1112
public final class RabinKarp {
1213
private RabinKarp() {
1314
}
1415

15-
public static Scanner scanner = null;
16-
public static final int ALPHABET_SIZE = 256;
16+
private static final int ALPHABET_SIZE = 256;
1717

18-
public static void main(String[] args) {
19-
scanner = new Scanner(System.in);
20-
System.out.println("Enter String");
21-
String text = scanner.nextLine();
22-
System.out.println("Enter pattern");
23-
String pattern = scanner.nextLine();
24-
25-
int q = 101;
26-
searchPat(text, pattern, q);
18+
public static List<Integer> search(String text, String pattern) {
19+
return search(text, pattern, 101);
2720
}
2821

29-
private static void searchPat(String text, String pattern, int q) {
22+
public static List<Integer> search(String text, String pattern, int q) {
23+
List<Integer> occurrences = new ArrayList<>();
24+
if (text == null || pattern == null || pattern.isEmpty()) {
25+
return occurrences;
26+
}
27+
3028
int m = pattern.length();
3129
int n = text.length();
3230
int t = 0;
@@ -35,48 +33,42 @@ private static void searchPat(String text, String pattern, int q) {
3533
int j = 0;
3634
int i = 0;
3735

38-
h = (int) Math.pow(ALPHABET_SIZE, m - 1) % q;
36+
if (m > n) {
37+
return new ArrayList<>();
38+
}
39+
40+
// h = pow(ALPHABET_SIZE, m - 1) % q, built by repeated modular multiplication instead of Math.pow
41+
for (i = 0; i < m - 1; i++) {
42+
h = h * ALPHABET_SIZE % q;
43+
}
3944

4045
for (i = 0; i < m; i++) {
41-
// hash value is calculated for each character and then added with the hash value of the
42-
// next character for pattern as well as the text for length equal to the length of
43-
// pattern
4446
p = (ALPHABET_SIZE * p + pattern.charAt(i)) % q;
4547
t = (ALPHABET_SIZE * t + text.charAt(i)) % q;
4648
}
4749

4850
for (i = 0; i <= n - m; i++) {
49-
// if the calculated hash value of the pattern and text matches then
50-
// all the characters of the pattern is matched with the text of length equal to length
51-
// of the pattern if all matches then pattern exist in string if not then the hash value
52-
// of the first character of the text is subtracted and hash value of the next character
53-
// after the end of the evaluated characters is added
5451
if (p == t) {
55-
// if hash value matches then the individual characters are matched
5652
for (j = 0; j < m; j++) {
57-
// if not matched then break out of the loop
5853
if (text.charAt(i + j) != pattern.charAt(j)) {
5954
break;
6055
}
6156
}
6257

63-
// if all characters are matched then pattern exist in the string
6458
if (j == m) {
65-
System.out.println("Pattern found at index " + i);
59+
occurrences.add(i);
6660
}
6761
}
6862

69-
// if i<n-m then hash value of the first character of the text is subtracted and hash
70-
// value of the next character after the end of the evaluated characters is added to get
71-
// the hash value of the next window of characters in the text
7263
if (i < n - m) {
73-
t = (ALPHABET_SIZE * (t - text.charAt(i) * h) + text.charAt(i + m)) % q;
74-
75-
// if hash value becomes less than zero than q is added to make it positive
64+
t = (t - text.charAt(i) * h % q);
7665
if (t < 0) {
77-
t = (t + q);
66+
t += q;
7867
}
68+
t = t * ALPHABET_SIZE % q;
69+
t = (t + text.charAt(i + m)) % q;
7970
}
8071
}
72+
return occurrences;
8173
}
8274
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package com.thealgorithms.strings;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
5+
import java.util.List;
6+
import org.junit.jupiter.api.Test;
7+
8+
public class KMPTest {
9+
10+
@Test
11+
public void testNullInputs() {
12+
assertEquals(List.of(), KMP.kmpMatcher(null, "A"));
13+
assertEquals(List.of(), KMP.kmpMatcher("A", null));
14+
assertEquals(List.of(), KMP.kmpMatcher(null, null));
15+
}
16+
17+
@Test
18+
public void testKMPMatcher() {
19+
assertEquals(List.of(0, 1), KMP.kmpMatcher("AAAAABAAABA", "AAAA"));
20+
assertEquals(List.of(0, 3), KMP.kmpMatcher("ABCABC", "ABC"));
21+
assertEquals(List.of(10), KMP.kmpMatcher("ABABDABACDABABCABAB", "ABABCABAB"));
22+
assertEquals(List.of(), KMP.kmpMatcher("ABCDE", "FGH"));
23+
assertEquals(List.of(), KMP.kmpMatcher("A", "AA"));
24+
assertEquals(List.of(0, 1, 2), KMP.kmpMatcher("AAA", "A"));
25+
assertEquals(List.of(0), KMP.kmpMatcher("A", "A"));
26+
assertEquals(List.of(), KMP.kmpMatcher("", "A"));
27+
assertEquals(List.of(), KMP.kmpMatcher("A", ""));
28+
}
29+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package com.thealgorithms.strings;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
5+
import java.util.List;
6+
import org.junit.jupiter.api.Test;
7+
8+
public class RabinKarpTest {
9+
10+
@Test
11+
public void testNullInputs() {
12+
assertEquals(List.of(), RabinKarp.search(null, "A"));
13+
assertEquals(List.of(), RabinKarp.search("A", null));
14+
assertEquals(List.of(), RabinKarp.search(null, null));
15+
}
16+
17+
@Test
18+
public void testHashCollision() {
19+
// 'a' = 97. (char)198 % 101 = 97.
20+
// For length 1, h = 1. p = 97. t = 198 % 101 = 97.
21+
// Collision occurs, loop checks characters: 198 != 97, breaks.
22+
char collisionChar = (char) 198;
23+
String text = String.valueOf(collisionChar);
24+
String pattern = "a";
25+
assertEquals(List.of(), RabinKarp.search(text, pattern));
26+
}
27+
28+
@Test
29+
public void testSearchWithCustomQ() {
30+
// Using a different prime
31+
assertEquals(List.of(0, 1), RabinKarp.search("AAAA", "AAA", 13));
32+
}
33+
34+
@Test
35+
public void testRabinKarpSearch() {
36+
assertEquals(List.of(0, 1), RabinKarp.search("AAAAABAAABA", "AAAA"));
37+
assertEquals(List.of(0, 3), RabinKarp.search("ABCABC", "ABC"));
38+
assertEquals(List.of(10), RabinKarp.search("ABABDABACDABABCABAB", "ABABCABAB"));
39+
assertEquals(List.of(), RabinKarp.search("ABCDE", "FGH"));
40+
assertEquals(List.of(), RabinKarp.search("A", "AA"));
41+
assertEquals(List.of(0, 1, 2), RabinKarp.search("AAA", "A"));
42+
assertEquals(List.of(0), RabinKarp.search("A", "A"));
43+
assertEquals(List.of(), RabinKarp.search("", "A"));
44+
assertEquals(List.of(), RabinKarp.search("A", ""));
45+
}
46+
}

0 commit comments

Comments
 (0)