Algorithms_in_C++ 1.0.0
Set of algorithms implemented in C++.
Loading...
Searching...
No Matches
string_search Namespace Reference

String search algorithms. More...

Functions

int brute_force (const std::string &text, const std::string &pattern)
 
std::vector< int > getFailureArray (const std::string &pattern)
 
bool kmp (const std::string &pattern, const std::string &text)
 
int64_t create_hash (const std::string &s, int n)
 
int64_t recalculate_hash (const std::string &s, int old_index, int new_index, int64_t old_hash, int patLength)
 
bool check_if_equal (const std::string &str1, const std::string &str2, int start1, int end1, int start2, int end2)
 
int rabin_karp (const std::string &str, const std::string &pat)
 

Detailed Description

String search algorithms.

Function Documentation

◆ brute_force()

int string_search::brute_force ( const std::string & text,
const std::string & pattern )

Find a pattern in a string by comparing the pattern to every substring.

Parameters
textAny string that might contain the pattern.
patternString that we are searching for.
Returns
Index where the pattern starts in the text
-1 if the pattern was not found.
21 {
22 size_t pat_l = pattern.length();
23 size_t txt_l = text.length();
24 int index = -1;
25 if (pat_l <= txt_l) {
26 for (size_t i = 0; i < txt_l - pat_l + 1; i++) {
27 std::string s = text.substr(i, pat_l);
28 if (s == pattern) {
29 index = i;
30 break;
31 }
32 }
33 }
34 return index;
35}
T length(T... args)
T substr(T... args)
Here is the call graph for this function:

◆ check_if_equal()

bool string_search::check_if_equal ( const std::string & str1,
const std::string & str2,
int start1,
int end1,
int start2,
int end2 )

compare if two sub-strings are equal

Parameters
[in]str1string pattern to search
[in]str2text in which to search
[in]start1,end1start and end indices for substring in str1
[in]start2,end2start and end indices for substring in str2
Returns
true if pattern was found
false if pattern was not found
Note
can this be replaced by std::string::compare?
61 {
62 if (end1 - start1 != end2 - start2) {
63 return false;
64 }
65 while (start1 <= end1 && start2 <= end2) {
66 if (str1[start1] != str2[start2]) {
67 return false;
68 }
69 start1++;
70 start2++;
71 }
72 return true;
73}

◆ create_hash()

int64_t string_search::create_hash ( const std::string & s,
int n )

convert a string to an intger - called as hashing function

Parameters
[in]ssource of string to hash
[in]nlength of substring to hash
Returns
hash integer
25 {
26 int64_t result = 0;
27 for (int i = 0; i < n; ++i) {
28 result += (int64_t)(s[i] * (int64_t)pow(PRIME, i));
29 }
30 return result;
31}
#define PRIME
Prime modulus for hash functions.
Definition rabin_karp.cpp:16

◆ getFailureArray()

std::vector< int > string_search::getFailureArray ( const std::string & pattern)

Generate the partial match table aka failure function for a pattern to search.

Parameters
[in]patterntext for which to create the partial match table
Returns
the partial match table as a vector array
33 {
34 int pattern_length = pattern.size();
35 std::vector<int> failure(pattern_length + 1);
36 failure[0] = -1;
37 int j = -1;
38
39 for (int i = 0; i < pattern_length; i++) {
40 while (j != -1 && pattern[j] != pattern[i]) {
41 j = failure[j];
42 }
43 j++;
44 failure[i + 1] = j;
45 }
46 return failure;
47}
Here is the call graph for this function:

◆ kmp()

bool string_search::kmp ( const std::string & pattern,
const std::string & text )

KMP algorithm to find a pattern in a text

Parameters
[in]patternstring pattern to search
[in]texttext in which to search
Returns
true if pattern was found
false if pattern was not found
56 {
57 int text_length = text.size(), pattern_length = pattern.size();
58 std::vector<int> failure = getFailureArray(pattern);
59
60 int k = 0;
61 for (int j = 0; j < text_length; j++) {
62 while (k != -1 && pattern[k] != text[j]) {
63 k = failure[k];
64 }
65 k++;
66 if (k == pattern_length)
67 return true;
68 }
69 return false;
70}
std::vector< int > getFailureArray(const std::string &pattern)
Definition knuth_morris_pratt.cpp:33
Here is the call graph for this function:

◆ rabin_karp()

int string_search::rabin_karp ( const std::string & str,
const std::string & pat )

Perform string pattern search using Rabin-Karp algorithm

Parameters
[in]strstring to search in
[in]patpattern to search for
Returns
index of first occurrence of pattern
-1 if pattern not found
83 {
84 int64_t pat_hash = create_hash(pat, pat.size());
85 int64_t str_hash = create_hash(str, pat.size());
86 for (int i = 0; i <= str.size() - pat.size(); ++i) {
87 if (pat_hash == str_hash &&
88 check_if_equal(str, pat, i, i + pat.size() - 1, 0,
89 pat.size() - 1)) {
90 return i;
91 }
92 if (i < str.size() - pat.size()) {
93 str_hash =
94 recalculate_hash(str, i, i + pat.size(), str_hash, pat.size());
95 }
96 }
97 return -1; // return -1 if given pattern not found
98}
int64_t create_hash(const std::string &s, int n)
Definition rabin_karp.cpp:25
bool check_if_equal(const std::string &str1, const std::string &str2, int start1, int end1, int start2, int end2)
Definition rabin_karp.cpp:60
Here is the call graph for this function:

◆ recalculate_hash()

int64_t string_search::recalculate_hash ( const std::string & s,
int old_index,
int new_index,
int64_t old_hash,
int patLength )

re-hash a string using known existing hash

Parameters
[in]ssource of string to hash
[in]old_indexprevious index of string
[in]new_indexnew index of string
[in]old_hashprevious hash of substring
[in]patLengthlength of substring to hash
Returns
new hash integer
43 {
44 int64_t new_hash = old_hash - s[old_index];
45 new_hash /= PRIME;
46 new_hash += (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1));
47 return new_hash;
48}