Algorithms_in_C++ 1.0.0
Set of algorithms implemented in C++.
Loading...
Searching...
No Matches
strings::boyer_moore Namespace Reference

Functions for the Boyer Moore algorithm implementation. More...

Classes

struct  pattern
 A structure representing all the data we need to search the preprocessed pattern in text. More...
 

Functions

void init_good_suffix (const std::string &str, std::vector< size_t > &arg)
 A function that preprocess the good suffix thable.
 
void init_bad_char (const std::string &str, std::vector< size_t > &arg)
 A function that preprocess the bad char table.
 
void init_pattern (const std::string &str, pattern &arg)
 A function that initializes pattern.
 
std::vector< size_t > search (const std::string &str, const pattern &arg)
 A function that implements Boyer-Moore's algorithm.
 
bool is_prefix (const char *str, const char *pat, size_t len)
 Check if pat is prefix of str.
 

Detailed Description

Functions for the Boyer Moore algorithm implementation.

Function Documentation

◆ init_bad_char()

void strings::boyer_moore::init_bad_char ( const std::string & str,
std::vector< size_t > & arg )

A function that preprocess the bad char table.

Parameters
strThe string being preprocessed
argThe bad char table
Returns
void
138 {
139 arg.resize(APLHABET_SIZE, str.length());
140
141 for (size_t i = 0; i < str.length(); i++) {
142 arg[str[i]] = str.length() - i - 1;
143 }
144}
#define APLHABET_SIZE
number of symbols in the alphabet we use
Definition boyer_moore.cpp:52
T resize(T... args)
T length(T... args)
Here is the call graph for this function:

◆ init_good_suffix()

void strings::boyer_moore::init_good_suffix ( const std::string & str,
std::vector< size_t > & arg )

A function that preprocess the good suffix thable.

Parameters
strThe string being preprocessed
argThe good suffix table
Returns
void
89 {
90 arg.resize(str.size() + 1, 0);
91
92 // border_pos[i] - the index of the longest proper suffix of str[i..] which
93 // is also a proper prefix.
94 std::vector<size_t> border_pos(str.size() + 1, 0);
95
96 size_t current_char = str.length();
97
98 size_t border_index = str.length() + 1;
99
100 border_pos[current_char] = border_index;
101
102 while (current_char > 0) {
103 while (border_index <= str.length() &&
104 str[current_char - 1] != str[border_index - 1]) {
105 if (arg[border_index] == 0) {
106 arg[border_index] = border_index - current_char;
107 }
108
109 border_index = border_pos[border_index];
110 }
111
112 current_char--;
113 border_index--;
114 border_pos[current_char] = border_index;
115 }
116
117 size_t largest_border_index = border_pos[0];
118
119 for (size_t i = 0; i < str.size(); i++) {
120 if (arg[i] == 0) {
121 arg[i] = largest_border_index;
122 }
123
124 // If we go pass the largest border we find the next one as we iterate
125 if (i == largest_border_index) {
126 largest_border_index = border_pos[largest_border_index];
127 }
128 }
129}
Here is the call graph for this function:

◆ init_pattern()

void strings::boyer_moore::init_pattern ( const std::string & str,
pattern & arg )

A function that initializes pattern.

Parameters
strText used for initialization
argInitialized structure
Returns
void
153 {
154 arg.pat = str;
155 init_bad_char(str, arg.bad_char);
157}
void init_bad_char(const std::string &str, std::vector< size_t > &arg)
A function that preprocess the bad char table.
Definition boyer_moore.cpp:138
void init_good_suffix(const std::string &str, std::vector< size_t > &arg)
A function that preprocess the good suffix thable.
Definition boyer_moore.cpp:89
std::vector< size_t > good_suffix
Definition boyer_moore.cpp:78
std::vector< size_t > bad_char
Definition boyer_moore.cpp:74
Here is the call graph for this function:

◆ is_prefix()

bool strings::boyer_moore::is_prefix ( const char * str,
const char * pat,
size_t len )

Check if pat is prefix of str.

Parameters
strpointer to some part of the input text.
patthe searched pattern.
lenlength of the searched pattern
Returns
true if pat IS prefix of str.
false if pat is NOT a prefix of str.
200 {
201 if (strlen(str) < len) {
202 return false;
203 }
204
205 for (size_t i = 0; i < len; i++) {
206 if (str[i] != pat[i]) {
207 return false;
208 }
209 }
210
211 return true;
212}

◆ search()

std::vector< size_t > strings::boyer_moore::search ( const std::string & str,
const pattern & arg )

A function that implements Boyer-Moore's algorithm.

Parameters
strText we are seatching in.
argpattern structure containing the preprocessed pattern
Returns
Vector of indexes of the occurrences of pattern in text
165 {
166 size_t index_position = arg.pat.size() - 1;
167 std::vector<size_t> index_storage;
168
169 while (index_position < str.length()) {
170 size_t index_string = index_position;
171 int index_pattern = static_cast<int>(arg.pat.size()) - 1;
172
173 while (index_pattern >= 0 &&
174 str[index_string] == arg.pat[index_pattern]) {
175 --index_pattern;
176 --index_string;
177 }
178
179 if (index_pattern < 0) {
180 index_storage.push_back(index_position - arg.pat.length() + 1);
181 index_position += arg.good_suffix[0];
182 } else {
183 index_position += std::max(arg.bad_char[str[index_string]],
184 arg.good_suffix[index_pattern + 1]);
185 }
186 }
187
188 return index_storage;
189}
T max(T... args)
T push_back(T... args)
Here is the call graph for this function: