TheAlgorithms/C++ 1.0.0
All the algorithms implemented in C++
Loading...
Searching...
No Matches
boyer_moore.cpp
Go to the documentation of this file.
1
45#include <cassert>
46#include <climits>
47#include <cstring>
48#include <iostream>
49#include <string>
50#include <vector>
51
52#define APLHABET_SIZE CHAR_MAX
53
58namespace strings {
65namespace boyer_moore {
70struct pattern {
71 std::string pat;
72
73 std::vector<size_t>
76
77 std::vector<size_t>
80};
81
89void init_good_suffix(const std::string& str, std::vector<size_t>& arg) {
90 arg.resize(str.size() + 1, 0);
91
92 // border_pos[i] - the index of the longest proper suffix of str[i..] which
93 // is also a proper prefix.
94 std::vector<size_t> border_pos(str.size() + 1, 0);
95
96 size_t current_char = str.length();
97
98 size_t border_index = str.length() + 1;
99
100 border_pos[current_char] = border_index;
101
102 while (current_char > 0) {
103 while (border_index <= str.length() &&
104 str[current_char - 1] != str[border_index - 1]) {
105 if (arg[border_index] == 0) {
106 arg[border_index] = border_index - current_char;
107 }
108
109 border_index = border_pos[border_index];
110 }
111
112 current_char--;
113 border_index--;
114 border_pos[current_char] = border_index;
115 }
116
117 size_t largest_border_index = border_pos[0];
118
119 for (size_t i = 0; i < str.size(); i++) {
120 if (arg[i] == 0) {
121 arg[i] = largest_border_index;
122 }
123
124 // If we go pass the largest border we find the next one as we iterate
125 if (i == largest_border_index) {
126 largest_border_index = border_pos[largest_border_index];
127 }
128 }
129}
130
138void init_bad_char(const std::string& str, std::vector<size_t>& arg) {
139 arg.resize(APLHABET_SIZE, str.length());
140
141 for (size_t i = 0; i < str.length(); i++) {
142 arg[str[i]] = str.length() - i - 1;
143 }
144}
145
153void init_pattern(const std::string& str, pattern& arg) {
154 arg.pat = str;
155 init_bad_char(str, arg.bad_char);
157}
165std::vector<size_t> search(const std::string& str, const pattern& arg) {
166 size_t index_position = arg.pat.size() - 1;
167 std::vector<size_t> index_storage;
168
169 while (index_position < str.length()) {
170 size_t index_string = index_position;
171 int index_pattern = static_cast<int>(arg.pat.size()) - 1;
172
173 while (index_pattern >= 0 &&
174 str[index_string] == arg.pat[index_pattern]) {
175 --index_pattern;
176 --index_string;
177 }
178
179 if (index_pattern < 0) {
180 index_storage.push_back(index_position - arg.pat.length() + 1);
181 index_position += arg.good_suffix[0];
182 } else {
183 index_position += std::max(arg.bad_char[str[index_string]],
184 arg.good_suffix[index_pattern + 1]);
185 }
186 }
187
188 return index_storage;
189}
190
200bool is_prefix(const char* str, const char* pat, size_t len) {
201 if (strlen(str) < len) {
202 return false;
203 }
204
205 for (size_t i = 0; i < len; i++) {
206 if (str[i] != pat[i]) {
207 return false;
208 }
209 }
210
211 return true;
212}
213} // namespace boyer_moore
214} // namespace strings
220void and_test(const char* text) {
223 std::vector<size_t> indexes = strings::boyer_moore::search(text, ands);
224
225 assert(indexes.size() == 2);
226 assert(strings::boyer_moore::is_prefix(text + indexes[0], "and", 3));
227 assert(strings::boyer_moore::is_prefix(text + indexes[1], "and", 3));
228}
229
235void pat_test(const char* text) {
238 std::vector<size_t> indexes = strings::boyer_moore::search(text, pat);
239
240 assert(indexes.size() == 6);
241
242 for (const auto& currentIndex : indexes) {
243 assert(strings::boyer_moore::is_prefix(text + currentIndex, "pat", 3));
244 }
245}
250static void tests() {
251 const char* text =
252 "When pat Mr. and Mrs. pat Dursley woke up on the dull, gray \
253 Tuesday our story starts, \
254 there was nothing about pat the cloudy sky outside to pat suggest that\
255 strange and \
256 mysterious things would pat soon be happening all pat over the \
257 country.";
258
259 and_test(text);
260 pat_test(text);
261
262 std::cout << "All tests have successfully passed!\n";
263}
264
269int main() {
270 tests(); // run self-test implementations
271 return 0;
272}
static void tests()
Self-test implementations.
#define APLHABET_SIZE
number of symbols in the alphabet we use
void pat_test(const char *text)
A test case in which we search for every appearance of the word 'pat'.
void and_test(const char *text)
A test case in which we search for every appearance of the word 'and'.
int main()
Main function.
for std::assert
bool is_prefix(const char *str, const char *pat, size_t len)
Check if pat is prefix of str.
void init_pattern(const std::string &str, pattern &arg)
A function that initializes pattern.
std::vector< size_t > search(const std::string &str, const pattern &arg)
A function that implements Boyer-Moore's algorithm.
void init_bad_char(const std::string &str, std::vector< size_t > &arg)
A function that preprocess the bad char table.
void init_good_suffix(const std::string &str, std::vector< size_t > &arg)
A function that preprocess the good suffix thable.
String algorithms.
A structure representing all the data we need to search the preprocessed pattern in text.
std::vector< size_t > good_suffix
std::vector< size_t > bad_char