Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
String_Search.H
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
31
53# ifndef STRING_SEARCH_H
54# define STRING_SEARCH_H
55
56# include <cstdint>
57# include <algorithm>
58# include <string>
59# include <string_view>
60# include <array>
61
62# include <tpl_array.H>
63
64namespace Aleph
65{
66 namespace string_search_detail
67 {
72 inline Array<size_t> all_match_positions(const size_t text_size)
73 {
75 matches.reserve(text_size + 1);
76 for (size_t i = 0; i <= text_size; ++i)
77 matches.append(i);
78 return matches;
79 }
80
81
88 inline bool find_unused_delimiter(const std::string_view a,
89 const std::string_view b,
90 char & delimiter)
91 {
92 std::array<bool, 256> used{};
93 used.fill(false);
94 for (const unsigned char c : a)
95 used[c] = true;
96
97 for (const unsigned char c : b)
98 used[c] = true;
99
100 for (size_t i = 0; i < 256; ++i)
101 if (not used[i])
102 {
103 delimiter = static_cast<char>(i);
104 return true;
105 }
106
107 return false;
108 }
109
110
117 inline bool equals_at(const std::string_view text,
118 const std::string_view pattern,
119 const size_t pos)
120 {
121 for (size_t i = 0; i < pattern.size(); ++i)
122 if (text[pos + i] != pattern[i])
123 return false;
124 return true;
125 }
126 } // namespace string_search_detail
127
128
141 inline Array<size_t> kmp_prefix_function(const std::string_view pattern)
142 {
143 Array<size_t> pi;
144 if (pattern.empty())
145 return pi;
146
147 pi = Array<size_t>::create(pattern.size());
148 pi[0] = 0;
149
150 size_t j = 0;
151 for (size_t i = 1; i < pattern.size(); ++i)
152 {
153 while (j > 0 and pattern[i] != pattern[j])
154 j = pi[j - 1];
155
156 if (pattern[i] == pattern[j])
157 ++j;
158
159 pi[i] = j;
160 }
161
162 return pi;
163 }
164
165
178 inline Array<size_t> kmp_search(const std::string_view text,
179 const std::string_view pattern)
180 {
181 if (pattern.empty())
183
185 if (text.size() < pattern.size())
186 return matches;
187
188 const Array<size_t> pi = kmp_prefix_function(pattern);
189
190 size_t j = 0;
191 for (size_t i = 0; i < text.size(); ++i)
192 {
193 while (j > 0 and text[i] != pattern[j])
194 j = pi[j - 1];
195
196 if (text[i] == pattern[j])
197 ++j;
198
199 if (j == pattern.size())
200 {
201 matches.append(i + 1 - pattern.size());
202 j = pi[j - 1];
203 }
204 }
205
206 return matches;
207 }
208
209
222 inline Array<size_t> z_algorithm(const std::string_view s)
223 {
225 if (s.empty())
226 return z;
227
228 z = Array<size_t>::create(s.size());
229 z[0] = s.size();
230
231 size_t l = 0;
232 size_t r = 0;
233
234 for (size_t i = 1; i < s.size(); ++i)
235 {
236 if (i <= r)
237 z[i] = std::min(r - i + 1, z[i - l]);
238 else
239 z[i] = 0;
240
241 while (i + z[i] < s.size() and s[z[i]] == s[i + z[i]])
242 ++z[i];
243
244 if (const size_t reach = i + z[i]; reach > r + 1)
245 {
246 l = i;
247 r = reach - 1;
248 }
249 }
250
251 return z;
252 }
253
254
269 inline Array<size_t> z_search(const std::string_view text,
270 const std::string_view pattern)
271 {
272 if (pattern.empty())
274
275 if (text.size() < pattern.size())
276 return {};
277
278 char delimiter = 0;
280 return kmp_search(text, pattern);
281
282 std::string combined;
283 combined.reserve(pattern.size() + text.size() + 1);
284 combined += pattern;
286 combined += text;
287
289
291 const size_t offset = pattern.size() + 1;
292 for (size_t i = offset; i < combined.size(); ++i)
293 if (z[i] >= pattern.size())
294 matches.append(i - offset);
295
296 return matches;
297 }
298
299
313 inline Array<size_t> boyer_moore_horspool_search(const std::string_view text,
314 const std::string_view pattern)
315 {
316 if (pattern.empty())
318
320 const size_t n = text.size();
321 const size_t m = pattern.size();
322
323 if (n < m)
324 return matches;
325
327 for (size_t i = 0; i < 256; ++i)
328 shift[i] = m;
329
330 for (size_t i = 0; i + 1 < m; ++i)
331 shift[static_cast<unsigned char>(pattern[i])] = m - 1 - i;
332
333 size_t i = 0;
334 while (i + m <= n)
335 {
336 size_t j = m;
337 while (j > 0 and pattern[j - 1] == text[i + j - 1])
338 --j;
339
340 if (j == 0)
341 matches.append(i);
342
343 const auto c = static_cast<unsigned char>(text[i + m - 1]);
344 const size_t step = shift[c] == 0 ? 1 : shift[c];
345 i += step;
346 }
347
348 return matches;
349 }
350
351
371 inline Array<size_t> rabin_karp_search(const std::string_view text,
372 const std::string_view pattern,
373 const uint64_t base = 911382323ull)
374 {
375 if (pattern.empty())
377
379 const size_t n = text.size();
380 const size_t m = pattern.size();
381
382 if (n < m)
383 return matches;
384
385 uint64_t power = 1;
386 for (size_t i = 1; i < m; ++i)
387 power *= base;
388
391
392 for (size_t i = 0; i < m; ++i)
393 {
395 + static_cast<unsigned char>(pattern[i]) + 1ull;
396 window_hash = window_hash * base
397 + static_cast<unsigned char>(text[i]) + 1ull;
398 }
399
402 matches.append(0);
403
404 for (size_t i = m; i < n; ++i)
405 {
406 const uint64_t old_c = static_cast<unsigned char>(text[i - m]) + 1ull;
407 const uint64_t new_c = static_cast<unsigned char>(text[i]) + 1ull;
408
409 window_hash = (window_hash - old_c * power) * base + new_c;
410
411 if (const size_t pos = i + 1 - m; window_hash == pattern_hash
413 matches.append(pos);
414 }
415
416 return matches;
417 }
418} // namespace Aleph
419
420# endif // STRING_SEARCH_H
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:139
static Array create(size_t n)
Create an array with n logical elements.
Definition tpl_array.H:194
constexpr size_t size() const noexcept
Return the number of elements stored in the stack.
Definition tpl_array.H:351
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:245
void reserve(size_t cap)
Reserves cap cells into the array.
Definition tpl_array.H:315
Key * append(const Key &key)
Alias for insert() (copy version).
Definition hashDry.H:389
constexpr size_t size() const noexcept
Returns the number of entries in the table.
Definition hashDry.H:619
const long double offset[]
Offset values indexed by symbol string length (bounded by MAX_OFFSET_INDEX)
bool equals_at(const std::string_view text, const std::string_view pattern, const size_t pos)
Check if pattern matches text at given position.
bool find_unused_delimiter(const std::string_view a, const std::string_view b, char &delimiter)
Find a character not present in either string.
Array< size_t > all_match_positions(const size_t text_size)
Generate an array of all positions from 0 to text_size.
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
and
Check uniqueness with explicit hash + equality functors.
Array< size_t > z_search(const std::string_view text, const std::string_view pattern)
Find all occurrences of a pattern using the Z-algorithm.
Array< size_t > rabin_karp_search(const std::string_view text, const std::string_view pattern, const uint64_t base=911382323ull)
Find all occurrences using Rabin-Karp with rolling hash.
Array< size_t > z_algorithm(const std::string_view s)
Compute Z-array for a string.
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
Array< size_t > kmp_search(const std::string_view text, const std::string_view pattern)
Find all occurrences of a pattern using KMP.
Array< size_t > kmp_prefix_function(const std::string_view pattern)
Prefix-function used by KMP.
Array< size_t > boyer_moore_horspool_search(const std::string_view text, const std::string_view pattern)
Find all occurrences using Boyer-Moore-Horspool.
FooMap m(5, fst_unit_pair_hash, snd_unit_pair_hash)
gsl_rng * r
Dynamic array container with automatic resizing.
DynList< int > l