Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
String_DP.H
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
31
49# ifndef STRING_DP_H
50# define STRING_DP_H
51
52# include <algorithm>
53# include <string>
54# include <string_view>
55# include <array>
56
57# include <tpl_array.H>
58
59namespace Aleph
60{
61 namespace string_dp_detail
62 {
63 inline Array<size_t> make_zero_row(const size_t cols)
64 {
66 for (size_t j = 0; j < cols; ++j)
67 row[j] = 0;
68 return row;
69 }
70
71
72 inline Array<Array<size_t>> make_zero_matrix(const size_t rows,
73 const size_t cols)
74 {
76 mat.reserve(rows);
77 for (size_t i = 0; i < rows; ++i)
78 mat.append(make_zero_row(cols));
79 return mat;
80 }
81 } // namespace string_dp_detail
82
83
94 inline size_t levenshtein_distance(const std::string_view a,
95 const std::string_view b)
96 {
97 if (a.empty())
98 return b.size();
99 if (b.empty())
100 return a.size();
101
102 std::string_view s1 = a;
103 std::string_view s2 = b;
104 if (s1.size() > s2.size())
105 std::swap(s1, s2);
106
107 Array<size_t> prev = Array<size_t>::create(s1.size() + 1);
108 Array<size_t> curr = Array<size_t>::create(s1.size() + 1);
109
110 for (size_t j = 0; j <= s1.size(); ++j)
111 {
112 prev[j] = 0;
113 curr[j] = 0;
114 }
115
116 for (size_t j = 0; j <= s1.size(); ++j)
117 prev[j] = j;
118
119 for (size_t i = 1; i <= s2.size(); ++i)
120 {
121 curr[0] = i;
122
123 for (size_t j = 1; j <= s1.size(); ++j)
124 {
125 const size_t cost = s2[i - 1] == s1[j - 1] ? 0 : 1;
126 curr[j] = std::min({
127 prev[j] + 1,
128 curr[j - 1] + 1,
129 prev[j - 1] + cost
130 });
131 }
132
133 prev.swap(curr);
134 }
135
136 return prev[s1.size()];
137 }
138
139
147 inline size_t edit_distance(const std::string_view a,
148 const std::string_view b)
149 {
150 return levenshtein_distance(a, b);
151 }
152
153
167 inline size_t damerau_levenshtein_distance(const std::string_view a,
168 const std::string_view b)
169 {
170 const size_t n = a.size();
171 const size_t m = b.size();
172
173 const size_t inf = n + m;
176
177 d[0][0] = inf;
178 for (size_t i = 0; i <= n; ++i)
179 {
180 d[i + 1][1] = i;
181 d[i + 1][0] = inf;
182 }
183
184 for (size_t j = 0; j <= m; ++j)
185 {
186 d[1][j + 1] = j;
187 d[0][j + 1] = inf;
188 }
189
190 std::array<size_t, 256> last_row = {0};
191
192 for (size_t i = 1; i <= n; ++i)
193 {
194 size_t last_match_col = 0;
195
196 for (size_t j = 1; j <= m; ++j)
197 {
198 const size_t i1 = last_row[static_cast<unsigned char>(b[j - 1])];
199 const size_t j1 = last_match_col;
200
201 size_t cost = 1;
202 if (a[i - 1] == b[j - 1])
203 {
204 cost = 0;
205 last_match_col = j;
206 }
207
208 d[i + 1][j + 1] = std::min({
209 d[i][j] + cost,
210 d[i + 1][j] + 1,
211 d[i][j + 1] + 1,
212 d[i1][j1] + (i - i1 - 1) + 1 + (j - j1 - 1)
213 });
214 }
215
216 last_row[static_cast<unsigned char>(a[i - 1])] = i;
217 }
218
219 return d[n + 1][m + 1];
220 }
221
222
225 {
226 size_t length = 0;
227 std::string subsequence;
228 };
229
230
241 inline LCS_Result longest_common_subsequence(const std::string_view a,
242 const std::string_view b)
243 {
244 const size_t n = a.size();
245 const size_t m = b.size();
246
249
250 for (size_t i = 1; i <= n; ++i)
251 for (size_t j = 1; j <= m; ++j)
252 if (a[i - 1] == b[j - 1])
253 dp[i][j] = dp[i - 1][j - 1] + 1;
254 else
255 dp[i][j] = std::max(dp[i - 1][j], dp[i][j - 1]);
256
257 std::string seq;
258 seq.reserve(dp[n][m]);
259
260 size_t i = n;
261 size_t j = m;
262 while (i > 0 and j > 0)
263 if (a[i - 1] == b[j - 1])
264 {
265 seq.push_back(a[i - 1]);
266 --i;
267 --j;
268 }
269 else if (dp[i - 1][j] >= dp[i][j - 1])
270 --i;
271 else
272 --j;
273
274 std::reverse(seq.begin(), seq.end());
275
276 return LCS_Result{dp[n][m], seq};
277 }
278
279
282 {
283 size_t length = 0;
284 size_t begin_a = 0;
285 size_t begin_b = 0;
286 std::string substring;
287 };
288
289
301 longest_common_substring(const std::string_view a,
302 const std::string_view b)
303 {
304 const size_t n = a.size();
305 const size_t m = b.size();
306
309
310 for (size_t j = 0; j <= m; ++j)
311 {
312 prev[j] = 0;
313 curr[j] = 0;
314 }
315
316 size_t best_len = 0;
317 size_t end_a = 0;
318 size_t end_b = 0;
319
320 for (size_t i = 1; i <= n; ++i)
321 {
322 for (size_t j = 1; j <= m; ++j)
323 if (a[i - 1] == b[j - 1])
324 {
325 curr[j] = prev[j - 1] + 1;
326 if (curr[j] > best_len)
327 {
328 best_len = curr[j];
329 end_a = i;
330 end_b = j;
331 }
332 }
333 else
334 curr[j] = 0;
335
336 prev.swap(curr);
337 for (size_t j = 0; j <= m; ++j)
338 curr[j] = 0;
339 }
340
342 result.length = best_len;
343
344 if (best_len == 0)
345 {
346 result.substring = "";
347 return result;
348 }
349
350 result.begin_a = end_a - best_len;
351 result.begin_b = end_b - best_len;
352 result.substring = std::string(a.substr(result.begin_a, best_len));
353
354 return result;
355 }
356} // namespace Aleph
357
358# endif // STRING_DP_H
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:139
static Array create(size_t n)
Create an array with n logical elements.
Definition tpl_array.H:194
constexpr size_t size() const noexcept
Return the number of elements stored in the stack.
Definition tpl_array.H:351
void swap(Array &s) noexcept
Swap this with s
Definition tpl_array.H:227
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:245
void reserve(size_t cap)
Reserves cap cells into the array.
Definition tpl_array.H:315
constexpr size_t size() const noexcept
Returns the number of entries in the table.
Definition hashDry.H:619
__gmp_expr< T, __gmp_unary_expr< __gmp_expr< T, U >, __gmp_j1_function > > j1(const __gmp_expr< T, U > &expr)
Definition gmpfrxx.h:4100
Array< Array< size_t > > make_zero_matrix(const size_t rows, const size_t cols)
Definition String_DP.H:72
Array< size_t > make_zero_row(const size_t cols)
Definition String_DP.H:63
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
LCS_Result longest_common_subsequence(const std::string_view a, const std::string_view b)
Compute Longest Common Subsequence.
Definition String_DP.H:241
and
Check uniqueness with explicit hash + equality functors.
size_t levenshtein_distance(const std::string_view a, const std::string_view b)
Levenshtein distance (insert/delete/substitute each cost 1).
Definition String_DP.H:94
size_t edit_distance(const std::string_view a, const std::string_view b)
Alias for Levenshtein distance.
Definition String_DP.H:147
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
Longest_Common_Substring_Result longest_common_substring(const std::string_view a, const std::string_view b)
Compute the longest common substring (contiguous) between two strings.
Definition String_DP.H:301
size_t damerau_levenshtein_distance(const std::string_view a, const std::string_view b)
Damerau-Levenshtein distance with adjacent transpositions.
Definition String_DP.H:167
Result for Longest Common Subsequence (LCS).
Definition String_DP.H:225
std::string subsequence
The subsequence string.
Definition String_DP.H:227
size_t length
Length of the subsequence.
Definition String_DP.H:226
Result for longest common substring.
Definition String_DP.H:282
size_t begin_a
Start index in first string.
Definition String_DP.H:284
size_t length
Length of common substring.
Definition String_DP.H:283
std::string substring
The common substring.
Definition String_DP.H:286
size_t begin_b
Start index in second string.
Definition String_DP.H:285
FooMap m(5, fst_unit_pair_hash, snd_unit_pair_hash)
Dynamic array container with automatic resizing.