Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
string_algorithms_test.cc
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
31
37# include <gtest/gtest.h>
38
39# include <String_Algorithms.H>
40
41using namespace Aleph;
42
44{
45 EXPECT_EQ(kmp_search("abracadabra", "abra").size(), 2u);
46
48 ac.add_pattern("abra");
49 ac.build();
50 EXPECT_TRUE(ac.contains_any("abracadabra"));
51
52 EXPECT_EQ(suffix_array("banana").size(), 6u);
53 EXPECT_EQ(longest_palindromic_substring("abacaba"), "abacaba");
54 EXPECT_EQ(edit_distance("kitten", "sitting"), 3u);
55}
56
58{
59 const std::string text = "the quick brown fox jumps over the lazy dog fox";
60 const std::string pattern = "fox";
61
62 const auto kmp = kmp_search(text, pattern);
63 const auto z = z_search(text, pattern);
64 const auto bmh = boyer_moore_horspool_search(text, pattern);
65 const auto rk = rabin_karp_search(text, pattern);
66
67 ASSERT_EQ(kmp.size(), z.size());
68 ASSERT_EQ(kmp.size(), bmh.size());
69 ASSERT_EQ(kmp.size(), rk.size());
70
71 for (size_t i = 0; i < kmp.size(); ++i)
72 {
73 EXPECT_EQ(kmp[i], z[i]);
74 EXPECT_EQ(kmp[i], bmh[i]);
75 EXPECT_EQ(kmp[i], rk[i]);
76 }
77}
78
80{
81 const std::string text = "ababababab";
82 const std::string pattern = "abab";
83
84 const auto kmp_matches = kmp_search(text, pattern);
85
87 ac.add_pattern(pattern);
88 ac.build();
89 const auto ac_matches = ac.search(text);
90
91 // Same number of matches
92 ASSERT_EQ(ac_matches.size(), kmp_matches.size());
93
94 // Same positions
95 for (size_t i = 0; i < ac_matches.size(); ++i)
96 EXPECT_EQ(ac_matches[i].position, kmp_matches[i]);
97}
98
100{
101 const std::string text = "abracadabra";
102 const std::string pattern = "abra";
103
104 const auto kmp_matches = kmp_search(text, pattern);
105
107 auto tree_matches = st.find_all(pattern);
108
109 ASSERT_EQ(tree_matches.size(), kmp_matches.size());
110
111 for (size_t i = 0; i < tree_matches.size(); ++i)
113}
114
116{
117 const std::string text = "mississippi";
118
121 sam.build(text);
122
123 const char * patterns[] = {"issi", "miss", "pi", "ppi", "xyz", "sip", ""};
124 for (const char * p : patterns)
125 EXPECT_EQ(st.contains(p), sam.contains(p))
126 << "Disagreement on pattern: \"" << p << "\"";
127}
128
130{
131 const std::string a = "xabxac";
132 const std::string b = "abcabxabcd";
133
134 const auto dp_result = longest_common_substring(a, b);
135 const auto sam_result = longest_common_substring_sam(a, b);
136
137 // Both approaches should agree on the length and on the substring itself
138 EXPECT_EQ(dp_result.length, sam_result.size());
139 EXPECT_EQ(dp_result.substring, sam_result);
140}
Umbrella header for classical string algorithms in Aleph-w.
Aho-Corasick multi-pattern automaton.
size_t add_pattern(std::string pattern)
Add one pattern to the automaton.
Naive compressed suffix tree (didactic implementation).
Array< size_t > find_all(const std::string_view pattern) const
Return all occurrences of a pattern.
bool contains(const std::string_view pattern) const
Return true if the pattern appears in the text.
Suffix automaton (SAM) over byte alphabet.
void build(const std::string_view text)
Build the SAM from an entire string.
#define TEST(name)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
Array< size_t > z_search(const std::string_view text, const std::string_view pattern)
Find all occurrences of a pattern using the Z-algorithm.
Array< size_t > rabin_karp_search(const std::string_view text, const std::string_view pattern, const uint64_t base=911382323ull)
Find all occurrences using Rabin-Karp with rolling hash.
size_t size(Node *root) noexcept
size_t edit_distance(const std::string_view a, const std::string_view b)
Alias for Levenshtein distance.
Definition String_DP.H:147
Divide_Conquer_DP_Result< Cost > divide_and_conquer_partition_dp(const size_t groups, const size_t n, Transition_Cost_Fn transition_cost, const Cost inf=dp_optimization_detail::default_inf< Cost >())
Optimize partition DP using divide-and-conquer optimization.
std::string longest_common_substring_sam(const std::string_view a, const std::string_view b)
Convenience function: LCS via suffix automaton.
Longest_Common_Substring_Result longest_common_substring(const std::string_view a, const std::string_view b)
Compute the longest common substring (contiguous) between two strings.
Definition String_DP.H:301
std::string longest_palindromic_substring(const std::string_view text)
Convenience wrapper returning only the longest palindromic substring.
Array< size_t > kmp_search(const std::string_view text, const std::string_view pattern)
Find all occurrences of a pattern using KMP.
Array< size_t > boyer_moore_horspool_search(const std::string_view text, const std::string_view pattern)
Find all occurrences using Boyer-Moore-Horspool.
Array< size_t > suffix_array(const std::string_view text)
Build suffix array with doubling algorithm.