Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
ah-string-utils.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
38//
39// Created by lrleon on 23/04/24.
40//
41# include <gtest/gtest.h>
42
43# include <cstdlib>
44# include <limits>
45# include <random>
46# include <stdexcept>
47# include <vector>
48
49# include <ah-string-utils.H>
50
51using namespace std;
52using namespace testing;
53using namespace Aleph;
54
56{
57 string s1 = "hello";
58 string s2 = "world";
59 string blank = " ";
60 string s3 = "!";
61 string s = concat(s1, blank, s2, s3);
62 ASSERT_EQ(s, "hello world!");
63}
64
66{
67 vector<int> v = {1, 2, 3};
68 EXPECT_EQ(Aleph::to_string(v), "1, 2, 3");
69 vector<int> e;
71}
72
74{
75 Array<int> a;
77 a.append(1);
79 a.append(2);
80 a.append(3);
81 EXPECT_EQ(Aleph::to_string(a), "1, 2, 3");
82}
83
85{
86 {
87 string s = "\t abc \n";
88 EXPECT_EQ(trim(s), "abc");
89 EXPECT_EQ(s, "\t abc \n");
90 }
91
92 {
93 string s = "\t abc \n";
94 EXPECT_EQ(trim_in_place(s), "abc");
95 EXPECT_EQ(s, "abc");
96 }
97
98 {
99 string s = "";
100 EXPECT_EQ(trim(s), "");
101 EXPECT_EQ(trim_in_place(s), "");
102 }
103}
104
106{
107 EXPECT_TRUE(contains("hello world", "world"));
108 EXPECT_FALSE(contains("hello", "xyz"));
109 EXPECT_TRUE(contains("", ""));
110}
111
113{
114 EXPECT_EQ(Aleph::to_string(1.5, 2), "1.50");
115 EXPECT_EQ(Aleph::to_string(1.0, 0), "1");
116 auto s = to_str(1.0 / 3.0);
117 EXPECT_FALSE(s.empty());
118}
119
121{
122 EXPECT_EQ(Aleph::tolower("HeLLo"), "hello");
123 EXPECT_EQ(Aleph::toupper("HeLLo"), "HELLO");
124
125 string s = "HeLLo";
126 EXPECT_EQ(mutable_tolower(s), "hello");
127 EXPECT_EQ(s, "hello");
128 EXPECT_EQ(mutable_toupper(s), "HELLO");
129 EXPECT_EQ(s, "HELLO");
130}
131
133{
134 string s;
135 s.push_back(static_cast<char>(0xFF));
136 s.push_back('A');
137 auto lower = Aleph::to_lower(s);
138 ASSERT_EQ(lower.size(), 2u);
139 EXPECT_EQ(lower[1], 'a');
140}
141
143{
144 EXPECT_EQ(only_alpha("A-bC_9"), "abc9");
145 EXPECT_EQ(remove_spaces(" a\tb\nc "), "abc");
146 EXPECT_EQ(remove_symbols("a-b_c", "-_"), "abc");
147 EXPECT_EQ(remove_symbols("", "-_"), "");
148}
149
151{
153 EXPECT_EQ(join(l, ","), "");
154 l.append(1);
155 l.append(2);
156 l.append(3);
157 EXPECT_EQ(join(l, ","), "1,2,3");
158 EXPECT_EQ(join(l, " - "), "1 - 2 - 3");
159}
160
162{
163 EXPECT_TRUE(is_long("0"));
164 EXPECT_TRUE(is_long("-10"));
166 EXPECT_FALSE(is_long("10x"));
167
169 EXPECT_TRUE(is_size_t("10"));
170 EXPECT_FALSE(is_size_t("-1"));
172 EXPECT_FALSE(is_size_t("10x"));
173
175 EXPECT_TRUE(is_double("-1.25"));
176 EXPECT_TRUE(is_double("1e3"));
178 EXPECT_FALSE(is_double("1.2x"));
179 EXPECT_FALSE(is_double("1e309"));
180
181 EXPECT_TRUE(is_float("0"));
182 EXPECT_TRUE(is_float("-1.25"));
184 EXPECT_FALSE(is_float("1.2x"));
185}
186
188{
189 EXPECT_TRUE(is_prefix("foobar", "foo"));
190 EXPECT_FALSE(is_prefix("foo", "foobar"));
191
192 string s = "prefix_value";
193 EXPECT_EQ(remove_prefix(s, "prefix_"), "value");
194 EXPECT_EQ(s, "value");
195}
196
198{
199 EXPECT_EQ(to_name(""), "");
200 EXPECT_EQ(to_name("hello"), "Hello");
201 EXPECT_EQ(to_name("Hello"), "Hello");
202}
203
205{
206 {
207 auto parts = split_camel_case("");
209 }
210
211 {
212 auto parts = split_camel_case("camelCaseString");
213 ASSERT_EQ(parts.size(), 3u);
214 EXPECT_EQ(parts.nth(0), "camel");
215 EXPECT_EQ(parts.nth(1), "Case");
216 EXPECT_EQ(parts.nth(2), "String");
217 }
218}
219
221{
222 {
223 auto v = split("a,b,,c", ',');
224 ASSERT_EQ(v.size(), 4u);
225 EXPECT_EQ(v[0], "a");
226 EXPECT_EQ(v[1], "b");
227 EXPECT_EQ(v[2], "");
228 EXPECT_EQ(v[3], "c");
229 }
230
231 {
232 auto l = split_to_list("a--b---c", "-");
233 ASSERT_EQ(l.size(), 3u);
234 EXPECT_EQ(l.nth(0), "a");
235 EXPECT_EQ(l.nth(1), "b");
236 EXPECT_EQ(l.nth(2), "c");
237 }
238
239 {
240 auto l = split_to_list("a b-c__d", " _-");
241 ASSERT_EQ(l.size(), 4u);
242 EXPECT_EQ(l.nth(0), "a");
243 EXPECT_EQ(l.nth(1), "b");
244 EXPECT_EQ(l.nth(2), "c");
245 EXPECT_EQ(l.nth(3), "d");
246 }
247
248 {
249 auto l = split_to_list("abc", "");
250 ASSERT_EQ(l.size(), 1u);
251 EXPECT_EQ(l.nth(0), "abc");
252 }
253}
254
256{
257 EXPECT_EQ(to_Pascalcase("hello_world"), "HelloWorld");
258 EXPECT_EQ(to_Pascalcase("alreadyPascal"), "AlreadyPascal");
259}
260
262{
263 string s = "abcd";
264 EXPECT_EQ(split_pos(s, 0), (pair<string, string>("", "abcd")));
265 EXPECT_EQ(split_pos(s, 2), (pair<string, string>("ab", "cd")));
266 EXPECT_EQ(split_pos(s, 4), (pair<string, string>("abcd", "")));
267 EXPECT_THROW(split_pos(s, 5), range_error);
268}
269
271{
272 EXPECT_THROW(split_n("abc", 0), range_error);
273 EXPECT_THROW(split_n("abc", 4), range_error);
274
275 auto l = split_n("abcdef", 4);
276 ASSERT_EQ(l.size(), 4u);
277 EXPECT_EQ(l.nth(0), "a");
278 EXPECT_EQ(l.nth(1), "b");
279 EXPECT_EQ(l.nth(2), "c");
280 EXPECT_EQ(l.nth(3), "def");
281}
282
284{
287 DynList<int> r2; r2.append(3);
288 m.append(r1);
289 m.append(r2);
290
291 auto out = complete_rows(m);
292 ASSERT_EQ(out.size(), 2u);
293 ASSERT_EQ(out.nth(0).size(), 2u);
294 ASSERT_EQ(out.nth(1).size(), 2u);
295 EXPECT_EQ(out.nth(1).nth(0), 3);
296 EXPECT_EQ(out.nth(1).nth(1), 0);
297}
298
300{
302 DynList<string> r1; r1.append("abcd"); r1.append("x");
303 DynList<string> r2; r2.append("ab"); r2.append("xyz");
304 mat.append(r1);
305 mat.append(r2);
306
307 DynList<size_t> lens; lens.append(2); lens.append(1);
308 auto formatted = Aleph::format_string(lens, mat);
309 ASSERT_EQ(formatted.size(), 2u);
310 ASSERT_EQ(formatted.nth(0).size(), 2u);
311}
312
314{
316 DynList<string> r1; r1.append("a"); r1.append("b"); r1.append("c");
317 mat.append(r1);
318 auto csv = format_string_csv(mat);
319 ASSERT_EQ(csv.size(), 1u);
320 EXPECT_EQ(csv.nth(0).nth(0), "a,");
321 EXPECT_EQ(csv.nth(0).nth(1), "b,");
322 EXPECT_EQ(csv.nth(0).nth(2), "c");
323}
324
326{
327 const string text = "one two three four five";
328 auto j = justify_text(text, 10, 2);
329 EXPECT_TRUE(contains(j, " "));
330
331 auto a = align_text_to_left(text, 10, 1);
332 EXPECT_TRUE(contains(a, " one"));
333
334 auto shifted = shift_lines_to_left("a\nb", 3);
335 EXPECT_EQ(shifted, " a\n b");
336}
337
343
354
356{
357 string s = "secret";
358 fill_string(s, 'x');
359 EXPECT_EQ(s, "xxxxxx");
360
361 string e;
362 fill_string(e, 'x');
363 EXPECT_TRUE(e.empty());
364}
365
367{
368 auto a = split_to_array("a b-c__d", " _-");
369 ASSERT_EQ(a.size(), 4u);
370 EXPECT_EQ(a[0], "a");
371 EXPECT_EQ(a[1], "b");
372 EXPECT_EQ(a[2], "c");
373 EXPECT_EQ(a[3], "d");
374}
375
377{
379 DynList<string> r1; r1.append("abcd"); r1.append("x");
380 DynList<string> r2; r2.append("ab"); r2.append("xyz");
381 mat.append(r1);
382 mat.append(r2);
383
385 ASSERT_EQ(formatted.size(), 2u);
386 ASSERT_EQ(formatted.nth(0).size(), 2u);
387 ASSERT_EQ(formatted.nth(1).size(), 2u);
388}
389
391{
393 DynList<string> r1; r1.append("a"); r1.append("b");
394 DynList<string> r2; r2.append("c"); r2.append("d");
395 mat.append(r1);
396 mat.append(r2);
397 auto s = Aleph::to_string(mat);
398 EXPECT_TRUE(contains(s, "a"));
399 EXPECT_TRUE(contains(s, "d"));
400
402 lines.append("x");
403 lines.append("y");
405}
406
408{
409 auto w = split_text_into_words(" a\t b\n c ");
410 ASSERT_EQ(w.size(), 3u);
411 EXPECT_EQ(w.nth(0), "a");
412 EXPECT_EQ(w.nth(1), "b");
413 EXPECT_EQ(w.nth(2), "c");
414
415 auto l = split_text_into_lines("a\nb\n");
416 ASSERT_EQ(l.size(), 2u);
417 EXPECT_EQ(l.nth(0), "a");
418 EXPECT_EQ(l.nth(1), "b");
419}
420
422{
423 const string text = "one two three four five";
424 auto j = justify_line_except_first(text, 10, 4);
425 EXPECT_TRUE(contains(j, "one"));
426 EXPECT_TRUE(contains(j, "\n"));
427
428 auto a = align_text_to_left_except_first(text, 10, 3);
429 EXPECT_TRUE(contains(a, "one"));
430 EXPECT_TRUE(contains(a, "\n"));
431}
432
434{
435 const auto max_st = std::numeric_limits<size_t>::max();
436 EXPECT_TRUE(is_size_t(::std::to_string(max_st)));
437 EXPECT_FALSE(is_size_t(::std::to_string(max_st) + "0"));
438
439 const auto max_l = std::numeric_limits<long>::max();
440 const auto min_l = std::numeric_limits<long>::min();
441 EXPECT_TRUE(is_long(::std::to_string(max_l)));
442 EXPECT_TRUE(is_long(::std::to_string(min_l)));
443 EXPECT_FALSE(is_long(::std::to_string(max_l) + "0"));
444}
445
447{
448 const char *v = std::getenv("ALEPH_STRESS");
449 if (v == nullptr or *v == '\0')
450 return 1;
451
452 char *end = nullptr;
453 const long m = std::strtol(v, &end, 10);
454 if (end == v or *end != '\0')
455 return 1;
456
457 if (m < 1)
458 return 1;
459
460 if (m > 50)
461 return 50;
462
463 return int(m);
464}
465
// Build a string of exactly `len` pseudo-random bytes.
//
// Each byte is drawn from `rng` through a uniform integer distribution over
// [0, 255] and then narrowed to `char`, so the result may contain any byte
// value, including NUL and non-ASCII ones. `rng` is advanced by the draws.
static std::string random_string(std::mt19937 & rng, size_t len)
{
  std::uniform_int_distribution<int> any_byte(0, 255);

  std::string out;
  out.reserve(len); // final size is known up front

  while (out.size() < len)
    out.push_back(static_cast<char>(any_byte(rng)));

  return out;
}
475
// Build a token of exactly `len` characters taken from [a-zA-Z0-9].
//
// Every character is selected by drawing a uniformly distributed index into
// the symbol table below from `rng`, which is advanced by the draws.
static std::string random_ascii_token(std::mt19937 & rng, size_t len)
{
  static constexpr char symbols[] =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

  // sizeof counts the terminating NUL, hence the last valid index is
  // sizeof - 2.
  constexpr int last_index = int(sizeof(symbols) - 2);
  std::uniform_int_distribution<int> index_of(0, last_index);

  std::string token;
  token.reserve(len);

  for (size_t remaining = len; remaining > 0; --remaining)
    token.push_back(symbols[index_of(rng)]);

  return token;
}
487
489{
490 std::mt19937 rng(12345);
491 for (int iter = 0; iter < 2000*stress_multiplier(); ++iter)
492 {
493 const auto s = random_ascii_token(rng, size_t(iter % 64));
494 for (size_t pos = 0; pos <= s.size(); ++pos)
495 {
496 auto p = split_pos(s, pos);
497 EXPECT_EQ(p.first + p.second, s);
498 EXPECT_EQ(p.first.size(), pos);
499 }
500 }
501}
502
504{
505 std::mt19937 rng(54321);
506 for (int iter = 0; iter < 1500*stress_multiplier(); ++iter)
507 {
508 const auto s = random_ascii_token(rng, 1 + size_t(iter % 128));
509 const size_t n = 1 + (size_t(iter) % std::min<size_t>(16, s.size()));
510 auto parts = split_n(s, n);
511 ASSERT_EQ(parts.size(), n);
512 string recomposed;
513 parts.for_each([&](const string &x) { recomposed += x; });
515
516 const size_t base = s.size() / n;
517 for (size_t i = 0; i + 1 < n; ++i)
518 EXPECT_EQ(parts.nth(i).size(), base);
519 }
520}
521
523{
524 std::mt19937 rng(999);
525 static const string delims = " _-";
526 for (int iter = 0; iter < 2000*stress_multiplier(); ++iter)
527 {
528 string s;
529 const size_t tokens = 1 + (size_t(iter) % 12);
530 for (size_t i = 0; i < tokens; ++i)
531 {
532 if (i)
533 s.push_back(delims[size_t(iter + int(i)) % delims.size()]);
534 s += random_ascii_token(rng, 1 + (size_t(iter + int(i)) % 10));
535 }
536
537 auto out = split_to_list(s, delims);
539 out.for_each([&](const string &t)
540 {
541 EXPECT_FALSE(t.empty());
542 for (char c : delims)
543 EXPECT_EQ(t.find(c), string::npos);
544 });
545 }
546}
547
549{
550 std::mt19937 rng(2024);
551 for (int iter = 0; iter < 3000*stress_multiplier(); ++iter)
552 {
553 auto s = random_string(rng, size_t(iter % 128));
554 auto lo = Aleph::to_lower(s);
555 auto up = Aleph::to_upper(s);
556 EXPECT_EQ(lo.size(), s.size());
557 EXPECT_EQ(up.size(), s.size());
558 }
559}
560
562{
563 std::mt19937 rng(77);
564 std::uniform_int_distribution<int> dist(-100000, 100000);
565
566 for (int iter = 0; iter < 2000*stress_multiplier(); ++iter)
567 {
568 const int a = dist(rng);
569 const int b = dist(rng);
570 const int c = dist(rng);
571
572 const auto got = ::Aleph::build_pars_list(a, b, c);
573 const auto expected = ::std::to_string(a) + ", " + ::std::to_string(b) + ", " + ::std::to_string(c);
575 }
576}
String manipulation utilities.
static string random_ascii_token(std::mt19937 &rng, size_t len)
static string random_string(std::mt19937 &rng, size_t len)
static int stress_multiplier()
long double w
Definition btreepic.C:153
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:138
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:239
Dynamic singly linked list with functional programming support.
Definition htlist.H:1423
T & append(const T &item)
Append a new item by copy.
Definition htlist.H:1562
constexpr bool is_empty() const noexcept
Return true if list is empty.
Definition htlist.H:523
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
void for_each(Operation &operation)
Traverse all the container and performs an operation on each element.
Definition ah-dry.H:685
Type & nth(const size_t n)
Return the n-th item of container.
Definition ah-dry.H:267
#define TEST(name)
static mt19937 rng
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
std::string tolower(const char *str)
Convert a C string to lower-case.
bool is_prefix(const std::string &str, const std::string &prefix)
Check whether prefix is a prefix of str.
std::string remove_symbols(const std::string &str, const std::string &symbols)
Remove any character appearing in symbols.
std::string to_upper(const std::string &str)
Convert a std::string to upper-case (byte-wise).
DynList< std::string > split_text_into_lines(const std::string &text)
Split a text into lines by "\n".
std::string justify_line_except_first(const std::string &text, const size_t width, const size_t left_margin=0)
Justify all lines except the first one.
DynList< std::string > split_text_into_words(const std::string &text)
Split a text into whitespace-separated words.
Array< std::string > split_to_array(const std::string &s, const std::string &delim)
Split a std::string into an Aleph::Array<std::string>.
std::pair< std::string, std::string > split_pos(const std::string &str, const size_t pos)
Split a std::string at a fixed position.
std::string remove_prefix(std::string &str, const std::string &prefix)
Remove prefix from str if present.
std::string & trim_in_place(std::string &s)
Trim a std::string in-place (leading + trailing whitespace removed).
std::string align_text_to_left(const std::string &text, const size_t page_width, const size_t left_margin=0)
Align text to the left by wrapping lines at page_width.
bool contains(const std::string_view &str, const std::string_view &substr)
Check if substr appears inside str.
std::string to_Pascalcase(const std::string &str)
Convert an identifier-like std::string to PascalCase.
DynList< DynList< std::string > > format_string_csv(const DynList< DynList< std::string > > &mat)
Produce a CSV-like matrix (commas added to all but last element in each row).
bool is_long(const std::string &str)
Check whether a std::string fully parses as a long.
DynList< std::string > split_n(const std::string &str, const size_t n)
Split a std::string into n parts.
std::string to_name(const std::string &str)
Uppercase the first character of str and return the resulting copy.
std::pair< First, Second > pair
Alias to std::pair kept for backwards compatibility.
Definition ahPair.H:89
bool is_float(const std::string &str)
Check whether a std::string fully parses as a finite float.
bool is_size_t(const std::string &str)
Check whether a std::string fully parses as a non-negative size_t.
std::string trim(const std::string &s)
Return a trimmed copy of a std::string (leading + trailing whitespace removed).
DynList< std::string > split_camel_case(const char *const str)
Split a camelCase / PascalCase std::string into tokens.
void build_pars_list(std::string &unused)
Base case for build_pars_list(std::string&, ...).
std::string remove_spaces(const std::string &str)
Remove all whitespace characters from a std::string.
std::string to_string(const time_t t, const std::string &format)
Format a time_t value into a string using format.
Definition ah-date.H:140
std::string to_str(const double d)
Convert double to a std::string with maximum round-trip precision.
std::string justify_text(const std::string &text, const size_t width, const size_t left_margin=0)
Justify a text to a target width.
bool is_double(const std::string &str)
Check whether a std::string fully parses as a finite double.
DynList< std::string > split_to_list(const std::string &s, const std::string &delim)
Split a std::string into an Aleph::DynList<std::string>.
std::string shift_lines_to_left(const std::string &str, const size_t n)
Indent every line in a multi-line std::string by n spaces.
std::string concat(const Args &... args)
Concatenate multiple streamable arguments into a single std::string.
DynList< DynList< T > > complete_rows(DynList< DynList< T > > &m)
Pad all rows of a matrix to the maximum row length.
std::string align_text_to_left_except_first(const std::string &text, const size_t width, const size_t left_margin=0)
Align all lines except the first one.
std::string toupper(const char *str)
Convert a C string to upper-case.
std::ostream & join(const C &c, const std::string &sep, std::ostream &out)
Join elements of an Aleph-style container into a stream.
std::vector< std::string > & split(const std::string &s, const char delim, std::vector< std::string > &elems)
Split a std::string by a single delimiter character.
DynList< DynList< std::string > > format_string(const DynList< size_t > &lens, const DynList< DynList< std::string > > &mat)
std::string to_lower(const std::string &str)
Convert a std::string to lower-case (byte-wise).
void fill_string(std::string &str, char sym)
Fill all the content of std::string with a defined char.
std::string & mutable_tolower(std::string &str)
Convert a std::string to lower-case in-place.
std::string & mutable_toupper(std::string &str)
Convert a std::string to upper-case in-place.
DynList< T > maps(const C &c, Op op)
Classic map operation.
std::string only_alpha(const std::string &str)
Extract alphanumeric ASCII characters and normalize letters to lower-case.
STL namespace.
DynList< int > l