Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
parse_csv_test.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
50#include <gtest/gtest.h>
51#include <sstream>
52#include <fstream>
53#include <cstdio>
54#include <parse-csv.H>
55
56using namespace Aleph;
57
58//============================================================================
59// csv_read_row Tests - Stream Input
60//============================================================================
61
62class CsvReadRowStreamTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_read_row Tests - Stream Input" section.
63
65{
66 std::istringstream ss("a,b,c\n");
68
69 ASSERT_EQ(row.size(), 3u);
70 EXPECT_EQ(row(0), "a");
71 EXPECT_EQ(row(1), "b");
72 EXPECT_EQ(row(2), "c");
73}
74
76{
77 std::istringstream ss("hello\n");
79
80 ASSERT_EQ(row.size(), 1u);
81 EXPECT_EQ(row(0), "hello");
82}
83
85{
86 std::istringstream ss(",b,,d\n");
88
89 ASSERT_EQ(row.size(), 4u);
90 EXPECT_EQ(row(0), "");
91 EXPECT_EQ(row(1), "b");
92 EXPECT_EQ(row(2), "");
93 EXPECT_EQ(row(3), "d");
94}
95
97{
98 std::istringstream ss("a,\"hello world\",c\n");
100
101 ASSERT_EQ(row.size(), 3u);
102 EXPECT_EQ(row(0), "a");
103 EXPECT_EQ(row(1), "hello world");
104 EXPECT_EQ(row(2), "c");
105}
106
108{
109 std::istringstream ss("a,\"hello, world\",c\n");
111
112 ASSERT_EQ(row.size(), 3u);
113 EXPECT_EQ(row(0), "a");
114 EXPECT_EQ(row(1), "hello, world");
115 EXPECT_EQ(row(2), "c");
116}
117
119{
120 std::istringstream ss("a,\"say \"\"hi\"\"\",c\n");
122
123 ASSERT_EQ(row.size(), 3u);
124 EXPECT_EQ(row(0), "a");
125 EXPECT_EQ(row(1), "say \"hi\"");
126 EXPECT_EQ(row(2), "c");
127}
128
130{
131 std::istringstream ss("a,\"line1\nline2\",c\n");
133
134 ASSERT_EQ(row.size(), 3u);
135 EXPECT_EQ(row(0), "a");
136 EXPECT_EQ(row(1), "line1\nline2");
137 EXPECT_EQ(row(2), "c");
138}
139
141{
142 std::istringstream ss("a,b,c\r\n");
144
145 ASSERT_EQ(row.size(), 3u);
146 EXPECT_EQ(row(0), "a");
147 EXPECT_EQ(row(1), "b");
148 EXPECT_EQ(row(2), "c");
149}
150
152{
153 std::istringstream ss("a,b,c");
155
156 ASSERT_EQ(row.size(), 3u);
157 EXPECT_EQ(row(0), "a");
158 EXPECT_EQ(row(1), "b");
159 EXPECT_EQ(row(2), "c");
160}
161
163{
164 std::istringstream ss("a;b;c\n");
166
167 ASSERT_EQ(row.size(), 3u);
168 EXPECT_EQ(row(0), "a");
169 EXPECT_EQ(row(1), "b");
170 EXPECT_EQ(row(2), "c");
171}
172
174{
175 std::istringstream ss("a\tb\tc\n");
177
178 ASSERT_EQ(row.size(), 3u);
179 EXPECT_EQ(row(0), "a");
180 EXPECT_EQ(row(1), "b");
181 EXPECT_EQ(row(2), "c");
182}
183
185{
186 std::istringstream ss("a,b,c\n1,2,3\nx,y,z\n");
187
191
192 ASSERT_EQ(row1.size(), 3u);
193 EXPECT_EQ(row1(0), "a");
194
195 ASSERT_EQ(row2.size(), 3u);
196 EXPECT_EQ(row2(0), "1");
197
198 ASSERT_EQ(row3.size(), 3u);
199 EXPECT_EQ(row3(0), "x");
200}
201
203{
204 std::istringstream ss("");
206
207 // An empty stream may yield zero fields or a single empty field, depending
208 // on the parser implementation; the check below accepts either outcome.
209 EXPECT_LE(row.size(), 1u);
210}
211
213{
214 std::istringstream ss(" a , b , c \n");
216
217 ASSERT_EQ(row.size(), 3u);
218 EXPECT_EQ(row(0), " a ");
219 EXPECT_EQ(row(1), " b ");
220 EXPECT_EQ(row(2), " c ");
221}
222
223//============================================================================
224// csv_read_row Tests - String Input
225//============================================================================
226
227class CsvReadRowStringTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_read_row Tests - String Input" section.
228
230{
231 std::string line = "a,b,c";
233
234 ASSERT_EQ(row.size(), 3u);
235 EXPECT_EQ(row(0), "a");
236 EXPECT_EQ(row(1), "b");
237 EXPECT_EQ(row(2), "c");
238}
239
241{
242 std::string line = "a,\"hello, world\",c";
244
245 ASSERT_EQ(row.size(), 3u);
246 EXPECT_EQ(row(1), "hello, world");
247}
248
250{
251 std::string line = "a,\"say \"\"hello\"\"\",c";
253
254 ASSERT_EQ(row.size(), 3u);
255 EXPECT_EQ(row(1), "say \"hello\"");
256}
257
258//============================================================================
259// csv_read_all Tests
260//============================================================================
261
262class CsvReadAllTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_read_all Tests" section.
263
265{
266 std::istringstream ss("a,b,c\n1,2,3\nx,y,z\n");
268
269 ASSERT_EQ(data.size(), 3u);
270 EXPECT_EQ(data(0)(0), "a");
271 EXPECT_EQ(data(1)(0), "1");
272 EXPECT_EQ(data(2)(0), "x");
273}
274
276{
277 std::istringstream ss("a,b,c\n");
279
280 ASSERT_EQ(data.size(), 1u);
281 EXPECT_EQ(data(0).size(), 3u);
282}
283
285{
286 std::istringstream ss("a,b,c\n1,2,3");
288
289 ASSERT_EQ(data.size(), 2u);
290 EXPECT_EQ(data(1)(2), "3");
291}
292
294{
295 std::istringstream ss("");
297
298 // Empty stream may produce 0 or 1 empty rows depending on implementation
299 EXPECT_LE(data.size(), 1u);
300}
301
303{
304 std::istringstream ss("a,b,c\n1,2\nx,y,z,w\n");
306
307 ASSERT_EQ(data.size(), 3u);
308 EXPECT_EQ(data(0).size(), 3u);
309 EXPECT_EQ(data(1).size(), 2u);
310 EXPECT_EQ(data(2).size(), 4u);
311}
312
314{
315 std::istringstream ss("a;b;c\n1;2;3\n");
317
318 ASSERT_EQ(data.size(), 2u);
319 EXPECT_EQ(data(0)(1), "b");
320 EXPECT_EQ(data(1)(1), "2");
321}
322
323//============================================================================
324// csv_escape Tests
325//============================================================================
326
327class CsvEscapeTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_escape Tests" section.
328
330{
331 EXPECT_EQ(csv_escape("hello"), "hello");
332 EXPECT_EQ(csv_escape("world123"), "world123");
333 EXPECT_EQ(csv_escape(""), "");
334}
335
337{
338 EXPECT_EQ(csv_escape("hello, world"), "\"hello, world\"");
339}
340
342{
343 EXPECT_EQ(csv_escape("say \"hi\""), "\"say \"\"hi\"\"\"");
344}
345
347{
348 EXPECT_EQ(csv_escape("line1\nline2"), "\"line1\nline2\"");
349}
350
352{
353 EXPECT_EQ(csv_escape("line1\rline2"), "\"line1\rline2\"");
354}
355
357{
358 EXPECT_EQ(csv_escape("a,\"b\"\nc"), "\"a,\"\"b\"\"\nc\"");
359}
360
362{
363 EXPECT_EQ(csv_escape("a;b", ';'), "\"a;b\"");
364 EXPECT_EQ(csv_escape("a,b", ';'), "a,b"); // Comma not special with semicolon delimiter
365}
366
367//============================================================================
368// csv_write_row Tests
369//============================================================================
370
371class CsvWriteRowTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_write_row Tests" section.
372
374{
376 row.append("a");
377 row.append("b");
378 row.append("c");
379
380 std::ostringstream out;
382
383 EXPECT_EQ(out.str(), "a,b,c\n");
384}
385
387{
389 row.append("name");
390 row.append("hello, world"); // Contains comma
391 row.append("value");
392
393 std::ostringstream out;
395
396 EXPECT_EQ(out.str(), "name,\"hello, world\",value\n");
397}
398
400{
402 row.append("say \"hi\"");
403
404 std::ostringstream out;
406
407 EXPECT_EQ(out.str(), "\"say \"\"hi\"\"\"\n");
408}
409
411{
413
414 std::ostringstream out;
416
417 EXPECT_EQ(out.str(), "\n");
418}
419
421{
423 row.append("single");
424
425 std::ostringstream out;
427
428 EXPECT_EQ(out.str(), "single\n");
429}
430
432{
434 row.append("a");
435 row.append("b");
436 row.append("c");
437
438 std::ostringstream out;
439 csv_write_row(out, row, ';');
440
441 EXPECT_EQ(out.str(), "a;b;c\n");
442}
443
445{
447 row.append("a");
448 row.append("b");
449
450 std::ostringstream out;
451 csv_write_row(out, row, ',', "\r\n");
452
453 EXPECT_EQ(out.str(), "a,b\r\n");
454}
455
456//============================================================================
457// csv_write_all Tests
458//============================================================================
459
460class CsvWriteAllTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_write_all Tests" section.
461
463{
465
467 row1.append("a");
468 row1.append("b");
469 data.append(row1);
470
472 row2.append("1");
473 row2.append("2");
474 data.append(row2);
475
476 std::ostringstream out;
477 csv_write_all(out, data);
478
479 EXPECT_EQ(out.str(), "a,b\n1,2\n");
480}
481
483{
485
486 std::ostringstream out;
487 csv_write_all(out, data);
488
489 EXPECT_EQ(out.str(), "");
490}
491
492//============================================================================
493// csv_read_file / csv_write_file Tests
494//============================================================================
495
496class CsvFileTest : public ::testing::Test
497{
498protected:
499 void SetUp() override
500 {
501 test_filename = "/tmp/aleph_csv_test_" + std::to_string(rand()) + ".csv";
502 }
503
504 void TearDown() override
505 {
506 std::remove(test_filename.c_str());
507 }
508
509 std::string test_filename;
510};
511
513{
514 // Create test data
516
517 Array<std::string> header;
518 header.append("name");
519 header.append("value");
520 data.append(header);
521
523 row1.append("item1");
524 row1.append("100");
525 data.append(row1);
526
528 row2.append("item2");
529 row2.append("200");
530 data.append(row2);
531
532 // Write to file
533 ASSERT_NO_THROW(csv_write_file(test_filename, data));
534
535 // Read back
537 ASSERT_NO_THROW(read_data = csv_read_file(test_filename));
538
539 // Verify
540 ASSERT_EQ(read_data.size(), 3u);
541 EXPECT_EQ(read_data(0)(0), "name");
542 EXPECT_EQ(read_data(0)(1), "value");
543 EXPECT_EQ(read_data(1)(0), "item1");
544 EXPECT_EQ(read_data(1)(1), "100");
545 EXPECT_EQ(read_data(2)(0), "item2");
546 EXPECT_EQ(read_data(2)(1), "200");
547}
548
550{
551 EXPECT_THROW(csv_read_file("/nonexistent/path/file.csv"), std::runtime_error);
552}
553
555{
558 row.append("test");
559 data.append(row);
560
561 EXPECT_THROW(csv_write_file("/nonexistent/path/file.csv", data), std::runtime_error);
562}
563
565{
567
569 row.append("field with, comma");
570 row.append("field with \"quotes\"");
571 row.append("field with\nnewline");
572 data.append(row);
573
574 csv_write_file(test_filename, data);
576
577 ASSERT_EQ(read_data.size(), 1u);
578 ASSERT_EQ(read_data(0).size(), 3u);
579 EXPECT_EQ(read_data(0)(0), "field with, comma");
580 EXPECT_EQ(read_data(0)(1), "field with \"quotes\"");
581 EXPECT_EQ(read_data(0)(2), "field with\nnewline");
582}
583
584//============================================================================
585// csv_num_columns Tests
586//============================================================================
587
588class CsvNumColumnsTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_num_columns Tests" section.
589
591{
592 std::string line = "a,b,c,d,e";
594
596}
597
603
605{
606 std::string line = "single";
608
610}
611
612//============================================================================
613// csv_is_rectangular Tests
614//============================================================================
615
616class CsvIsRectangularTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_is_rectangular Tests" section.
617
623
625{
626 std::istringstream ss("a,b,c\n1,2,3\nx,y,z\n");
628
630}
631
633{
634 std::istringstream ss("a,b,c\n1,2\nx,y,z,w\n");
636
638}
639
641{
642 std::istringstream ss("a,b,c\n");
644
646}
647
648//============================================================================
649// csv_get_column Tests
650//============================================================================
651
652class CsvGetColumnTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_get_column Tests" section.
653
655{
656 std::istringstream ss("name,age\nAlice,30\nBob,25\n");
658
660
661 ASSERT_EQ(names.size(), 3u);
662 EXPECT_EQ(names(0), "name");
663 EXPECT_EQ(names(1), "Alice");
664 EXPECT_EQ(names(2), "Bob");
665}
666
668{
669 std::istringstream ss("name,age\nAlice,30\nBob,25\n");
671
673
674 ASSERT_EQ(ages.size(), 3u);
675 EXPECT_EQ(ages(0), "age");
676 EXPECT_EQ(ages(1), "30");
677 EXPECT_EQ(ages(2), "25");
678}
679
681{
682 std::istringstream ss("a,b\n1,2\n");
684
685 EXPECT_THROW(csv_get_column(data, 5), std::out_of_range);
686}
687
689{
691 Array<std::string> column = csv_get_column(data, 0);
692
693 EXPECT_EQ(column.size(), 0u);
694}
695
696//============================================================================
697// csv_to_number Tests
698//============================================================================
699
700class CsvToNumberTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "csv_to_number Tests" section.
701
708
710{
711 EXPECT_EQ(csv_to_number<long>("1000000000"), 1000000000L);
712 EXPECT_EQ(csv_to_number<long>("-999999999"), -999999999L);
713}
714
721
727
729{
730 EXPECT_THROW((void)csv_to_number<int>("not_a_number"), std::invalid_argument);
731 EXPECT_THROW((void)csv_to_number<int>(""), std::invalid_argument);
732}
733
735{
736 EXPECT_THROW((void)csv_to_number<double>("abc"), std::invalid_argument);
737}
738
740{
741 // std::stoi handles leading whitespace
742 EXPECT_EQ(csv_to_number<int>(" 42"), 42);
743}
744
745//============================================================================
746// Round-trip Tests
747//============================================================================
748
749class CsvRoundTripTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "Round-trip Tests" section.
750
752{
753 // Original data
754 std::istringstream input("name,value\ntest,100\n");
756
757 // Write to string
758 std::ostringstream output;
760
761 // Parse back
762 std::istringstream input2(output.str());
764
765 // Compare
767 for (size_t i = 0; i < original.size(); ++i)
768 {
769 ASSERT_EQ(parsed(i).size(), original(i).size());
770 for (size_t j = 0; j < original(i).size(); ++j)
771 EXPECT_EQ(parsed(i)(j), original(i)(j));
772 }
773}
774
776{
777 // Create data with special characters
779
781 row1.append("simple");
782 row1.append("with, comma");
783 row1.append("with \"quotes\"");
784 row1.append("with\nnewline");
786
788 row2.append("\"\"\""); // Just quotes
789 row2.append(",,,"); // Just commas
790 row2.append("\n\r\n"); // Newlines
791 row2.append(""); // Empty
793
794 // Round trip
795 std::ostringstream output;
797
798 std::istringstream input(output.str());
800
801 // Verify
803 for (size_t i = 0; i < original.size(); ++i)
804 {
805 ASSERT_EQ(parsed(i).size(), original(i).size())
806 << "Row " << i << " size mismatch";
807 for (size_t j = 0; j < original(i).size(); ++j)
808 EXPECT_EQ(parsed(i)(j), original(i)(j))
809 << "Mismatch at row " << i << ", col " << j;
810 }
811}
812
813//============================================================================
814// Edge Cases Tests
815//============================================================================
816
817class CsvEdgeCasesTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "Edge Cases Tests" section.
818
820{
821 std::istringstream ss("\n\n\n");
823
824 // Blank lines are expected to come back as rows; this only checks that at least one row is produced
825 EXPECT_GE(data.size(), 1u);
826}
827
829{
830 std::string line = ",,,";
832
833 // Four fields (three delimiters = four empty fields)
834 ASSERT_EQ(row.size(), 4u);
835 for (size_t i = 0; i < row.size(); ++i)
836 EXPECT_EQ(row(i), "");
837}
838
840{
841 std::string line = "a,\"\",b";
843
844 ASSERT_EQ(row.size(), 3u);
845 EXPECT_EQ(row(0), "a");
846 EXPECT_EQ(row(1), "");
847 EXPECT_EQ(row(2), "b");
848}
849
851{
852 std::string line = "\"\"\"\""; // Field containing single quote
854
855 ASSERT_EQ(row.size(), 1u);
856 EXPECT_EQ(row(0), "\"");
857}
858
860{
861 std::string long_string(10000, 'x');
862 std::string line = "a," + long_string + ",b";
864
865 ASSERT_EQ(row.size(), 3u);
867}
868
870{
871 std::string line;
872 for (int i = 0; i < 100; ++i)
873 {
874 if (i > 0) line += ",";
875 line += std::to_string(i);
876 }
877
879
880 ASSERT_EQ(row.size(), 100u);
881 EXPECT_EQ(row(0), "0");
882 EXPECT_EQ(row(99), "99");
883}
884
886{
887 std::string line = "日本語,中文,한국어";
889
890 ASSERT_EQ(row.size(), 3u);
891 EXPECT_EQ(row(0), "日本語");
892 EXPECT_EQ(row(1), "中文");
893 EXPECT_EQ(row(2), "한국어");
894}
895
896//============================================================================
897// Integration Tests
898//============================================================================
899
901{
902 // 1. Parse CSV header and data
903 std::istringstream input(
904 "id,name,price,quantity\n"
905 "1,Widget,9.99,100\n"
906 "2,\"Gadget, Pro\",19.99,50\n"
907 "3,\"Thing \"\"Deluxe\"\"\",29.99,25\n"
908 );
909
911
912 // 2. Verify structure
913 ASSERT_EQ(data.size(), 4u);
915
916 // 3. Access header
917 EXPECT_EQ(data(0)(0), "id");
918 EXPECT_EQ(data(0)(1), "name");
919 EXPECT_EQ(data(0)(2), "price");
920 EXPECT_EQ(data(0)(3), "quantity");
921
922 // 4. Parse numeric data
923 EXPECT_EQ(csv_to_number<int>(data(1)(0)), 1);
924 EXPECT_DOUBLE_EQ(csv_to_number<double>(data(1)(2)), 9.99);
925 EXPECT_EQ(csv_to_number<int>(data(1)(3)), 100);
926
927 // 5. Handle special characters
928 EXPECT_EQ(data(2)(1), "Gadget, Pro");
929 EXPECT_EQ(data(3)(1), "Thing \"Deluxe\"");
930
931 // 6. Extract a column
933 EXPECT_EQ(names(1), "Widget");
934 EXPECT_EQ(names(2), "Gadget, Pro");
935}
936
937//============================================================================
938// Type Traits Tests
939//============================================================================
940
942{
943 // Verify the declared return types (size_t / bool); the [[nodiscard]] attribute itself is not exercised here
944 EXPECT_TRUE((std::is_same<decltype(csv_num_columns(std::declval<Array<std::string>&>())), size_t>::value));
945 EXPECT_TRUE((std::is_same<decltype(csv_is_rectangular(std::declval<Array<Array<std::string>>&>())), bool>::value));
946}
947
948//============================================================================
949// CsvRow Tests
950//============================================================================
951
952class CsvRowTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "CsvRow Tests" section.
953
955{
956 Array<std::string> fields;
957 fields.append("a");
958 fields.append("b");
959 fields.append("c");
960
961 CsvRow row(fields);
962
963 EXPECT_EQ(row[0], "a");
964 EXPECT_EQ(row[1], "b");
965 EXPECT_EQ(row[2], "c");
966}
967
969{
970 Array<std::string> header;
971 header.append("name");
972 header.append("age");
973 header.append("city");
974
975 Array<std::string> fields;
976 fields.append("Alice");
977 fields.append("30");
978 fields.append("NYC");
979
980 CsvRow row(fields, header);
981
982 EXPECT_EQ(row["name"], "Alice");
983 EXPECT_EQ(row["age"], "30");
984 EXPECT_EQ(row["city"], "NYC");
985}
986
988{
989 Array<std::string> fields;
990 fields.append("value");
991
992 CsvRow row(fields);
993
994 EXPECT_THROW((void)row["column"], std::runtime_error);
995}
996
998{
999 Array<std::string> header;
1000 header.append("name");
1001
1002 Array<std::string> fields;
1003 fields.append("Alice");
1004
1005 CsvRow row(fields, header);
1006
1007 EXPECT_THROW((void)row["unknown"], std::out_of_range);
1008}
1009
1011{
1012 Array<std::string> header;
1013 header.append("name");
1014 header.append("age");
1015
1016 Array<std::string> fields;
1017 fields.append("Alice");
1018 fields.append("30");
1019
1020 CsvRow row(fields, header);
1021
1022 EXPECT_TRUE(row.has_column("name"));
1023 EXPECT_TRUE(row.has_column("age"));
1024 EXPECT_FALSE(row.has_column("unknown"));
1025}
1026
1028{
1029 Array<std::string> header;
1030 header.append("name");
1031 header.append("value");
1032
1033 Array<std::string> fields;
1034 fields.append("test");
1035 fields.append("42");
1036
1037 CsvRow row(fields, header);
1038
1039 EXPECT_EQ(row.get<int>(1), 42);
1040 EXPECT_EQ(row.get<int>("value"), 42);
1041}
1042
1044{
1045 Array<std::string> fields;
1046 CsvRow empty_row(fields);
1048 EXPECT_EQ(empty_row.size(), 0u);
1049
1050 fields.append("a");
1051 CsvRow non_empty(fields);
1053 EXPECT_EQ(non_empty.size(), 1u);
1054}
1055
1056//============================================================================
1057// CsvReader Tests
1058//============================================================================
1059
1060class CsvReaderTest : public ::testing::Test
1061{
1062protected:
1063 void SetUp() override
1064 {
1065 test_filename = "/tmp/aleph_csv_reader_test_" + std::to_string(rand()) + ".csv";
1066
1067 // Create test file
1068 std::ofstream file(test_filename);
1069 file << "name,age,city\n";
1070 file << "Alice,30,NYC\n";
1071 file << "Bob,25,LA\n";
1072 file << "Charlie,35,Chicago\n";
1073 file.close();
1074 }
1075
1076 void TearDown() override
1077 {
1078 std::remove(test_filename.c_str());
1079 }
1080
1081 std::string test_filename;
1082};
1083
1085{
1086 CsvReader reader(test_filename);
1087 reader.read_header();
1088
1089 EXPECT_TRUE(reader.has_header());
1090 EXPECT_EQ(reader.header().size(), 3u);
1091 EXPECT_EQ(reader.header()(0), "name");
1092}
1093
1095{
1096 CsvReader reader(test_filename);
1097 reader.read_header();
1098
1099 EXPECT_TRUE(reader.has_next());
1100
1101 CsvRow row1 = reader.next_row();
1102 EXPECT_EQ(row1["name"], "Alice");
1103 EXPECT_EQ(row1["age"], "30");
1104
1105 CsvRow row2 = reader.next_row();
1106 EXPECT_EQ(row2["name"], "Bob");
1107
1108 CsvRow row3 = reader.next_row();
1109 EXPECT_EQ(row3["name"], "Charlie");
1110
1111 EXPECT_FALSE(reader.has_next());
1112}
1113
1115{
1116 CsvReader reader(test_filename);
1117 reader.read_header();
1118 reader.skip(1); // Skip Alice
1119
1120 CsvRow row = reader.next_row();
1121 EXPECT_EQ(row["name"], "Bob");
1122}
1123
1125{
1126 CsvReader reader(test_filename);
1127 reader.read_header();
1128
1129 EXPECT_EQ(reader.rows_read(), 0u);
1130 reader.next();
1131 EXPECT_EQ(reader.rows_read(), 1u);
1132 reader.next();
1133 EXPECT_EQ(reader.rows_read(), 2u);
1134}
1135
1137{
1138 CsvReader reader(test_filename);
1139 reader.read_header();
1140
1141 size_t count = 0;
1142 for (const auto & row : reader)
1143 {
1144 (void)row;
1145 ++count;
1146 }
1147
1148 EXPECT_EQ(count, 3u);
1149}
1150
1152{
1153 std::istringstream ss("a,b\n1,2\n");
1155
1157 EXPECT_EQ(row1(0), "a");
1158
1160 EXPECT_EQ(row2(0), "1");
1161}
1162
1163//============================================================================
1164// Filter and Selection Tests
1165//============================================================================
1166
1167class CsvFilterTest : public ::testing::Test // Fixture: every test starts from the same parsed table (a name,status header plus three data rows).
1168{
1169protected:
1170 void SetUp() override // Rebuilds `data` from an in-memory CSV before each test.
1171 {
1172 std::istringstream ss("name,status\nAlice,active\nBob,inactive\nCharlie,active\n"); // Two "active" rows and one "inactive" row.
1173 data = csv_read_all(ss); // The declaration of `data` is on a line not present in this listing.
1174 }
1175
1177};
1178
1180{
1181 auto result = csv_filter(data, [](const Array<std::string> & row) {
1182 return row.size() > 1 and row(1) == "active";
1183 });
1184
1185 EXPECT_EQ(result.size(), 2u); // Alice and Charlie
1186}
1187
1189{
1190 auto result = csv_filter_by_value(data, 1, "active");
1191
1192 EXPECT_EQ(result.size(), 2u);
1193}
1194
1196{
1197 Array<size_t> cols;
1198 cols.append(0); // name only
1199
1200 auto result = csv_select_columns(data, cols);
1201
1202 EXPECT_EQ(result.size(), data.size());
1203 EXPECT_EQ(result(0).size(), 1u);
1204 EXPECT_EQ(result(0)(0), "name");
1205}
1206
1208{
1209 auto result = csv_skip_rows(data, 1); // Skip header
1210
1211 EXPECT_EQ(result.size(), 3u);
1212 EXPECT_EQ(result(0)(0), "Alice");
1213}
1214
1216{
1217 auto result = csv_take_rows(data, 2); // Header + Alice
1218
1219 EXPECT_EQ(result.size(), 2u);
1220 EXPECT_EQ(result(1)(0), "Alice");
1221}
1222
1223//============================================================================
1224// Statistics Tests
1225//============================================================================
1226
1227class CsvStatisticsTest : public ::testing::Test // Fixture: every test starts from the same parsed table (an a,b,c header plus three data rows).
1228{
1229protected:
1230 void SetUp() override // Rebuilds `data` from an in-memory CSV before each test.
1231 {
1232 std::istringstream ss("a,b,c\n1,,3\n4,5,\n7,8,9\n"); // Rows "1,,3" and "4,5," each contain one empty field.
1233 data = csv_read_all(ss); // The declaration of `data` is on a line not present in this listing.
1234 }
1235
1237};
1238
1243
1245{
1246 EXPECT_EQ(csv_count_empty(data), 2u); // Two empty fields
1247}
1248
1250{
1251 auto count = csv_count_if(data, [](const Array<std::string> & row) {
1252 return row.size() >= 3 and row(2) == "";
1253 });
1254
1255 EXPECT_EQ(count, 1u); // Row "4,5,"
1256}
1257
1259{
1260 auto idx = csv_find_row(data, [](const Array<std::string> & row) {
1261 return row.size() > 0 and row(0) == "4";
1262 });
1263
1264 EXPECT_EQ(idx, 2u);
1265}
1266
1268{
1269 auto idx = csv_find_by_value(data, 0, "7");
1270 EXPECT_EQ(idx, 3u);
1271}
1272
1274{
1275 auto idx = csv_find_by_value(data, 0, "xyz");
1276 EXPECT_EQ(idx, data.size()); // Not found
1277}
1278
1280{
1281 std::istringstream ss("cat\ndog\ncat\nbird\ndog\n");
1282 auto animals = csv_read_all(ss);
1283
1284 auto unique = csv_distinct(animals, 0);
1285 EXPECT_EQ(unique.size(), 3u); // cat, dog, bird
1286}
1287
1288//============================================================================
1289// Transformation Tests
1290//============================================================================
1291
1292class CsvTransformationTest : public ::testing::Test // Fixture: every test starts from the same parsed table (an a,b,c header plus two numeric rows).
1293{
1294protected:
1295 void SetUp() override // Rebuilds `data` from an in-memory CSV before each test.
1296 {
1297 std::istringstream ss("a,b,c\n1,2,3\n4,5,6\n"); // Three lines of three fields each.
1298 data = csv_read_all(ss); // The declaration of `data` is on a line not present in this listing.
1299 }
1300
1302};
1303
1305{
1306 auto result = csv_transpose(data);
1307
1308 EXPECT_EQ(result.size(), 3u); // 3 columns become 3 rows
1309 EXPECT_EQ(result(0).size(), 3u); // 3 rows become 3 columns
1310 EXPECT_EQ(result(0)(0), "a");
1311 EXPECT_EQ(result(0)(1), "1");
1312 EXPECT_EQ(result(0)(2), "4");
1313}
1314
1316{
1317 std::istringstream ss("charlie,3\nalice,1\nbob,2\n");
1318 auto unsorted = csv_read_all(ss);
1319
1320 auto sorted = csv_sort_by_column(unsorted, 0); // Sort by name
1321
1322 EXPECT_EQ(sorted(0)(0), "alice");
1323 EXPECT_EQ(sorted(1)(0), "bob");
1324 EXPECT_EQ(sorted(2)(0), "charlie");
1325}
1326
1328{
1329 std::istringstream ss("charlie,30\nalice,10\nbob,20\n");
1330 auto unsorted = csv_read_all(ss);
1331
1332 auto sorted = csv_sort_by_column_numeric<int>(unsorted, 1); // Sort by value
1333
1334 EXPECT_EQ(sorted(0)(0), "alice"); // 10
1335 EXPECT_EQ(sorted(1)(0), "bob"); // 20
1336 EXPECT_EQ(sorted(2)(0), "charlie"); // 30
1337}
1338
1340{
1341 std::istringstream ss("a,b\n1,2\n3,4\n1,2\n5,6\n1,2\n");
1342 auto with_dupes = csv_read_all(ss);
1343
1345
1346 EXPECT_EQ(unique.size(), 4u); // header + 3 unique data rows
1347}
1348
1350{
1351 auto upper = csv_transform(data, [](const std::string & s) {
1352 std::string result = s;
1353 for (char & c : result)
1354 c = std::toupper(static_cast<unsigned char>(c));
1355 return result;
1356 });
1357
1358 EXPECT_EQ(upper(0)(0), "A");
1359 EXPECT_EQ(upper(0)(1), "B");
1360}
1361
1363{
1364 auto with_sum = csv_add_column(data, [](const Array<std::string> & row) {
1365 if (row.size() < 3) return std::string("N/A");
1366 try {
1367 int sum = std::stoi(row(0)) + std::stoi(row(1)) + std::stoi(row(2));
1368 return std::to_string(sum);
1369 } catch (...) {
1370 return std::string("N/A");
1371 }
1372 });
1373
1374 EXPECT_EQ(with_sum(0).size(), 4u); // Original 3 + new column
1375 EXPECT_EQ(with_sum(1)(3), "6"); // 1+2+3
1376 EXPECT_EQ(with_sum(2)(3), "15"); // 4+5+6
1377}
1378
1380{
1381 auto renamed = csv_rename_column(data, "a", "first_col");
1382
1383 EXPECT_EQ(renamed(0)(0), "first_col");
1384 EXPECT_EQ(renamed(0)(1), "b"); // Unchanged
1385}
1386
1387//============================================================================
1388// Utility Tests
1389//============================================================================
1390
1391class CsvUtilityTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "Utility Tests" section.
1392
1394{
1395 // Create stream with UTF-8 BOM (3 bytes: EF BB BF)
1396 std::string bom_str;
1397 bom_str += '\xEF';
1398 bom_str += '\xBB';
1399 bom_str += '\xBF';
1400 bom_str += "a,b,c\n";
1401 std::istringstream ss(bom_str);
1402
1404
1405 auto row = csv_read_row(ss);
1406 EXPECT_EQ(row(0), "a");
1407}
1408
1410{
1411 std::istringstream ss("a,b,c\n");
1412
1414
1415 auto row = csv_read_row(ss);
1416 EXPECT_EQ(row(0), "a"); // Still reads correctly
1417}
1418
1420{
1421 std::istringstream ss(" a , b , c \n");
1422 auto data = csv_read_all(ss);
1423
1424 auto trimmed = csv_trim_fields(data);
1425
1426 EXPECT_EQ(trimmed(0)(0), "a");
1427 EXPECT_EQ(trimmed(0)(1), "b");
1428 EXPECT_EQ(trimmed(0)(2), "c");
1429}
1430
1432{
1433 std::istringstream ss("a,,c\n");
1434 auto data = csv_read_all(ss);
1435
1436 auto filled = csv_fill_empty(data, "N/A");
1437
1438 EXPECT_EQ(filled(0)(0), "a");
1439 EXPECT_EQ(filled(0)(1), "N/A");
1440 EXPECT_EQ(filled(0)(2), "c");
1441}
1442
1443//============================================================================
1444// Join Tests
1445//============================================================================
1446
1447class CsvJoinTest : public ::testing::Test {}; // Empty fixture (no members, no SetUp/TearDown) for the "Join Tests" section.
1448
1450{
1451 std::istringstream ss1("a,b\n1,2\n");
1452 std::istringstream ss2("c,d\n3,4\n");
1453
1454 auto left = csv_read_all(ss1);
1455 auto right = csv_read_all(ss2);
1456
1457 auto joined = csv_join_horizontal(left, right);
1458
1459 EXPECT_EQ(joined.size(), 2u);
1460 EXPECT_EQ(joined(0).size(), 4u);
1461 EXPECT_EQ(joined(0)(0), "a");
1462 EXPECT_EQ(joined(0)(2), "c");
1463}
1464
1466{
1467 std::istringstream ss1("a,b\n1,2\n");
1468 std::istringstream ss2("3,4\n5,6\n");
1469
1470 auto top = csv_read_all(ss1);
1471 auto bottom = csv_read_all(ss2);
1472
1473 auto joined = csv_join_vertical(top, bottom);
1474
1475 EXPECT_EQ(joined.size(), 4u);
1476 EXPECT_EQ(joined(2)(0), "3");
1477}
1478
1480{
1481 std::istringstream ss1("id,name\n1,Alice\n2,Bob\n3,Charlie\n");
1482 std::istringstream ss2("id,dept\n1,Sales\n3,Engineering\n4,HR\n");
1483
1484 auto left = csv_read_all(ss1);
1485 auto right = csv_read_all(ss2);
1486
1487 // Skip headers for join
1488 auto left_data = csv_skip_rows(left, 1);
1489 auto right_data = csv_skip_rows(right, 1);
1490
1492
1493 EXPECT_EQ(joined.size(), 2u); // Only 1 and 3 match
1494 EXPECT_EQ(joined(0)(1), "Alice");
1495 EXPECT_EQ(joined(0)(2), "Sales");
1496}
1497
1499{
1500 std::istringstream ss("dept,name\nSales,Alice\nEng,Bob\nSales,Charlie\nEng,Dave\n");
1501 auto data = csv_read_all(ss);
1502 auto data_rows = csv_skip_rows(data, 1);
1503
1504 auto groups = csv_group_by(data_rows, 0);
1505
1506 EXPECT_EQ(groups.size(), 2u); // Sales and Eng
1507
1508 // Each group should have 2 members
1509 EXPECT_EQ(groups(0).size(), 2u);
1510 EXPECT_EQ(groups(1).size(), 2u);
1511}
1512
1513//============================================================================
1514// Main
1515//============================================================================
1516
1517int main(int argc, char **argv)
1518{
1519 ::testing::InitGoogleTest(&argc, argv);
1520 return RUN_ALL_TESTS();
1521}
int main()
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:138
constexpr size_t size() const noexcept
Return the number of elements stored in the array.
Definition tpl_array.H:333
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:239
Lazy CSV reader for large files.
Definition parse-csv.H:711
A CSV row with header-based field access.
Definition parse-csv.H:601
T & append(const T &item)
Append a new item by copy.
Definition htlist.H:1562
void empty() noexcept
empty the list
Definition htlist.H:1689
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
void SetUp() override
void TearDown() override
std::string test_filename
Array< Array< std::string > > data
void SetUp() override
void SetUp() override
void TearDown() override
std::string test_filename
void SetUp() override
Array< Array< std::string > > data
Array< Array< std::string > > data
#define TEST(name)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
long csv_to_number< long >(const std::string &field)
Definition parse-csv.H:555
size_t csv_find_row(const Array< Array< std::string > > &rows, Pred predicate)
Find first row matching a predicate.
Definition parse-csv.H:1010
Array< Array< std::string > > csv_trim_fields(const Array< Array< std::string > > &rows)
Trim whitespace from all fields.
Definition parse-csv.H:1273
std::string csv_escape(const std::string &field, char delimiter=',')
Escape a string for CSV output.
Definition parse-csv.H:320
Array< Array< std::string > > csv_read_file(const std::string &filename, char delimiter=',')
Read all rows from a CSV file.
Definition parse-csv.H:290
Array< Array< std::string > > csv_transform(const Array< Array< std::string > > &rows, Func func)
Apply a transformation to each field.
Definition parse-csv.H:1219
Itor unique(Itor __first, Itor __last, BinaryPredicate __binary_pred=BinaryPredicate())
Remove consecutive duplicates in place.
Definition ahAlgo.H:1058
Array< Array< std::string > > csv_filter_by_value(const Array< Array< std::string > > &rows, size_t col_index, const std::string &value)
Filter CSV rows by column value.
Definition parse-csv.H:880
size_t size(Node *root) noexcept
bool csv_skip_bom(std::istream &in)
Skip UTF-8 BOM if present.
Definition parse-csv.H:1250
Array< Array< std::string > > csv_transpose(const Array< Array< std::string > > &rows)
Transpose CSV data (swap rows and columns).
Definition parse-csv.H:1067
Array< Array< std::string > > csv_rename_column(const Array< Array< std::string > > &rows, const std::string &old_name, const std::string &new_name)
Rename a column (in the header row).
Definition parse-csv.H:1490
Array< Array< std::string > > csv_take_rows(const Array< Array< std::string > > &rows, size_t n)
Take only the first N rows of CSV data.
Definition parse-csv.H:955
Array< std::string > csv_get_column(const Array< Array< std::string > > &rows, size_t col_index)
Get a column from CSV data.
Definition parse-csv.H:497
Array< Array< std::string > > csv_join_horizontal(const Array< Array< std::string > > &left, const Array< Array< std::string > > &right)
Join two CSV datasets horizontally (add columns).
Definition parse-csv.H:1319
Array< Array< std::string > > csv_unique(const Array< Array< std::string > > &rows)
Remove duplicate rows.
Definition parse-csv.H:1183
Array< Array< Array< std::string > > > csv_group_by(const Array< Array< std::string > > &rows, size_t col_index)
Group rows by a column value.
Definition parse-csv.H:1424
float csv_to_number< float >(const std::string &field)
Definition parse-csv.H:569
void csv_write_row(std::ostream &out, const Array< std::string > &row, char delimiter=',', const std::string &line_ending="\n")
Write a CSV row to an output stream.
Definition parse-csv.H:376
size_t csv_num_columns(const Array< std::string > &row)
Get the number of columns in a CSV row.
Definition parse-csv.H:444
size_t csv_count_empty(const Array< Array< std::string > > &rows)
Count empty fields in CSV data.
Definition parse-csv.H:981
Array< std::string > csv_read_row(std::istream &in, char delimiter=',')
Read a single CSV row from an input stream.
Definition parse-csv.H:158
size_t csv_find_by_value(const Array< Array< std::string > > &rows, size_t col_index, const std::string &value)
Find row where column equals value.
Definition parse-csv.H:1023
bool csv_is_rectangular(const Array< Array< std::string > > &rows)
Check if all rows have the same number of columns.
Definition parse-csv.H:466
Array< std::string > csv_distinct(const Array< Array< std::string > > &rows, size_t col_index)
Get distinct values in a column.
Definition parse-csv.H:1036
Array< Array< std::string > > csv_filter(const Array< Array< std::string > > &rows, Pred predicate)
Filter CSV rows by a predicate.
Definition parse-csv.H:859
Array< Array< std::string > > csv_read_all(std::istream &in, char delimiter=',')
Read all rows from a CSV input stream.
Definition parse-csv.H:253
int csv_to_number< int >(const std::string &field)
Definition parse-csv.H:548
double csv_to_number< double >(const std::string &field)
Definition parse-csv.H:562
Array< Array< std::string > > csv_inner_join(const Array< Array< std::string > > &left, size_t left_key_col, const Array< Array< std::string > > &right, size_t right_key_col)
Inner join two CSV datasets by a key column.
Definition parse-csv.H:1379
void csv_write_file(const std::string &filename, const Array< Array< std::string > > &rows, char delimiter=',', const std::string &line_ending="\n")
Write CSV data to a file.
Definition parse-csv.H:424
Array< Array< std::string > > csv_skip_rows(const Array< Array< std::string > > &rows, size_t n)
Skip the first N rows of CSV data.
Definition parse-csv.H:937
Array< Array< std::string > > csv_select_columns(const Array< Array< std::string > > &rows, const Array< size_t > &col_indices)
Select specific columns from CSV data.
Definition parse-csv.H:906
Array< Array< std::string > > csv_fill_empty(const Array< Array< std::string > > &rows, const std::string &default_value)
Replace empty fields with a default value.
Definition parse-csv.H:1299
size_t csv_count_if(const Array< Array< std::string > > &rows, Pred predicate)
Count rows matching a predicate.
Definition parse-csv.H:995
Array< Array< std::string > > csv_join_vertical(const Array< Array< std::string > > &top, const Array< Array< std::string > > &bottom)
Join two CSV datasets vertically (add rows).
Definition parse-csv.H:1354
Array< Array< std::string > > csv_add_column(const Array< Array< std::string > > &rows, Func func)
Add a new column with computed values.
Definition parse-csv.H:1463
Array< Array< std::string > > csv_sort_by_column(const Array< Array< std::string > > &rows, size_t col_index, bool ascending=true)
Sort CSV data by a column.
Definition parse-csv.H:1100
void csv_write_all(std::ostream &out, const Array< Array< std::string > > &rows, char delimiter=',', const std::string &line_ending="\n")
Write multiple CSV rows to an output stream.
Definition parse-csv.H:402
DynList< T > maps(const C &c, Op op)
Classic map operation.
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
size_t csv_count_rows(const Array< Array< std::string > > &rows)
Count total number of rows.
Definition parse-csv.H:973
T sum(const Container &container, const T &init=T{})
Compute sum of all elements.
Comprehensive CSV (Comma-Separated Values) parsing and manipulation utilities.
TEST_F(CsvReadRowStreamTest, SimpleRow)
fstream file[12]
Definition treapObs.C:67
ofstream output
Definition writeHeap.C:213