Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
parse-csv.H
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
121#ifndef PARSE_CSV_H
122#define PARSE_CSV_H
123
124#include <sstream>
125#include <istream>
126#include <ostream>
127#include <fstream>
128#include <string>
129#include <tpl_array.H>
130
131namespace Aleph
132{
133
158inline Array<std::string> csv_read_row(std::istream & in, char delimiter = ',')
159{
160 std::stringstream ss;
161 bool inquotes = false;
162 Array<std::string> row; // Relying on RVO
163
164 while (in.good())
165 {
166 int ch = in.get();
167
168 // Check for EOF
169 if (ch == std::char_traits<char>::eof())
170 break;
171
172 char c = static_cast<char>(ch);
173
174 if (not inquotes and c == '"') // Begin quoted field
175 inquotes = true;
176 else if (inquotes and c == '"') // Quote character inside quoted field
177 {
178 if (in.peek() == '"') // Two consecutive quotes resolve to one
179 ss << static_cast<char>(in.get());
180 else // End of quoted field
181 inquotes = false;
182 }
183 else if (not inquotes and c == delimiter) // End of field
184 {
185 row.append(ss.str());
186 ss.str("");
187 }
188 else if (not inquotes and (c == '\r' or c == '\n')) // End of row
189 {
190 if (in.peek() == '\n') // Handle CRLF
191 in.get();
192 row.append(ss.str());
193 return row;
194 }
195 else
196 ss << c;
197 }
198
199 // Handle last field if stream ended without newline
200 if (ss.str().size() > 0 or row.size() > 0)
201 row.append(ss.str());
202
203 return row;
204}
205
224inline Array<std::string> csv_read_row(const std::string & line,
225 char delimiter = ',')
226{
227 std::stringstream ss(line);
228 return csv_read_row(ss, delimiter);
229}
230
254 char delimiter = ',')
255{
257
258 while (in.good())
259 {
261 if (row.size() == 0 and not in.good())
262 break;
263 result.append(std::move(row));
264 }
265
266 return result;
267}
268
290inline Array<Array<std::string>> csv_read_file(const std::string & filename,
291 char delimiter = ',')
292{
293 std::ifstream file(filename);
294 if (not file.is_open())
295 throw std::runtime_error("Cannot open file: " + filename);
296
297 return csv_read_all(file, delimiter);
298}
299
/** Escape a string for CSV output.
 *
 *  A field is returned unchanged unless it contains the delimiter, a double
 *  quote, a line feed or a carriage return. Otherwise it is wrapped in
 *  double quotes and every embedded double quote is doubled.
 *
 *  @param[in] field raw field text.
 *  @param[in] delimiter field separator in use for the output.
 *  @return the field, quoted and escaped if necessary.
 */
inline std::string csv_escape(const std::string & field, char delimiter = ',')
{
  // Characters that force quoting. Searched with an explicit length so an
  // unusual delimiter such as '\0' is still honoured.
  const char special[] = {delimiter, '"', '\n', '\r'};
  if (field.find_first_of(special, 0, sizeof special) == std::string::npos)
    return field;

  std::string result;
  result.reserve(field.size() + 2);   // lower bound: content + two quotes
  result += '"';
  for (const char c : field)
    {
      if (c == '"')
        result += '"';                // double every embedded quote
      result += c;
    }
  result += '"';

  return result;
}
350
376inline void csv_write_row(std::ostream & out,
377 const Array<std::string> & row,
378 char delimiter = ',',
379 const std::string & line_ending = "\n")
380{
381 for (size_t i = 0; i < row.size(); ++i)
382 {
383 if (i > 0)
384 out << delimiter;
385 out << csv_escape(row(i), delimiter);
386 }
387 out << line_ending;
388}
389
402inline void csv_write_all(std::ostream & out,
403 const Array<Array<std::string>> & rows,
404 char delimiter = ',',
405 const std::string & line_ending = "\n")
406{
407 for (size_t i = 0; i < rows.size(); ++i)
409}
410
424inline void csv_write_file(const std::string & filename,
425 const Array<Array<std::string>> & rows,
426 char delimiter = ',',
427 const std::string & line_ending = "\n")
428{
429 std::ofstream file(filename);
430 if (not file.is_open())
431 throw std::runtime_error("Cannot open file for writing: " + filename);
432
434}
435
445{
446 return row.size();
447}
448
467{
468 if (rows.size() == 0)
469 return true;
470
471 size_t expected_cols = rows(0).size();
472 for (size_t i = 1; i < rows.size(); ++i)
473 if (rows(i).size() != expected_cols)
474 return false;
475
476 return true;
477}
478
498 size_t col_index)
499{
500 Array<std::string> column;
501
502 for (size_t i = 0; i < rows.size(); ++i)
503 {
504 if (col_index >= rows(i).size())
505 throw std::out_of_range("Column index " + std::to_string(col_index) +
506 " out of range for row " + std::to_string(i));
507 column.append(rows(i)(col_index));
508 }
509
510 return column;
511}
512
/** Convert a CSV field to a numeric type.
 *
 *  The generic version extracts the value with a stream `operator>>`.
 *
 *  @tparam T target type; must be extractable from a std::istream.
 *  @param[in] field text to convert.
 *  @return the extracted value.
 *  @throw std::invalid_argument if the extraction fails.
 */
template <typename T>
[[nodiscard]] inline T csv_to_number(const std::string & field)
{
  std::istringstream parser(field);
  T parsed;

  if (not (parser >> parsed))
    throw std::invalid_argument("Cannot convert '" + field + "' to number");

  return parsed;
}

// Specialization for int: delegates to std::stoi, which reports
// std::invalid_argument / std::out_of_range on failure.
template <>
[[nodiscard]] inline int csv_to_number<int>(const std::string & field)
{
  const int parsed = std::stoi(field);
  return parsed;
}

// Specialization for long: delegates to std::stol.
template <>
[[nodiscard]] inline long csv_to_number<long>(const std::string & field)
{
  const long parsed = std::stol(field);
  return parsed;
}

// Specialization for double: delegates to std::stod.
template <>
[[nodiscard]] inline double csv_to_number<double>(const std::string & field)
{
  const double parsed = std::stod(field);
  return parsed;
}

// Specialization for float: delegates to std::stof.
template <>
[[nodiscard]] inline float csv_to_number<float>(const std::string & field)
{
  const float parsed = std::stof(field);
  return parsed;
}
573
574//============================================================================
575// CsvRow - Row with header-based access
576//============================================================================
577
601{
604
605public:
609
613
615 [[nodiscard]] const std::string & operator[](size_t index) const
616 {
617 return fields_(index);
618 }
619
623 [[nodiscard]] const std::string & operator[](const std::string & column_name) const
624 {
625 if (header_ == nullptr)
626 throw std::runtime_error("No header set for CsvRow");
627
628 for (size_t i = 0; i < header_->size(); ++i)
629 if ((*header_)(i) == column_name)
630 return fields_(i);
631
632 throw std::out_of_range("Column not found: " + column_name);
633 }
634
636 [[nodiscard]] const std::string & at(size_t index) const
637 {
638 if (index >= fields_.size())
639 throw std::out_of_range("Index out of range: " + std::to_string(index));
640 return fields_(index);
641 }
642
644 [[nodiscard]] bool has_column(const std::string & column_name) const
645 {
646 if (header_ == nullptr) return false;
647 for (size_t i = 0; i < header_->size(); ++i)
648 if ((*header_)(i) == column_name)
649 return i < fields_.size();
650 return false;
651 }
652
654 template <typename T>
655 [[nodiscard]] T get(size_t index) const
656 {
657 return csv_to_number<T>(fields_(index));
658 }
659
661 template <typename T>
662 [[nodiscard]] T get(const std::string & column_name) const
663 {
664 return csv_to_number<T>((*this)[column_name]);
665 }
666
668 [[nodiscard]] size_t size() const { return fields_.size(); }
669
671 [[nodiscard]] bool empty() const { return fields_.size() == 0; }
672
674 [[nodiscard]] const Array<std::string> & fields() const { return fields_; }
675
677 [[nodiscard]] const Array<std::string> * header() const { return header_; }
678};
679
680//============================================================================
681// CsvReader - Iterator-based reader for large files
682//============================================================================
683
711{
712 std::ifstream file_;
713 std::istream * stream_;
719
720public:
723 explicit CsvReader(const std::string & filename, char delimiter = ',')
724 : file_(filename), stream_(&file_), owns_stream_(true),
726 {
727 if (not file_.is_open())
728 throw std::runtime_error("Cannot open file: " + filename);
729 }
730
732 explicit CsvReader(std::istream & stream, char delimiter = ',')
735
744
746 [[nodiscard]] bool has_next() const
747 {
748 return stream_->good() and stream_->peek() != std::char_traits<char>::eof();
749 }
750
757
760 {
761 ++row_count_;
762 auto fields = csv_read_row(*stream_, delimiter_);
763 if (has_header_)
764 return CsvRow(std::move(fields), header_);
765 return CsvRow(std::move(fields));
766 }
767
769 [[nodiscard]] size_t rows_read() const { return row_count_; }
770
772 [[nodiscard]] const Array<std::string> & header() const { return header_; }
773
775 [[nodiscard]] bool has_header() const { return has_header_; }
776
778 void skip(size_t n)
779 {
780 for (size_t i = 0; i < n and has_next(); ++i)
781 next();
782 }
783
785 void reset()
786 {
787 if (owns_stream_)
788 {
789 file_.clear();
790 file_.seekg(0);
791 row_count_ = 0;
792 if (has_header_)
793 read_header(); // Re-read header
794 }
795 }
796
797 // Iterator support for range-based for loops
799 {
803
804 public:
807 {
809 current_ = reader_->next();
810 else
811 at_end_ = true;
812 }
813
814 const Array<std::string> & operator*() const { return current_; }
815
817 {
818 if (reader_->has_next())
819 current_ = reader_->next();
820 else
821 at_end_ = true;
822 return *this;
823 }
824
825 bool operator!=(const Iterator & other) const
826 {
827 return at_end_ != other.at_end_;
828 }
829 };
830
831 Iterator begin() { return Iterator(this, false); }
832 Iterator end() { return Iterator(this, true); }
833};
834
835//============================================================================
836// Filtering and Selection Functions
837//============================================================================
838
858template <typename Pred>
861{
863 for (size_t i = 0; i < rows.size(); ++i)
864 if (predicate(rows(i)))
865 result.append(rows(i));
866 return result;
867}
868
881 const Array<Array<std::string>> & rows,
882 size_t col_index,
883 const std::string & value)
884{
885 return csv_filter(rows, [col_index, &value](const Array<std::string> & row) {
886 return col_index < row.size() and row(col_index) == value;
887 });
888}
889
907 const Array<Array<std::string>> & rows,
909{
911
912 for (size_t i = 0; i < rows.size(); ++i)
913 {
915 for (size_t j = 0; j < col_indices.size(); ++j)
916 {
917 size_t idx = col_indices(j);
918 if (idx < rows(i).size())
919 new_row.append(rows(i)(idx));
920 else
921 new_row.append(""); // Empty if column doesn't exist
922 }
923 result.append(std::move(new_row));
924 }
925
926 return result;
927}
928
938 const Array<Array<std::string>> & rows,
939 size_t n)
940{
942 for (size_t i = n; i < rows.size(); ++i)
943 result.append(rows(i));
944 return result;
945}
946
956 const Array<Array<std::string>> & rows,
957 size_t n)
958{
960 size_t limit = (n < rows.size()) ? n : rows.size();
961 for (size_t i = 0; i < limit; ++i)
962 result.append(rows(i));
963 return result;
964}
965
966//============================================================================
967// Statistics Functions
968//============================================================================
969
973[[nodiscard]] inline size_t csv_count_rows(const Array<Array<std::string>> & rows)
974{
975 return rows.size();
976}
977
981[[nodiscard]] inline size_t csv_count_empty(const Array<Array<std::string>> & rows)
982{
983 size_t count = 0;
984 for (size_t i = 0; i < rows.size(); ++i)
985 for (size_t j = 0; j < rows(i).size(); ++j)
986 if (rows(i)(j).empty())
987 ++count;
988 return count;
989}
990
994template <typename Pred>
995[[nodiscard]] inline size_t csv_count_if(const Array<Array<std::string>> & rows,
997{
998 size_t count = 0;
999 for (size_t i = 0; i < rows.size(); ++i)
1000 if (predicate(rows(i)))
1001 ++count;
1002 return count;
1003}
1004
1009template <typename Pred>
1010[[nodiscard]] inline size_t csv_find_row(const Array<Array<std::string>> & rows,
1012{
1013 for (size_t i = 0; i < rows.size(); ++i)
1014 if (predicate(rows(i)))
1015 return i;
1016 return rows.size();
1017}
1018
1024 const Array<Array<std::string>> & rows,
1025 size_t col_index,
1026 const std::string & value)
1027{
1028 return csv_find_row(rows, [col_index, &value](const Array<std::string> & row) {
1029 return col_index < row.size() and row(col_index) == value;
1030 });
1031}
1032
1037 size_t col_index)
1038{
1039 Array<std::string> result;
1040 for (size_t i = 0; i < rows.size(); ++i)
1041 {
1042 if (col_index >= rows(i).size()) continue;
1043
1044 const std::string & val = rows(i)(col_index);
1045 bool found = false;
1046 for (size_t j = 0; j < result.size(); ++j)
1047 if (result(j) == val) { found = true; break; }
1048
1049 if (not found)
1050 result.append(val);
1051 }
1052 return result;
1053}
1054
1055//============================================================================
1056// Transformation Functions
1057//============================================================================
1058
1068 const Array<Array<std::string>> & rows)
1069{
1070 if (rows.size() == 0)
1071 return Array<Array<std::string>>();
1072
1073 if (not csv_is_rectangular(rows))
1074 throw std::runtime_error("Cannot transpose non-rectangular CSV data");
1075
1076 size_t num_rows = rows.size();
1077 size_t num_cols = rows(0).size();
1078
1080 for (size_t j = 0; j < num_cols; ++j)
1081 {
1083 for (size_t i = 0; i < num_rows; ++i)
1084 new_row.append(rows(i)(j));
1085 result.append(std::move(new_row));
1086 }
1087
1088 return result;
1089}
1090
1101 const Array<Array<std::string>> & rows,
1102 size_t col_index,
1103 bool ascending = true)
1104{
1105 // Create index array
1107 for (size_t i = 0; i < rows.size(); ++i)
1108 indices.append(i);
1109
1110 // Sort indices by column value
1111 for (size_t i = 0; i < indices.size(); ++i)
1112 for (size_t j = i + 1; j < indices.size(); ++j)
1113 {
1114 const std::string & a = (col_index < rows(indices(i)).size())
1115 ? rows(indices(i))(col_index) : "";
1116 const std::string & b = (col_index < rows(indices(j)).size())
1117 ? rows(indices(j))(col_index) : "";
1118
1119 bool should_swap = ascending ? (a > b) : (a < b);
1120 if (should_swap)
1121 std::swap(indices(i), indices(j));
1122 }
1123
1124 // Build sorted result
1126 for (size_t i = 0; i < indices.size(); ++i)
1127 result.append(rows(indices(i)));
1128
1129 return result;
1130}
1131
1142template <typename T>
1144 const Array<Array<std::string>> & rows,
1145 size_t col_index,
1146 bool ascending = true)
1147{
1148 // Create pairs of (value, index)
1150 for (size_t i = 0; i < rows.size(); ++i)
1151 {
1152 T val = (col_index < rows(i).size())
1153 ? csv_to_number<T>(rows(i)(col_index)) : T{};
1154 pairs.append({val, i});
1155 }
1156
1157 // Sort pairs
1158 for (size_t i = 0; i < pairs.size(); ++i)
1159 for (size_t j = i + 1; j < pairs.size(); ++j)
1160 {
1161 bool should_swap = ascending
1162 ? (pairs(i).first > pairs(j).first)
1163 : (pairs(i).first < pairs(j).first);
1164 if (should_swap)
1165 std::swap(pairs(i), pairs(j));
1166 }
1167
1168 // Build sorted result
1170 for (size_t i = 0; i < pairs.size(); ++i)
1171 result.append(rows(pairs(i).second));
1172
1173 return result;
1174}
1175
1184 const Array<Array<std::string>> & rows)
1185{
1187
1188 for (size_t i = 0; i < rows.size(); ++i)
1189 {
1190 bool is_duplicate = false;
1191 for (size_t j = 0; j < result.size(); ++j)
1192 {
1193 if (result(j).size() != rows(i).size()) continue;
1194
1195 bool equal = true;
1196 for (size_t k = 0; k < rows(i).size(); ++k)
1197 if (result(j)(k) != rows(i)(k)) { equal = false; break; }
1198
1199 if (equal) { is_duplicate = true; break; }
1200 }
1201
1202 if (not is_duplicate)
1203 result.append(rows(i));
1204 }
1205
1206 return result;
1207}
1208
1218template <typename Func>
1220 const Array<Array<std::string>> & rows,
1221 Func func)
1222{
1224
1225 for (size_t i = 0; i < rows.size(); ++i)
1226 {
1228 for (size_t j = 0; j < rows(i).size(); ++j)
1229 new_row.append(func(rows(i)(j)));
1230 result.append(std::move(new_row));
1231 }
1232
1233 return result;
1234}
1235
1236//============================================================================
1237// Utility Functions
1238//============================================================================
1239
/** Skip UTF-8 BOM if present.
 *
 *  Consumes the three bytes EF BB BF when they are the next bytes of `in`.
 *  The stream is inspected one byte at a time with peek(), so nothing is
 *  consumed unless it matches the BOM so far. If only a prefix of the BOM
 *  matches, the consumed bytes are returned to the stream with unget().
 *  Streams shorter than three bytes are therefore left readable instead of
 *  ending up in a failed state with their content lost.
 *
 *  @param[in,out] in stream positioned where a BOM may appear.
 *  @return true if a complete BOM was consumed, false otherwise.
 */
inline bool csv_skip_bom(std::istream & in)
{
  static constexpr unsigned char bom[] = {0xEF, 0xBB, 0xBF};
  constexpr int bom_len = 3;

  int consumed = 0;
  while (consumed < bom_len and in.peek() == static_cast<int>(bom[consumed]))
    {
      in.get();
      ++consumed;
    }

  if (consumed == bom_len)
    return true; // BOM skipped

  if (consumed > 0)
    {
      // The failing peek() may have hit end of input; drop eofbit so the
      // partial match can be handed back.
      in.clear(in.rdstate() & ~std::ios_base::eofbit);
      for (; consumed > 0; --consumed)
        in.unget();
    }

  return false;
}
1265
1274 const Array<Array<std::string>> & rows)
1275{
1276 auto trim = [](const std::string & s) -> std::string {
1277 size_t start = 0;
1278 while (start < s.size() and std::isspace(static_cast<unsigned char>(s[start])))
1279 ++start;
1280
1281 size_t end = s.size();
1282 while (end > start and std::isspace(static_cast<unsigned char>(s[end - 1])))
1283 --end;
1284
1285 return s.substr(start, end - start);
1286 };
1287
1288 return csv_transform(rows, trim);
1289}
1290
1300 const Array<Array<std::string>> & rows,
1301 const std::string & default_value)
1302{
1303 return csv_transform(rows, [&default_value](const std::string & s) {
1304 return s.empty() ? default_value : s;
1305 });
1306}
1307
1320 const Array<Array<std::string>> & left,
1321 const Array<Array<std::string>> & right)
1322{
1324 size_t max_rows = (left.size() > right.size()) ? left.size() : right.size();
1325
1326 for (size_t i = 0; i < max_rows; ++i)
1327 {
1329
1330 // Add fields from left
1331 if (i < left.size())
1332 for (size_t j = 0; j < left(i).size(); ++j)
1333 new_row.append(left(i)(j));
1334
1335 // Add fields from right
1336 if (i < right.size())
1337 for (size_t j = 0; j < right(i).size(); ++j)
1338 new_row.append(right(i)(j));
1339
1340 result.append(std::move(new_row));
1341 }
1342
1343 return result;
1344}
1345
1355 const Array<Array<std::string>> & top,
1357{
1359
1360 for (size_t i = 0; i < top.size(); ++i)
1361 result.append(top(i));
1362
1363 for (size_t i = 0; i < bottom.size(); ++i)
1364 result.append(bottom(i));
1365
1366 return result;
1367}
1368
1380 const Array<Array<std::string>> & left,
1381 size_t left_key_col,
1382 const Array<Array<std::string>> & right,
1383 size_t right_key_col)
1384{
1386
1387 for (size_t i = 0; i < left.size(); ++i)
1388 {
1389 if (left_key_col >= left(i).size()) continue;
1390 const std::string & left_key = left(i)(left_key_col);
1391
1392 for (size_t j = 0; j < right.size(); ++j)
1393 {
1394 if (right_key_col >= right(j).size()) continue;
1395
1396 if (left_key == right(j)(right_key_col))
1397 {
1398 // Combine rows (excluding duplicate key from right)
1400
1401 for (size_t k = 0; k < left(i).size(); ++k)
1402 new_row.append(left(i)(k));
1403
1404 for (size_t k = 0; k < right(j).size(); ++k)
1405 if (k != right_key_col)
1406 new_row.append(right(j)(k));
1407
1408 result.append(std::move(new_row));
1409 }
1410 }
1411 }
1412
1413 return result;
1414}
1415
1425 const Array<Array<std::string>> & rows,
1426 size_t col_index)
1427{
1430
1431 for (size_t k = 0; k < keys.size(); ++k)
1432 {
1434 for (size_t i = 0; i < rows.size(); ++i)
1435 if (col_index < rows(i).size() and rows(i)(col_index) == keys(k))
1436 group.append(rows(i));
1437 result.append(std::move(group));
1438 }
1439
1440 return result;
1441}
1442
1462template <typename Func>
1464 const Array<Array<std::string>> & rows,
1465 Func func)
1466{
1468
1469 for (size_t i = 0; i < rows.size(); ++i)
1470 {
1472 for (size_t j = 0; j < rows(i).size(); ++j)
1473 new_row.append(rows(i)(j));
1474 new_row.append(func(rows(i)));
1475 result.append(std::move(new_row));
1476 }
1477
1478 return result;
1479}
1480
1491 const Array<Array<std::string>> & rows,
1492 const std::string & old_name,
1493 const std::string & new_name)
1494{
1495 if (rows.size() == 0)
1496 return rows;
1497
1499
1500 // Process header
1502 for (size_t j = 0; j < rows(0).size(); ++j)
1503 {
1504 if (rows(0)(j) == old_name)
1506 else
1507 new_header.append(rows(0)(j));
1508 }
1509 result.append(std::move(new_header));
1510
1511 // Copy remaining rows
1512 for (size_t i = 1; i < rows.size(); ++i)
1513 result.append(rows(i));
1514
1515 return result;
1516}
1517
1518} // namespace Aleph
1519
1520// Global namespace compatibility: export commonly used functions and classes
1521using Aleph::CsvRow;
1522using Aleph::CsvReader;
1526using Aleph::csv_escape;
1534using Aleph::csv_filter;
1548using Aleph::csv_unique;
1559
1560#endif // PARSE_CSV_H
Simple dynamic array with automatic resizing and functional operations.
Definition tpl_array.H:138
constexpr size_t size() const noexcept
Return the number of elements stored in the array.
Definition tpl_array.H:333
T & append(const T &data)
Append a copy of data
Definition tpl_array.H:239
const Array< std::string > & operator*() const
Definition parse-csv.H:814
bool operator!=(const Iterator &other) const
Definition parse-csv.H:825
Array< std::string > current_
Definition parse-csv.H:801
Iterator(CsvReader *reader, bool at_end)
Definition parse-csv.H:805
Lazy CSV reader for large files.
Definition parse-csv.H:711
void skip(size_t n)
Skip N rows.
Definition parse-csv.H:778
CsvRow next_row()
Read the next row with header access.
Definition parse-csv.H:759
Array< std::string > next()
Read the next row as Array.
Definition parse-csv.H:752
CsvReader(std::istream &stream, char delimiter=',')
Construct reader from stream.
Definition parse-csv.H:732
CsvReader(const std::string &filename, char delimiter=',')
Construct reader from file path.
Definition parse-csv.H:723
void reset()
Reset to beginning (only works for file-based readers).
Definition parse-csv.H:785
const Array< std::string > & header() const
Get the header (if read).
Definition parse-csv.H:772
Iterator end()
Definition parse-csv.H:832
Array< std::string > read_header()
Read and store the header row.
Definition parse-csv.H:738
Array< std::string > header_
Definition parse-csv.H:716
size_t rows_read() const
Get number of rows read so far.
Definition parse-csv.H:769
std::istream * stream_
Definition parse-csv.H:713
std::ifstream file_
Definition parse-csv.H:712
bool has_next() const
Check if more rows are available.
Definition parse-csv.H:746
Iterator begin()
Definition parse-csv.H:831
bool has_header() const
Check if header was read.
Definition parse-csv.H:775
A CSV row with header-based field access.
Definition parse-csv.H:601
T get(const std::string &column_name) const
Get field as numeric type by column name.
Definition parse-csv.H:662
T get(size_t index) const
Get field as numeric type.
Definition parse-csv.H:655
const Array< std::string > * header_
Definition parse-csv.H:603
const std::string & at(size_t index) const
Get field by index with bounds checking.
Definition parse-csv.H:636
const Array< std::string > * header() const
Get column names (if header set).
Definition parse-csv.H:677
const std::string & operator[](const std::string &column_name) const
Get field by column name.
Definition parse-csv.H:623
bool empty() const
Check if row is empty.
Definition parse-csv.H:671
size_t size() const
Get number of fields.
Definition parse-csv.H:668
Array< std::string > fields_
Definition parse-csv.H:602
CsvRow(Array< std::string > fields)
Construct from fields only (no header access).
Definition parse-csv.H:607
bool has_column(const std::string &column_name) const
Check if column exists.
Definition parse-csv.H:644
CsvRow(Array< std::string > fields, const Array< std::string > &header)
Construct with header reference for name-based access.
Definition parse-csv.H:611
const std::string & operator[](size_t index) const
Get field by index.
Definition parse-csv.H:615
const Array< std::string > & fields() const
Get underlying array.
Definition parse-csv.H:674
T & append(const T &item)
Append a new item by copy.
Definition htlist.H:1562
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
long csv_to_number< long >(const std::string &field)
Definition parse-csv.H:555
size_t csv_find_row(const Array< Array< std::string > > &rows, Pred predicate)
Find first row matching a predicate.
Definition parse-csv.H:1010
Array< Array< std::string > > csv_sort_by_column_numeric(const Array< Array< std::string > > &rows, size_t col_index, bool ascending=true)
Sort CSV data by a column with numeric comparison.
Definition parse-csv.H:1143
Array< Array< std::string > > csv_trim_fields(const Array< Array< std::string > > &rows)
Trim whitespace from all fields.
Definition parse-csv.H:1273
std::string csv_escape(const std::string &field, char delimiter=',')
Escape a string for CSV output.
Definition parse-csv.H:320
Array< Array< std::string > > csv_read_file(const std::string &filename, char delimiter=',')
Read all rows from a CSV file.
Definition parse-csv.H:290
Array< Array< std::string > > csv_transform(const Array< Array< std::string > > &rows, Func func)
Apply a transformation to each field.
Definition parse-csv.H:1219
Array< Array< std::string > > csv_filter_by_value(const Array< Array< std::string > > &rows, size_t col_index, const std::string &value)
Filter CSV rows by column value.
Definition parse-csv.H:880
size_t size(Node *root) noexcept
bool csv_skip_bom(std::istream &in)
Skip UTF-8 BOM if present.
Definition parse-csv.H:1250
Array< Array< std::string > > csv_transpose(const Array< Array< std::string > > &rows)
Transpose CSV data (swap rows and columns).
Definition parse-csv.H:1067
T csv_to_number(const std::string &field)
Convert a CSV field to a numeric type.
Definition parse-csv.H:534
Array< Array< std::string > > csv_rename_column(const Array< Array< std::string > > &rows, const std::string &old_name, const std::string &new_name)
Rename a column (in the header row).
Definition parse-csv.H:1490
Array< Array< std::string > > csv_take_rows(const Array< Array< std::string > > &rows, size_t n)
Take only the first N rows of CSV data.
Definition parse-csv.H:955
std::decay_t< typename HeadC::Item_Type > T
Definition ah-zip.H:107
Array< std::string > csv_get_column(const Array< Array< std::string > > &rows, size_t col_index)
Get a column from CSV data.
Definition parse-csv.H:497
Array< Array< std::string > > csv_join_horizontal(const Array< Array< std::string > > &left, const Array< Array< std::string > > &right)
Join two CSV datasets horizontally (add columns).
Definition parse-csv.H:1319
Array< Array< std::string > > csv_unique(const Array< Array< std::string > > &rows)
Remove duplicate rows.
Definition parse-csv.H:1183
Array< Array< Array< std::string > > > csv_group_by(const Array< Array< std::string > > &rows, size_t col_index)
Group rows by a column value.
Definition parse-csv.H:1424
float csv_to_number< float >(const std::string &field)
Definition parse-csv.H:569
void csv_write_row(std::ostream &out, const Array< std::string > &row, char delimiter=',', const std::string &line_ending="\n")
Write a CSV row to an output stream.
Definition parse-csv.H:376
size_t csv_num_columns(const Array< std::string > &row)
Get the number of columns in a CSV row.
Definition parse-csv.H:444
size_t csv_count_empty(const Array< Array< std::string > > &rows)
Count empty fields in CSV data.
Definition parse-csv.H:981
std::string trim(const std::string &s)
Return a trimmed copy of a std::string (leading + trailing whitespace removed).
Array< std::string > csv_read_row(std::istream &in, char delimiter=',')
Read a single CSV row from an input stream.
Definition parse-csv.H:158
size_t csv_find_by_value(const Array< Array< std::string > > &rows, size_t col_index, const std::string &value)
Find row where column equals value.
Definition parse-csv.H:1023
bool csv_is_rectangular(const Array< Array< std::string > > &rows)
Check if all rows have the same number of columns.
Definition parse-csv.H:466
bool equal(Itor1 beg, const Itor1 &end, Itor2 cmpBeg, BinaryPredicate op=BinaryPredicate())
Test if two ranges are equal.
Definition ahAlgo.H:482
Array< std::string > csv_distinct(const Array< Array< std::string > > &rows, size_t col_index)
Get distinct values in a column.
Definition parse-csv.H:1036
Array< Array< std::string > > csv_filter(const Array< Array< std::string > > &rows, Pred predicate)
Filter CSV rows by a predicate.
Definition parse-csv.H:859
Array< Array< std::string > > csv_read_all(std::istream &in, char delimiter=',')
Read all rows from a CSV input stream.
Definition parse-csv.H:253
int csv_to_number< int >(const std::string &field)
Definition parse-csv.H:548
double csv_to_number< double >(const std::string &field)
Definition parse-csv.H:562
Array< Array< std::string > > csv_inner_join(const Array< Array< std::string > > &left, size_t left_key_col, const Array< Array< std::string > > &right, size_t right_key_col)
Inner join two CSV datasets by a key column.
Definition parse-csv.H:1379
void csv_write_file(const std::string &filename, const Array< Array< std::string > > &rows, char delimiter=',', const std::string &line_ending="\n")
Write CSV data to a file.
Definition parse-csv.H:424
Array< Array< std::string > > csv_skip_rows(const Array< Array< std::string > > &rows, size_t n)
Skip the first N rows of CSV data.
Definition parse-csv.H:937
Array< Array< std::string > > csv_select_columns(const Array< Array< std::string > > &rows, const Array< size_t > &col_indices)
Select specific columns from CSV data.
Definition parse-csv.H:906
Array< Array< std::string > > csv_fill_empty(const Array< Array< std::string > > &rows, const std::string &default_value)
Replace empty fields with a default value.
Definition parse-csv.H:1299
size_t csv_count_if(const Array< Array< std::string > > &rows, Pred predicate)
Count rows matching a predicate.
Definition parse-csv.H:995
Array< Array< std::string > > csv_join_vertical(const Array< Array< std::string > > &top, const Array< Array< std::string > > &bottom)
Join two CSV datasets vertically (add rows).
Definition parse-csv.H:1354
Array< Array< std::string > > csv_add_column(const Array< Array< std::string > > &rows, Func func)
Add a new column with computed values.
Definition parse-csv.H:1463
Array< Array< std::string > > csv_sort_by_column(const Array< Array< std::string > > &rows, size_t col_index, bool ascending=true)
Sort CSV data by a column.
Definition parse-csv.H:1100
void csv_write_all(std::ostream &out, const Array< Array< std::string > > &rows, char delimiter=',', const std::string &line_ending="\n")
Write multiple CSV rows to an output stream.
Definition parse-csv.H:402
DynList< T > maps(const C &c, Op op)
Classic map operation.
Itor::difference_type count(const Itor &beg, const Itor &end, const T &value)
Count elements equal to a value.
Definition ahAlgo.H:127
size_t csv_count_rows(const Array< Array< std::string > > &rows)
Count total number of rows.
Definition parse-csv.H:973
STL namespace.
Dynamic array container with automatic resizing.
fstream file[12]
Definition treapObs.C:67