Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
parse_utils_test.cc
Go to the documentation of this file.
1
2/*
3 Aleph_w
4
5 Data structures & Algorithms
6 version 2.0.0b
7 https://github.com/lrleon/Aleph-w
8
9 This file is part of Aleph-w library
10
11 Copyright (c) 2002-2026 Leandro Rabindranath Leon
12
13 Permission is hereby granted, free of charge, to any person obtaining a copy
14 of this software and associated documentation files (the "Software"), to deal
15 in the Software without restriction, including without limitation the rights
16 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 copies of the Software, and to permit persons to whom the Software is
18 furnished to do so, subject to the following conditions:
19
20 The above copyright notice and this permission notice shall be included in all
21 copies or substantial portions of the Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 SOFTWARE.
30*/
31
32
38#include <gtest/gtest.h>
39#include <fstream>
40#include <sstream>
41#include <cstdio>
42#include <parse_utils.H>
43
44using namespace std;
45using namespace testing;
46using namespace Aleph;
47
48// ============================================================================
49// Test Fixture
50// ============================================================================
51
52class ParseUtilsTest : public Test
53{
54protected:
56
57 void SetUp() override
58 {
59 // Build the temp file path for this process: the name embeds getpid()
60 temp_filename = "/tmp/parse_utils_test_" + to_string(getpid()) + ".txt";
62 }
63
64 void TearDown() override
65 {
66 // Delete the temp file; the result of remove() is not checked here
67 remove(temp_filename.c_str());
68 }
69
70 // Helper: write `content` to the temp file and close it before returning
71 void create_temp_file(const string& content)
72 {
73 ofstream out(temp_filename);
74 out << content;
75 out.close();
76 }
77};
78
79// ============================================================================
80// put_char_in_buffer Tests
81// ============================================================================
82
84{
85 char buffer[10];
86 char* ptr = buffer;
87 char* end = buffer + 10;
88
89 put_char_in_buffer(ptr, end, 'H');
90 put_char_in_buffer(ptr, end, 'i');
91 put_char_in_buffer(ptr, end, '\0');
92
93 EXPECT_STREQ(buffer, "Hi");
94}
95
97{
98 char buffer[10];
99 char* ptr = buffer;
100 char* end = buffer + 10;
101
102 EXPECT_EQ(ptr, buffer);
103 put_char_in_buffer(ptr, end, 'A');
104 EXPECT_EQ(ptr, buffer + 1);
105 put_char_in_buffer(ptr, end, 'B');
106 EXPECT_EQ(ptr, buffer + 2);
107}
108
110{
111 char buffer[5];
112 char* ptr = buffer;
113 char* end = buffer + 5;
114
115 for (int i = 0; i < 5; ++i)
116 put_char_in_buffer(ptr, end, 'X');
117
118 EXPECT_EQ(ptr, end);
119}
120
121// Note: a buffer overflow triggers AH_ERROR, which terminates, so it can't easily be tested here
122
123// ============================================================================
124// init_token_scanning / close_token_scanning Tests
125// ============================================================================
126
137
139{
140 char buffer[20] = "hello";
141 char* ptr = buffer + 5;
142 char* end = buffer + 20;
143
144 close_token_scanning(buffer, ptr, end);
145
147}
148
150{
151 char buffer[20] = "test";
152 char* ptr = buffer + 4;
153 char* end = buffer + 20;
154
155 // Overwrite the implicit null
156 buffer[4] = 'X';
157 buffer[5] = 'Y';
158
159 close_token_scanning(buffer, ptr, end);
160
161 // Should have added null terminator at position 4
162 EXPECT_EQ(buffer[4], '\0');
164}
165
166// ============================================================================
167// read_char_from_stream Tests
168// ============================================================================
169
171{
172 create_temp_file("ABC");
173 ifstream input(temp_filename);
174
178}
179
191
193{
194 create_temp_file("A\nB");
195 ifstream input(temp_filename);
196
202 EXPECT_EQ(Aleph::current_col_number, 0); // Reset on newline
203}
204
206{
207 create_temp_file("");
208 ifstream input(temp_filename);
209
210 // Force EOF
211 input.get(); // This will set EOF flag
212
214}
215
216// ============================================================================
217// skip_white_spaces Tests
218// ============================================================================
219
221{
222 create_temp_file(" hello");
223 ifstream input(temp_filename);
224
226
227 EXPECT_EQ(input.peek(), 'h');
228}
229
231{
232 create_temp_file("\t\thello");
233 ifstream input(temp_filename);
234
236
237 EXPECT_EQ(input.peek(), 'h');
238}
239
241{
242 create_temp_file("\n\nhello");
243 ifstream input(temp_filename);
244
246
247 EXPECT_EQ(input.peek(), 'h');
248}
249
251{
252 create_temp_file(" \t\n \t\nhello");
253 ifstream input(temp_filename);
254
256
257 EXPECT_EQ(input.peek(), 'h');
258}
259
261{
262 create_temp_file("hello");
263 ifstream input(temp_filename);
264
266
267 EXPECT_EQ(input.peek(), 'h');
268}
269
270// ============================================================================
271// load_number Tests
272// ============================================================================
273
275{
276 create_temp_file("42 ");
277 ifstream input(temp_filename);
278
280}
281
283{
284 create_temp_file("-123 ");
285 ifstream input(temp_filename);
286
288}
289
291{
292 create_temp_file("0 ");
293 ifstream input(temp_filename);
294
296}
297
299{
300 create_temp_file(" 100 ");
301 ifstream input(temp_filename);
302
304}
305
307{
308 create_temp_file("10 20 30 ");
309 ifstream input(temp_filename);
310
314}
315
317{
318 // Number without trailing whitespace at EOF throws exception
319 // because the parser expects whitespace to terminate numbers
320 create_temp_file("42");
321 ifstream input(temp_filename);
322
323 // The implementation throws when number doesn't end in whitespace
324 EXPECT_THROW(load_number(input), std::domain_error);
325}
326
328{
329 // Number with trailing newline works fine
330 create_temp_file("42\n");
331 ifstream input(temp_filename);
332
334}
335
337{
338 create_temp_file("1234567890 ");
339 ifstream input(temp_filename);
340
341 EXPECT_EQ(load_number(input), 1234567890L);
342}
343
345{
346 create_temp_file("-987654321 ");
347 ifstream input(temp_filename);
348
349 EXPECT_EQ(load_number(input), -987654321L);
350}
351
353{
354 create_temp_file("999 ");
355 ifstream input(temp_filename);
356
358
360}
361
362// ============================================================================
363// load_string Tests
364// ============================================================================
365
367{
368 create_temp_file("hello ");
369 ifstream input(temp_filename);
370
371 EXPECT_EQ(load_string(input), "hello");
372}
373
375{
376 create_temp_file("\"hello world\" ");
377 ifstream input(temp_filename);
378
379 EXPECT_EQ(load_string(input), "hello world");
380}
381
383{
384 create_temp_file("\"hello world\" ");
385 ifstream input(temp_filename);
386
387 EXPECT_EQ(load_string(input), "hello world");
388}
389
391{
392 create_temp_file(" hello ");
393 ifstream input(temp_filename);
394
395 EXPECT_EQ(load_string(input), "hello");
396}
397
399{
400 create_temp_file("hello world test ");
401 ifstream input(temp_filename);
402
403 EXPECT_EQ(load_string(input), "hello");
404 EXPECT_EQ(load_string(input), "world");
405 EXPECT_EQ(load_string(input), "test");
406}
407
409{
410 create_temp_file("hello \"world test\" foo ");
411 ifstream input(temp_filename);
412
413 EXPECT_EQ(load_string(input), "hello");
414 EXPECT_EQ(load_string(input), "world test");
415 EXPECT_EQ(load_string(input), "foo");
416}
417
419{
420 create_temp_file("\"\" ");
421 ifstream input(temp_filename);
422
424}
425
427{
428 create_temp_file("test ");
429 ifstream input(temp_filename);
430
432
434}
435
437{
438 create_temp_file("abc123 ");
439 ifstream input(temp_filename);
440
441 EXPECT_EQ(load_string(input), "abc123");
442}
443
445{
446 create_temp_file("\"line1\nline2\" ");
447 ifstream input(temp_filename);
448
449 string result = load_string(input);
450 EXPECT_EQ(result, "line1\nline2");
451}
452
453// ============================================================================
454// command_line_to_string Tests
455// ============================================================================
456
458{
459 const char* argv[] = {"program"};
460 string result = command_line_to_string(1, const_cast<char**>(argv));
461
462 EXPECT_EQ(result, " program");
463}
464
466{
467 const char* argv[] = {"program", "--input", "file.txt", "-v"};
468 string result = command_line_to_string(4, const_cast<char**>(argv));
469
470 EXPECT_EQ(result, " program --input file.txt -v");
471}
472
474{
475 string result = command_line_to_string(0, nullptr);
476
477 EXPECT_EQ(result, "");
478}
479
480// ============================================================================
481// reset_parse_state Tests
482// ============================================================================
483
498
507
508// ============================================================================
509// Integration Tests
510// ============================================================================
511
513{
514 // Simulate a simple config file format:
515 // name value
516 // ...
517 create_temp_file("count 10\nname \"test file\"\nsize 42\n");
518 ifstream input(temp_filename);
519
520 string key1 = load_string(input);
521 long val1 = load_number(input);
522 EXPECT_EQ(key1, "count");
523 EXPECT_EQ(val1, 10);
524
525 string key2 = load_string(input);
526 string val2 = load_string(input);
527 EXPECT_EQ(key2, "name");
528 EXPECT_EQ(val2, "test file");
529
530 string key3 = load_string(input);
531 long val3 = load_number(input);
532 EXPECT_EQ(key3, "size");
533 EXPECT_EQ(val3, 42);
534}
535
537{
538 create_temp_file(" 123 \"hello world\" -456 test ");
539 ifstream input(temp_filename);
540
542 EXPECT_EQ(load_string(input), "hello world");
544 EXPECT_EQ(load_string(input), "test");
545}
546
548{
549 create_temp_file("line1\nline2\nline3\n");
550 ifstream input(temp_filename);
551
552 // Note: current_line_number updates AFTER reading newline,
553 // so after reading "line1\n", we're at line 2
554 EXPECT_EQ(load_string(input), "line1");
555 EXPECT_EQ(Aleph::current_line_number, 2); // After reading newline
556
557 EXPECT_EQ(load_string(input), "line2");
559
560 EXPECT_EQ(load_string(input), "line3");
562}
563
564// ============================================================================
565// Position Tracking Tests
566// ============================================================================
567
569{
570 create_temp_file("abc def\nghi jkl\n");
571 ifstream input(temp_filename);
572
573 // Read first token
575
576 // The previous position is recorded by init_token_scanning() inside load_string().
577 // It records the position BEFORE whitespace is skipped, which happens after the token is read
580
581 // Read second token - previous_col is where skip_white_spaces left off
583
585 // Column 6 because: skip reads space (col 5), then init captures col 6
587
588 // Read third token (on new line)
590
592}
593
594// ============================================================================
595// Edge Cases
596// ============================================================================
597
599{
600 create_temp_file("- 5 ");
601 ifstream input(temp_filename);
602
603 // A lone minus followed by a space is expected to yield 0 (presumably because atol() finds no digits to convert)
604 long result = load_number(input);
605 EXPECT_EQ(result, 0); // atol("-") or empty
606}
607
609{
610 create_temp_file("\"hello@world#test!\" ");
611 ifstream input(temp_filename);
612
613 EXPECT_EQ(load_string(input), "hello@world#test!");
614}
615
617{
618 create_temp_file("\"path\\to\\file\" ");
619 ifstream input(temp_filename);
620
621 // Backslashes are preserved (no escape processing)
622 EXPECT_EQ(load_string(input), "path\\to\\file");
623}
624
626{
627 string longstr(400, 'X'); // 400 X's, within Buffer_Size
628 create_temp_file("\"" + longstr + "\" ");
629 ifstream input(temp_filename);
630
632}
633
634// ============================================================================
635// Buffer Size Constant Test
636// ============================================================================
637
643
644// ============================================================================
645// SourceLocation Tests
646// ============================================================================
647
649{
650 SourceLocation loc;
651 EXPECT_TRUE(loc.filename.empty());
652 EXPECT_EQ(loc.line, 1);
653 EXPECT_EQ(loc.column, 1);
654}
655
657{
658 SourceLocation loc("test.cpp", 42, 15);
659 EXPECT_EQ(loc.filename, "test.cpp");
660 EXPECT_EQ(loc.line, 42);
661 EXPECT_EQ(loc.column, 15);
662}
663
665{
666 SourceLocation loc("test.cpp", 10, 5);
667 EXPECT_EQ(loc.to_string(), "test.cpp:10:5");
668}
669
671{
672 SourceLocation loc("", 10, 5);
673 EXPECT_EQ(loc.to_string(), "10:5");
674}
675
677{
680
681 auto loc = SourceLocation::current("file.txt");
682
683 EXPECT_EQ(loc.filename, "file.txt");
684 EXPECT_EQ(loc.line, 100);
685 EXPECT_EQ(loc.column, 50);
686
688}
689
690// ============================================================================
691// ParseError Tests
692// ============================================================================
693
695{
696 SourceLocation loc("test.cpp", 5, 10);
697 ParseError err("unexpected token", loc);
698
699 string msg = err.what();
700 EXPECT_NE(msg.find("test.cpp"), string::npos);
701 EXPECT_NE(msg.find("5"), string::npos);
702 EXPECT_NE(msg.find("unexpected token"), string::npos);
703}
704
706{
707 SourceLocation loc("file.c", 20, 30);
708 ParseError err("error", loc);
709
710 EXPECT_EQ(err.location().line, 20);
711 EXPECT_EQ(err.location().column, 30);
712}
713
714// ============================================================================
715// Lookahead and Backtracking Tests
716// ============================================================================
717
719{
720 create_temp_file("ABC");
721 ifstream input(temp_filename);
722
724 EXPECT_EQ(peek_char(input), 'A'); // Still 'A'
725 EXPECT_EQ(input.get(), 'A'); // Now consumed
727}
728
730{
731 create_temp_file("hello world");
732 ifstream input(temp_filename);
733
734 // Read "hello"
735 for (int i = 0; i < 5; ++i)
737
739
740 // Read " world"
741 for (int i = 0; i < 6; ++i)
743
744 // Restore
746
747 EXPECT_EQ(input.get(), ' ');
748 EXPECT_EQ(input.get(), 'w');
749}
750
751// ============================================================================
752// Comment Handling Tests
753// ============================================================================
754
756{
757 create_temp_file("// this is a comment\nhello");
758 ifstream input(temp_filename);
759
760 input.get(); // '/'
761 input.get(); // '/'
763
765
766 EXPECT_EQ(input.peek(), 'h');
768}
769
771{
772 create_temp_file("/* comment */hello");
773 ifstream input(temp_filename);
774
775 input.get(); // '/'
776 input.get(); // '*'
778
780
781 EXPECT_EQ(input.peek(), 'h');
782}
783
785{
786 create_temp_file("/* line 1\n line 2\n line 3 */hello");
787 ifstream input(temp_filename);
788
789 input.get(); // '/'
790 input.get(); // '*'
792
794
795 EXPECT_EQ(input.peek(), 'h');
797}
798
800{
801 create_temp_file(" // comment\n /* block */ hello");
802 ifstream input(temp_filename);
803
805
806 EXPECT_EQ(input.peek(), 'h');
807}
808
810{
811 create_temp_file(" # python style comment\nhello");
812 ifstream input(temp_filename);
813
815
816 EXPECT_EQ(input.peek(), 'h');
817}
818
819// ============================================================================
820// Extended Numeric Parsing Tests
821// ============================================================================
822
824{
825 create_temp_file("3.14159 ");
826 ifstream input(temp_filename);
827
828 EXPECT_NEAR(load_double(input), 3.14159, 0.00001);
829}
830
832{
833 create_temp_file("-2.5 ");
834 ifstream input(temp_filename);
835
836 EXPECT_NEAR(load_double(input), -2.5, 0.001);
837}
838
840{
841 create_temp_file("1.5e10 ");
842 ifstream input(temp_filename);
843
844 EXPECT_NEAR(load_double(input), 1.5e10, 1e5);
845}
846
848{
849 create_temp_file("2.5e-3 ");
850 ifstream input(temp_filename);
851
852 EXPECT_NEAR(load_double(input), 0.0025, 0.00001);
853}
854
856{
857 create_temp_file(".5 ");
858 ifstream input(temp_filename);
859
860 EXPECT_NEAR(load_double(input), 0.5, 0.001);
861}
862
864{
865 create_temp_file("5. ");
866 ifstream input(temp_filename);
867
868 EXPECT_NEAR(load_double(input), 5.0, 0.001);
869}
870
872{
873 create_temp_file("0xFF ");
874 ifstream input(temp_filename);
875
877}
878
880{
881 create_temp_file("0x1a2b ");
882 ifstream input(temp_filename);
883
885}
886
888{
889 create_temp_file("0755 ");
890 ifstream input(temp_filename);
891
893}
894
896{
897 create_temp_file("0 ");
898 ifstream input(temp_filename);
899
901}
902
904{
905 create_temp_file("0b1010 ");
906 ifstream input(temp_filename);
907
909}
910
912{
913 create_temp_file("0B11110000 ");
914 ifstream input(temp_filename);
915
917}
918
919// ============================================================================
920// Identifier Tests
921// ============================================================================
922
924{
925 create_temp_file("myVariable ");
926 ifstream input(temp_filename);
927
928 EXPECT_EQ(load_identifier(input), "myVariable");
929}
930
932{
933 create_temp_file("_private_var ");
934 ifstream input(temp_filename);
935
936 EXPECT_EQ(load_identifier(input), "_private_var");
937}
938
940{
941 create_temp_file("var123 ");
942 ifstream input(temp_filename);
943
944 EXPECT_EQ(load_identifier(input), "var123");
945}
946
948{
949 create_temp_file("camelCaseIdentifier ");
950 ifstream input(temp_filename);
951
952 EXPECT_EQ(load_identifier(input), "camelCaseIdentifier");
953}
954
956{
957 create_temp_file("123invalid ");
958 ifstream input(temp_filename);
959
961}
962
963// ============================================================================
964// Keyword Tests
965// ============================================================================
966
968{
969 vector<string> keywords = {"if", "while", "for", "return"};
970
973 EXPECT_TRUE(is_keyword("return", keywords));
974}
975
977{
978 vector<string> keywords = {"if", "while", "for", "return"};
979
980 EXPECT_FALSE(is_keyword("IF", keywords)); // Case sensitive
981 EXPECT_FALSE(is_keyword("unless", keywords));
983}
984
985// ============================================================================
986// Expectation Tests
987// ============================================================================
988
990{
991 create_temp_file(" ( hello");
992 ifstream input(temp_filename);
993
995}
996
998{
999 create_temp_file(" [ hello");
1000 ifstream input(temp_filename);
1001
1003}
1004
1006{
1007 create_temp_file(" function foo");
1008 ifstream input(temp_filename);
1009
1010 EXPECT_NO_THROW(expect(input, "function"));
1011}
1012
1014{
1015 create_temp_file(" procedure foo");
1016 ifstream input(temp_filename);
1017
1018 EXPECT_THROW(expect(input, "function"), ParseError);
1019}
1020
1022{
1023 create_temp_file(" ; next");
1024 ifstream input(temp_filename);
1025
1026 EXPECT_TRUE(try_char(input, ';'));
1027 // Should have consumed the ';'
1029 EXPECT_EQ(input.peek(), 'n');
1030}
1031
1033{
1034 create_temp_file(" , next");
1035 ifstream input(temp_filename);
1036
1038 // Should NOT have consumed anything
1040 EXPECT_EQ(input.peek(), ',');
1041}
1042
1043// ============================================================================
1044// Escape Processing Tests
1045// ============================================================================
1046
1048{
1049 EXPECT_EQ(process_escape('n'), '\n');
1050 EXPECT_EQ(process_escape('t'), '\t');
1051 EXPECT_EQ(process_escape('r'), '\r');
1052 EXPECT_EQ(process_escape('\\'), '\\');
1053 EXPECT_EQ(process_escape('"'), '"');
1054 EXPECT_EQ(process_escape('\''), '\'');
1055 EXPECT_EQ(process_escape('0'), '\0');
1056}
1057
1059{
1060 // Unknown escapes return the character as-is
1061 EXPECT_EQ(process_escape('x'), 'x');
1062 EXPECT_EQ(process_escape('q'), 'q');
1063}
1064
1065// ============================================================================
1066// Escaped String Tests
1067// ============================================================================
1068
1070{
1071 create_temp_file("\"hello\\nworld\" ");
1072 ifstream input(temp_filename);
1073
1074 string result = load_escaped_string(input);
1075 EXPECT_EQ(result, "hello\nworld");
1076}
1077
1079{
1080 create_temp_file("\"line1\\tline2\\r\\nline3\" ");
1081 ifstream input(temp_filename);
1082
1083 string result = load_escaped_string(input);
1084 EXPECT_EQ(result, "line1\tline2\r\nline3");
1085}
1086
1088{
1089 create_temp_file("\"he said \\\"hello\\\"\" ");
1090 ifstream input(temp_filename);
1091
1092 string result = load_escaped_string(input);
1093 EXPECT_EQ(result, "he said \"hello\"");
1094}
1095
1097{
1098 create_temp_file("\"path\\\\to\\\\file\" ");
1099 ifstream input(temp_filename);
1100
1101 string result = load_escaped_string(input);
1102 EXPECT_EQ(result, "path\\to\\file");
1103}
1104
1106{
1107 create_temp_file("\"no closing quote");
1108 ifstream input(temp_filename);
1109
1111}
1112
1113// ============================================================================
1114// Character Literal Tests
1115// ============================================================================
1116
1118{
1119 create_temp_file("'a' ");
1120 ifstream input(temp_filename);
1121
1123}
1124
1126{
1127 create_temp_file("'\\n' ");
1128 ifstream input(temp_filename);
1129
1131}
1132
1134{
1135 create_temp_file("'\\t' ");
1136 ifstream input(temp_filename);
1137
1139}
1140
1142{
1143 create_temp_file("'\\'' ");
1144 ifstream input(temp_filename);
1145
1147}
1148
1149// ============================================================================
1150// Token Type Tests
1151// ============================================================================
1152
1154{
1155 EXPECT_EQ(token_type_to_string(TokenType::END_OF_FILE), "EOF");
1156 EXPECT_EQ(token_type_to_string(TokenType::IDENTIFIER), "IDENTIFIER");
1157 EXPECT_EQ(token_type_to_string(TokenType::INTEGER), "INTEGER");
1158 EXPECT_EQ(token_type_to_string(TokenType::STRING), "STRING");
1159}
1160
1162{
1163 SourceLocation loc("file.c", 10, 5);
1164 Token tok(TokenType::IDENTIFIER, "myVar", loc);
1165
1166 EXPECT_EQ(tok.type, TokenType::IDENTIFIER);
1167 EXPECT_EQ(tok.value, "myVar");
1168 EXPECT_EQ(tok.location.line, 10);
1169}
1170
1172{
1173 Token eof(TokenType::END_OF_FILE, "", SourceLocation());
1174 Token id(TokenType::IDENTIFIER, "x", SourceLocation());
1175
1176 EXPECT_TRUE(eof.is_eof());
1177 EXPECT_FALSE(id.is_eof());
1178}
1179
1180// ============================================================================
1181// File Utility Tests
1182// ============================================================================
1183
1185{
1186 create_temp_file("line1\nline2\nline3");
1187
1188 string contents = load_file_contents(temp_filename);
1189 EXPECT_EQ(contents, "line1\nline2\nline3");
1190}
1191
1193{
1194 create_temp_file("line1\nline2\nline3");
1195
1196 vector<string> lines = load_file_lines(temp_filename);
1197 ASSERT_EQ(lines.size(), 3u);
1198 EXPECT_EQ(lines[0], "line1");
1199 EXPECT_EQ(lines[1], "line2");
1200 EXPECT_EQ(lines[2], "line3");
1201}
1202
1204{
1205 EXPECT_THROW(load_file_contents("/nonexistent/file.txt"), runtime_error);
1206}
1207
1208// ============================================================================
1209// String Utility Tests
1210// ============================================================================
1211
1213{
1214 vector<string> parts = split_string("a,b,c,d", ',');
1215 ASSERT_EQ(parts.size(), 4u);
1216 EXPECT_EQ(parts[0], "a");
1217 EXPECT_EQ(parts[1], "b");
1218 EXPECT_EQ(parts[2], "c");
1219 EXPECT_EQ(parts[3], "d");
1220}
1221
1223{
1224 // std::getline produces no tokens for an empty string
1226 EXPECT_EQ(parts.size(), 0u);
1227}
1228
1230{
1231 EXPECT_EQ(trim(" hello "), "hello");
1232 EXPECT_EQ(trim("hello"), "hello");
1233 EXPECT_EQ(trim(" hello"), "hello");
1234 EXPECT_EQ(trim("hello "), "hello");
1235}
1236
1238{
1239 EXPECT_EQ(trim("\t\thello\t\t"), "hello");
1240}
1241
1243{
1244 EXPECT_EQ(trim(""), "");
1245 EXPECT_EQ(trim(" "), "");
1246}
1247
1248// ============================================================================
1249// Integration Test: Simple Expression Parser
1250// ============================================================================
1251
1253{
1254 // Parse: x = 42 + 3.14;
1255 create_temp_file("x = 42 + 3.14;");
1256 ifstream input(temp_filename);
1257
1258 string id = load_identifier(input);
1259 EXPECT_EQ(id, "x");
1260
1261 expect_char(input, '=');
1262
1263 long intVal = load_number(input);
1264 EXPECT_EQ(intVal, 42);
1265
1266 expect_char(input, '+');
1267
1268 double dblVal = load_double(input);
1269 EXPECT_NEAR(dblVal, 3.14, 0.01);
1270
1271 expect_char(input, ';');
1272}
1273
1275{
1276 // Note: load_number expects whitespace after number, so use spaces before ';'
1277 create_temp_file("// comment\nx = 10 ; /* another */ y = 20 ;");
1278 ifstream input(temp_filename);
1279
1283 expect_char(input, '=');
1285 expect_char(input, ';');
1286
1290 expect_char(input, '=');
1292}
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
Exception class for parsing errors with location information.
void SetUp() override
void create_temp_file(const string &content)
void TearDown() override
#define TEST(name)
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
void restore_position(std::ifstream &input_stream, const StreamPosition &pos)
Restore a previously marked position.
int current_line_number
Current line number in the input stream.
char load_char_literal(std::ifstream &input_stream)
Load a character literal.
bool is_keyword(const std::string &s, const std::vector< std::string > &keywords)
Check if a string is in a list of keywords.
void skip_block_comment(std::ifstream &input_stream, const std::string &open="/" "*", const std::string &close="*" "/")
Skip a block comment (C-style)
std::string load_file_contents(const std::string &filename)
Load entire file contents into a string.
void init_token_scanning()
Initialize token scanning by recording current position.
void close_token_scanning(const char *buffer, char *&start_addr, const char *end_addr)
Finalize token scanning by null-terminating and saving the token.
Container< std::string > split_string(const std::string &s, const std::string &delim)
Split a std::string by a set of delimiter characters.
int read_char_from_stream(std::ifstream &input_stream)
Read a single character from an input stream with position tracking.
void expect_char(std::ifstream &input_stream, char expected)
Expect and consume a specific character.
void expect(std::ifstream &input_stream, const std::string &expected)
Expect and consume a specific string/keyword.
StreamPosition mark_position(std::ifstream &input_stream)
Mark the current position for potential backtracking.
std::vector< std::string > load_file_lines(const std::string &filename)
Load file as a vector of lines.
double load_double(std::ifstream &input_stream)
Load a floating-point number from the input stream.
int current_col_number
Current column number in the input stream.
int previous_col_number
Column number at the start of the current token.
std::string trim(const std::string &s)
Return a trimmed copy of a std::string (leading + trailing whitespace removed).
std::string token_instance
The most recently scanned token.
void put_char_in_buffer(char *&start_addr, const char *end_addr, int c)
Append a character to a buffer with bounds checking.
Fw_Itor remove(Fw_Itor __first, const Fw_Itor &__last, const T &__value)
Remove elements equal to a value.
Definition ahAlgo.H:962
bool try_char(std::ifstream &input_stream, char ch)
Try to match a character without throwing.
char process_escape(const int c)
Process an escape sequence.
std::string to_string(const time_t t, const std::string &format)
Format a time_t value into a string using format.
Definition ah-date.H:140
long load_binary_number(std::ifstream &input_stream)
Load a binary number from the input stream.
std::string command_line_to_string(int argc, char *argv[])
Convert command line arguments to a single string.
std::string load_identifier(std::ifstream &input_stream)
Load an identifier from the input stream.
void reset_parse_state()
Reset the parsing state to initial values.
int previous_line_number
Line number at the start of the current token.
void skip_white_spaces(std::ifstream &input_stream)
Skip whitespace characters in the input stream.
std::string token_type_to_string(TokenType type)
Convert TokenType to string for debugging.
long load_octal_number(std::ifstream &input_stream)
Load an octal number from the input stream.
std::string load_escaped_string(std::ifstream &input_stream)
Load a string with escape sequence processing.
constexpr size_t Buffer_Size
Default buffer size for token parsing.
std::string load_string(std::ifstream &input_stream)
Load a string from the input stream.
void skip_whitespace_and_comments(std::ifstream &input_stream)
Skip whitespace and comments (C/C++ style)
int peek_char(std::ifstream &input_stream)
Peek at the next character without consuming it.
long load_hex_number(std::ifstream &input_stream)
Load a hexadecimal number from the input stream.
DynList< T > maps(const C &c, Op op)
Classic map operation.
void skip_line_comment(std::ifstream &input_stream)
Skip a line comment (// style or # style)
long load_number(std::ifstream &input_stream)
Load an integer number from the input stream.
STL namespace.
Comprehensive parsing utilities for text processing and compiler construction.
TEST_F(ParseUtilsTest, ReadCharBasic)
Represents a location in source code.
int column
Column number (1-based)
int line
Line number (1-based)
std::string filename
Name of the source file.
std::string to_string() const
Convert to human-readable string.
static SourceLocation current(const std::string &file="")
Construct from the current global parsing state.
Structure to save stream position for backtracking.
Structure representing a lexical token.