Aleph-w 3.0
A C++ Library for Data Structures and Algorithms
Loading...
Searching...
No Matches
parse_utils.H
Go to the documentation of this file.
1/*
2 Aleph_w
3
4 Data structures & Algorithms
5 version 2.0.0b
6 https://github.com/lrleon/Aleph-w
7
8 This file is part of Aleph-w library
9
10 Copyright (c) 2002-2026 Leandro Rabindranath Leon
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in all
20 copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 SOFTWARE.
29*/
30
31
106#ifndef PARSE_UTILS_H
107#define PARSE_UTILS_H
108
109#include <string>
110#include <fstream>
111#include <sstream>
112#include <iostream>
113#include <cstdlib>
114#include <cctype>
115#include <cmath>
116#include <utility>
117#include <vector>
118#include <stdexcept>
119#include <algorithm>
120#include <aleph.H>
121#include <ah-errors.H>
122# include <ah-string-utils.H>
123
124namespace Aleph
125{
126 // ============================================================================
127 // Global State (inline for header-only)
128 // ============================================================================
129
/// Default buffer size for token parsing.
136 inline constexpr size_t Buffer_Size = 512;
137
/// Current line number in the input stream (starts at 1).
144 inline int current_line_number = 1;
145
/// Current column number in the input stream (starts at 1).
152 inline int current_col_number = 1;
153
/// Line number at the start of the current token.
160 inline int previous_line_number = 1;
161
/// Column number at the start of the current token.
168 inline int previous_col_number = 1;
169
/// The most recently scanned token (written by close_token_scanning()).
176 inline std::string token_instance;
177
178
179 // ============================================================================
180 // Buffer Management
181 // ============================================================================
182
207 inline void put_char_in_buffer(char *& start_addr, const char *end_addr, int c)
208 {
209 if (start_addr >= end_addr)
210 AH_ERROR("String too long (buffer overflow in parse_utils)");
211
212 *start_addr++ = static_cast<char>(c);
213 }
214
215 // ============================================================================
216 // Token Scanning
217 // ============================================================================
218
240
253 inline void close_token_scanning(const char *buffer, char *& start_addr, const char *end_addr)
254 {
255 put_char_in_buffer(start_addr, end_addr, '\0');
256 token_instance = buffer;
257 }
258
259 // ============================================================================
260 // Stream Reading
261 // ============================================================================
262
/// Read a single character from an input stream with position tracking.
/// Returns the value produced by input_stream.get().
/// NOTE(review): this listing is lossy -- the embedded source numbers skip
/// 287, 294-295 and 298, so the EOF check and both branch bodies are not
/// visible here.
285 inline int read_char_from_stream(std::ifstream & input_stream)
286 {
// NOTE(review): the head of this statement (source line 287) is missing. It
// is presumably an ah_out_of_range_error_if(...) EOF check, since load_number
// in this file catches std::out_of_range to detect EOF -- confirm.
288 << "end of file has been reached";
289
290 int c = input_stream.get();
291
// A newline takes one branch and every other character the other; the
// branch bodies (presumably the line/column counter updates) are missing.
292 if (c == '\n')
293 {
296 }
297 else
299
300 return c;
301 }
302
321 inline void skip_white_spaces(std::ifstream & input_stream)
322 {
323 while (std::isspace(read_char_from_stream(input_stream))) {} // Skip whitespace
324 input_stream.unget();
325 }
326
327 // ============================================================================
328 // Token Loading
329 // ============================================================================
330
/// Load an integer number from the input stream.
/// Accumulates an optional '-' followed by decimal digits in a local buffer
/// and returns std::strtol(buffer, nullptr, 10).
/// NOTE(review): this listing is lossy -- source lines 364, 368-369, 375 and
/// 387 are missing, so the declaration of `c` and the statements that fetch
/// the next character are not visible here.
358 inline long load_number(std::ifstream & input_stream)
359 {
360 char buffer[Buffer_Size];
361 char *start_addr = buffer;
362 char *end_addr = buffer + Buffer_Size;
363
365
366 try
367 {
370
371 // Handle optional minus sign
372 if (c == '-')
373 {
374 put_char_in_buffer(start_addr, end_addr, c);
376
// NOTE(review): no terminator is written to `buffer` on this path in the
// visible code before strtol reads it -- confirm against the full source.
377 if (not std::isdigit(c))
378 return std::strtol(buffer, nullptr, 10);
379 }
380
381 while (true)
382 {
383 // Read digits
384 while (std::isdigit(c))
385 {
386 put_char_in_buffer(start_addr, end_addr, c);
388 }
389
390 // Number ends at whitespace
391 if (std::isspace(c))
392 {
393 close_token_scanning(buffer, start_addr, end_addr);
394 return std::strtol(buffer, nullptr, 10);
395 }
396
// Any other terminating character is rejected unconditionally.
397 ah_domain_error_if(true) << "Invalid number format";
398 }
399 }
400 catch (const std::out_of_range &)
401 {
402 // EOF reached - return what we have
403 close_token_scanning(buffer, start_addr, end_addr);
404 return std::strtol(buffer, nullptr, 10);
405 }
406 }
407
/// Load a string from the input stream.
/// A token that starts with '"' is read up to the closing '"'; otherwise it
/// ends at a blank, a newline or EOF. The text is also stored in
/// token_instance through close_token_scanning().
/// NOTE(review): this listing is lossy -- source lines 445-446, 448 and 457
/// are missing, so the declaration of `c` and the reads that refresh it are
/// not visible here.
437 inline std::string load_string(std::ifstream & input_stream)
438 {
439 char buffer[Buffer_Size];
440 char *start_addr = buffer;
441 const char *end_addr = buffer + Buffer_Size;
442
443 bool quoted = false;
444
447
449
// The opening quote only switches mode; it is not copied into the buffer.
450 if (c == '\"')
451 quoted = true;
452 else
453 put_char_in_buffer(start_addr, end_addr, c);
454
455 while (true)
456 {
458
459 // End of quoted string
460 if (c == '\"' and quoted)
461 break;
462
463 // End of unquoted string
464 if (not quoted and (std::isblank(c) or c == '\n' or c == EOF))
465 break;
466
467 put_char_in_buffer(start_addr, end_addr, c);
468 }
469
470 close_token_scanning(buffer, start_addr, end_addr);
471 return {buffer};
472 }
473
474 // ============================================================================
475 // Error Reporting
476 // ============================================================================
477
497 [[noreturn]]
498 inline void print_parse_error_and_exit(const std::string & str)
499 {
500 std::cout << "Parse error "
501 << previous_line_number << ":" << previous_col_number << " "
502 << str << std::endl
503 << "Last token seen is: " << token_instance << std::endl;
504 std::exit(1);
505 }
506
523 inline void print_parse_warning(const std::string & str)
524 {
525 std::cout << "Parse warning "
526 << previous_line_number << ":" << previous_col_number << " "
527 << str << std::endl
528 << "Last token seen is: " << token_instance << std::endl;
529 }
530
531 // ============================================================================
532 // Utility Functions
533 // ============================================================================
534
/** Convert command line arguments to a single string.
 *
 * Every argument is appended preceded by one space, so a non-empty result
 * always starts with a space (e.g. `" prog -x file"`).
 *
 * @param[in] argc Number of entries of `argv` to use.
 * @param[in] argv Argument vector; a null vector yields an empty string and
 *                 null entries are skipped instead of being dereferenced.
 * @return The concatenated command line; empty when `argc <= 0`.
 */
inline std::string command_line_to_string(int argc, char *argv[])
{
  std::string command_line;

  if (argv == nullptr)
    return command_line;

  for (int i = 0; i < argc; ++i)
    {
      if (argv[i] == nullptr) // appending a null char* to std::string is UB
        continue;

      command_line += " ";
      command_line += argv[i];
    }

  return command_line;
}
568
/// Reset the parsing state to initial values.
/// NOTE(review): this listing omits source lines 584-587; presumably they
/// reset the four line/column counters -- confirm. Only the clearing of
/// token_instance is visible here.
582 inline void reset_parse_state()
583 {
588 token_instance.clear();
589 }
590
591 // ============================================================================
592 // Source Location and Parse Errors
593 // ============================================================================
594
// Represents a location in source code.
// NOTE(review): the `struct SourceLocation` header (source line 601) and the
// bodies of current()/previous() (lines 617 and 623) are missing from this
// listing.
602 {
603 std::string filename; // Name of the source file.
604 int line = 1; // Line number (1-based)
605 int column = 1; // Column number (1-based)
606
// Default constructor: empty filename, line 1, column 1.
608 SourceLocation() = default;
609
// Construct with all fields.
611 SourceLocation(std::string file, const int ln, const int col)
612 : filename(std::move(file)), line(ln), column(col) {}
613
// Construct from the current global parsing state.
615 static SourceLocation current(const std::string & file = "")
616 {
618 }
619
// Construct from the previous (token start) global parsing state.
621 static SourceLocation previous(const std::string & file = "")
622 {
624 }
625
// Convert to human-readable string: "file:line:column", or "line:column"
// when filename is empty.
630 [[nodiscard]] std::string to_string() const
631 {
632 std::ostringstream oss;
633 if (not filename.empty())
634 oss << filename << ":";
635 oss << line << ":" << column;
636 return oss.str();
637 }
638 };
639
/// Exception class for parsing errors with location information.
/// NOTE(review): this listing is lossy -- source line 654 (presumably the
/// `SourceLocation loc` member used below) and lines 673-674 (the rest of
/// the second constructor's initializer list) are missing.
652 class ParseError : public std::runtime_error
653 {
655
656 public:
// Construct a parse error with message and location; what() reads
// "<location.to_string()>: <message>".
662 ParseError(const std::string & message, const SourceLocation & location)
663 : std::runtime_error(location.to_string() + ": " + message)
664 , loc(location)
665 {}
666
// Construct a parse error with message only; the what() text starts with the
// global previous_line_number followed by ':'.
671 explicit ParseError(const std::string & message)
672 : std::runtime_error(std::to_string(previous_line_number) + ":" +
675 {}
676
// Get the source location of the error.
678 [[nodiscard]] const SourceLocation &location() const { return loc; }
679 };
680
681 // ============================================================================
682 // Lookahead and Backtracking
683 // ============================================================================
684
// Structure to save stream position for backtracking.
// NOTE(review): the `struct StreamPosition` header (source line 688) is
// missing from this listing.
689 {
690 std::streampos pos; // Stream position.
691 int line{}; // Line number at this position.
692 int column{}; // Column number at this position.
693 };
694
/** Peek at the next character without consuming it.
 *
 * Thin wrapper over `std::istream::peek()`.
 *
 * @param[in,out] input_stream Stream to inspect.
 * @return The next character as an `int`, or the stream's end-of-file value
 *         when nothing can be read.
 */
[[nodiscard]] inline int peek_char(std::ifstream & input_stream)
{
  return input_stream.peek();
}
715
// Mark the current position for potential backtracking.
// Captures the stream offset (tellg) together with the global
// current_line_number / current_col_number and returns them.
// NOTE(review): the function header (source line 727) and the declaration of
// `sp` (line 729) are missing from this listing.
728 {
730 sp.pos = input_stream.tellg();
731 sp.line = current_line_number;
732 sp.column = current_col_number;
733 return sp;
734 }
735
/// Restore a previously marked position.
/// Seeks the stream back to pos.pos.
/// NOTE(review): source lines 750-751 are missing from this listing;
/// presumably they restore the line/column counters from `pos` -- confirm.
747 inline void restore_position(std::ifstream & input_stream, const StreamPosition & pos)
748 {
749 input_stream.seekg(pos.pos);
752 }
753
754 // ============================================================================
755 // Comment Handling
756 // ============================================================================
757
/// Skip a line comment (// style or # style).
/// Consumes characters until a newline has been read or the stream reports
/// EOF.
/// NOTE(review): source lines 781-782 and 785 are missing from this listing;
/// presumably they update the line/column counters -- confirm.
775 inline void skip_line_comment(std::ifstream & input_stream)
776 {
777 while (not input_stream.eof())
778 {
779 if (const int c = input_stream.get(); c == '\n')
780 {
783 return;
784 }
786 }
787 }
788
/// Skip a block comment (C-style).
/// Consumes characters until the `close` sequence has been matched; throws
/// ParseError("Unterminated block comment") if the stream ends first.
/// `open` is accepted but not used by the body.
/// NOTE(review): source lines 815-816 and 821 are missing from this listing;
/// presumably they update the line/column counters -- confirm.
804 inline void skip_block_comment(std::ifstream & input_stream,
805 const std::string & open = "/" "*",
806 const std::string & close = "*" "/")
807 {
808 (void) open; // Documented for clarity; opening sequence already consumed.
// Number of leading characters of `close` matched so far.
809 size_t match_pos = 0;
810
811 while (not input_stream.eof())
812 {
813 if (const int c = input_stream.get(); c == '\n')
814 {
// A newline discards any partial match of `close`.
817 match_pos = 0;
818 }
819 else
820 {
822
823 if (c == close[match_pos])
824 {
825 match_pos++;
826 if (match_pos == close.size())
827 return; // Found closing sequence
828 }
829 else
// On a mismatch, restart the match; the current character may itself be the
// first character of `close`.
830 match_pos = (c == close[0]) ? 1 : 0;
831 }
832 }
833
834 throw ParseError("Unterminated block comment");
835 }
836
/// Skip whitespace and comments (C/C++ style).
/// Loops until EOF or until the next character is neither whitespace nor the
/// start of a comment ('/' followed by '/' or '*', or '#').
/// NOTE(review): this listing is lossy -- source lines 856, 860-861, 864-866,
/// 870-872, 878 and 885-886 are missing, so the statements that consume the
/// whitespace and invoke the comment skippers are not visible here.
851 inline void skip_whitespace_and_comments(std::ifstream & input_stream)
852 {
853 while (not input_stream.eof())
854 {
855 if (const int c = peek_char(input_stream); std::isspace(c))
857 else if (c == '/')
858 {
859 // Peek ahead for // or /* ... */
862 if (const int next = peek_char(input_stream); next == '/')
863 {
867 }
868 else if (next == '*')
869 {
873 }
874 else
875 {
876 // Not a comment, put back the '/'
877 input_stream.unget();
879 return;
880 }
881 }
882 else if (c == '#')
883 {
884 // Shell/Python style line comment
887 }
888 else
889 return; // Non-whitespace, non-comment found
890 }
891 }
892
893 // ============================================================================
894 // Numeric Parsing (Extended)
895 // ============================================================================
896
/// Load a floating-point number from the input stream.
/// Accepts an optional sign, an integer part, an optional fractional part and
/// an optional exponent; returns std::strtod() of the collected text.
/// Throws ParseError when no digit is present or when the exponent has no
/// digits.
/// NOTE(review): this listing is lossy -- the declaration of `c` and every
/// statement that fetches the next character (source lines 927-930, 936, 946,
/// 953, 959, 970, 975, 984, 991) are missing.
921 inline double load_double(std::ifstream & input_stream)
922 {
923 char buffer[Buffer_Size];
924 char *ptr = buffer;
925 char *end = buffer + Buffer_Size;
926
929
931
932 // Optional sign
933 if (c == '-' or c == '+')
934 {
935 put_char_in_buffer(ptr, end, c);
937 }
938
// Set once any digit is seen in the integer or fractional part.
939 bool has_digits = false;
940
941 // Integer part
942 while (std::isdigit(c))
943 {
944 has_digits = true;
945 put_char_in_buffer(ptr, end, c);
947 }
948
949 // Decimal part
950 if (c == '.')
951 {
952 put_char_in_buffer(ptr, end, c);
954
955 while (std::isdigit(c))
956 {
957 has_digits = true;
958 put_char_in_buffer(ptr, end, c);
960 }
961 }
962
963 if (not has_digits)
964 throw ParseError("Invalid floating-point number: no digits");
965
966 // Exponent part
967 if (c == 'e' or c == 'E')
968 {
969 put_char_in_buffer(ptr, end, c);
971
972 if (c == '+' or c == '-')
973 {
974 put_char_in_buffer(ptr, end, c);
976 }
977
978 if (not std::isdigit(c))
979 throw ParseError("Invalid exponent in floating-point number");
980
981 while (std::isdigit(c))
982 {
983 put_char_in_buffer(ptr, end, c);
985 }
986 }
987
988 // Put back the terminating character
989 input_stream.unget();
// NOTE(review): the statement guarded by this `if` (source line 991) is
// missing; presumably it rolls back the column counter -- confirm.
990 if (c != '\n')
992
993 close_token_scanning(buffer, ptr, end);
994 return std::strtod(buffer, nullptr);
995 }
996
/// Load a hexadecimal number from the input stream.
/// Requires a "0x"/"0X" prefix followed by at least one hex digit; the digits
/// are collected and converted with std::strtol(..., 16).
/// Throws ParseError when the prefix or the first digit is missing.
/// NOTE(review): this listing is lossy -- the declaration of `c` and the
/// statements that fetch the next character (source lines 1017-1020, 1026,
/// 1030, 1038, 1043) are missing.
1011 inline long load_hex_number(std::ifstream & input_stream)
1012 {
1013 char buffer[Buffer_Size];
1014 char *ptr = buffer;
1015 char *end = buffer + Buffer_Size;
1016
1019
1021
1022 // Expect '0'
1023 if (c != '0')
1024 throw ParseError("Hexadecimal number must start with 0x");
1025
1027 if (c != 'x' and c != 'X')
1028 throw ParseError("Hexadecimal number must start with 0x");
1029
1031
1032 if (not std::isxdigit(c))
1033 throw ParseError("Invalid hexadecimal number");
1034
// Only the digits are buffered in the visible code, not the "0x" prefix.
1035 while (std::isxdigit(c))
1036 {
1037 put_char_in_buffer(ptr, end, c);
1039 }
1040
// Hand the terminating character back to the stream.
1041 input_stream.unget();
// NOTE(review): the statement guarded by this `if` (source line 1043) is
// missing; presumably it rolls back the column counter -- confirm.
1042 if (c != '\n')
1044
1045 close_token_scanning(buffer, ptr, end);
1046 return std::strtol(buffer, nullptr, 16);
1047 }
1048
/// Load an octal number from the input stream.
/// Requires a leading '0', rejects a following 'x'/'X', then collects the
/// digits 0-7 and converts them with std::strtol(..., 8).
/// NOTE(review): this listing is lossy -- the declaration of `c` and the
/// statements that fetch the next character (source lines 1069-1072, 1077,
/// 1086, 1091) are missing.
1063 inline long load_octal_number(std::ifstream & input_stream)
1064 {
1065 char buffer[Buffer_Size];
1066 char *ptr = buffer;
1067 char *end = buffer + Buffer_Size;
1068
1071
1073
1074 if (c != '0')
1075 throw ParseError("Octal number must start with 0");
1076
1078
1079 // Check it's not hex (0x)
1080 if (c == 'x' or c == 'X')
1081 throw ParseError("Expected octal number, got hexadecimal");
1082
1083 while (c >= '0' and c <= '7')
1084 {
1085 put_char_in_buffer(ptr, end, c);
1087 }
1088
// Hand the terminating character back to the stream.
1089 input_stream.unget();
// NOTE(review): the statement guarded by this `if` (source line 1091) is
// missing; presumably it rolls back the column counter -- confirm.
1090 if (c != '\n')
1092
1093 close_token_scanning(buffer, ptr, end);
1094
// NOTE(review): close_token_scanning() takes `ptr` by reference and advances
// it past the terminator it writes, so this comparison looks unreachable in
// practice; an empty buffer still yields 0 from strtol below -- confirm.
1095 if (ptr == buffer)
1096 return 0; // Just "0"
1097
1098 return std::strtol(buffer, nullptr, 8);
1099 }
1100
/// Load a binary number from the input stream.
/// Requires a "0b"/"0B" prefix followed by at least one binary digit; the
/// digits are collected and converted with std::strtol(..., 2).
/// Throws ParseError when the prefix or the first digit is missing.
/// NOTE(review): this listing is lossy -- the declaration of `c` and the
/// statements that fetch the next character (source lines 1121-1124, 1129,
/// 1133, 1141, 1146) are missing.
1115 inline long load_binary_number(std::ifstream & input_stream)
1116 {
1117 char buffer[Buffer_Size];
1118 char *ptr = buffer;
1119 char *end = buffer + Buffer_Size;
1120
1123
1125
1126 if (c != '0')
1127 throw ParseError("Binary number must start with 0b");
1128
1130 if (c != 'b' and c != 'B')
1131 throw ParseError("Binary number must start with 0b");
1132
1134
1135 if (c != '0' and c != '1')
1136 throw ParseError("Invalid binary number");
1137
// Only the digits are buffered in the visible code, not the "0b" prefix.
1138 while (c == '0' or c == '1')
1139 {
1140 put_char_in_buffer(ptr, end, c);
1142 }
1143
// Hand the terminating character back to the stream.
1144 input_stream.unget();
// NOTE(review): the statement guarded by this `if` (source line 1146) is
// missing; presumably it rolls back the column counter -- confirm.
1145 if (c != '\n')
1147
1148 close_token_scanning(buffer, ptr, end);
1149 return std::strtol(buffer, nullptr, 2);
1150 }
1151
1152 // ============================================================================
1153 // Identifier and Keyword Parsing
1154 // ============================================================================
1155
/// Load an identifier from the input stream.
/// The first character must be a letter or '_'; following letters, digits
/// and '_' are appended. The first character that does not fit is handed
/// back to the stream. Throws ParseError on an invalid first character.
/// NOTE(review): this listing is lossy -- the declaration of `c` and the
/// statements that fetch the next character (source lines 1177-1180, 1190,
/// 1200) are missing.
1171 inline std::string load_identifier(std::ifstream & input_stream)
1172 {
1173 char buffer[Buffer_Size];
1174 char *ptr = buffer;
1175 char *end = buffer + Buffer_Size;
1176
1179
1181
1182 // Must start with a letter or underscore
1183 if (not std::isalpha(c) and c != '_')
1184 throw ParseError("Identifier must start with letter or underscore");
1185
1186 put_char_in_buffer(ptr, end, c);
1187
1188 while (true)
1189 {
1191
1192 if (std::isalnum(c) or c == '_')
1193 put_char_in_buffer(ptr, end, c);
1194 else
1195 break;
1196 }
1197
1198 input_stream.unget();
// NOTE(review): the statement guarded by this `if` (source line 1200) is
// missing; presumably it rolls back the column counter -- confirm.
1199 if (c != '\n')
1201
1202 close_token_scanning(buffer, ptr, end);
1203 return {buffer};
1204 }
1205
/** Check if a string is in a list of keywords.
 *
 * The comparison is exact (case-sensitive) and the list is scanned linearly.
 *
 * @param[in] s Candidate string.
 * @param[in] keywords Keywords to search; may be empty.
 * @return `true` if some element of `keywords` equals `s`.
 */
[[nodiscard]] inline bool is_keyword(const std::string & s, const std::vector<std::string> & keywords)
{
  const auto last = keywords.end();
  return std::find(keywords.begin(), last, s) != last;
}
1225
1226 // ============================================================================
1227 // Expectation Functions
1228 // ============================================================================
1229
/// Expect and consume a specific character.
/// Reads one character with read_char_from_stream(); if it differs from
/// `expected`, throws ParseError with the text
/// "Expected '<expected>' but found '<read>'".
/// NOTE(review): source line 1250 is missing from this listing; it is
/// possibly a call to init_token_scanning() -- confirm.
1248 inline void expect_char(std::ifstream & input_stream, char expected)
1249 {
1251 if (const int c = read_char_from_stream(input_stream); c != expected)
1252 {
1253 std::string msg = "Expected '";
1254 msg += expected;
1255 msg += "' but found '";
1256 msg += static_cast<char>(c);
1257 msg += "'";
1258 throw ParseError(msg);
1259 }
1260 }
1261
/// Expect and consume a specific string/keyword.
/// Reads one character per character of `expected`; on the first mismatch it
/// throws ParseError("Expected '<expected>'").
/// NOTE(review): source lines 1281-1282 and 1288 are missing from this
/// listing, so any set-up before the loop and the statement after it are not
/// visible here.
1279 inline void expect(std::ifstream & input_stream, const std::string & expected)
1280 {
1283
1284 for (const char ch: expected)
1285 if (const int c = read_char_from_stream(input_stream); c != ch)
1286 throw ParseError("Expected '" + expected + "'");
1287
1289 }
1290
/// Try to match a character without throwing.
/// Returns true when the character read from the stream equals `ch`, false
/// otherwise.
/// NOTE(review): source lines 1304-1305 and 1310 are missing from this
/// listing; presumably the position is saved first and restored on a
/// mismatch -- confirm.
1302 inline bool try_char(std::ifstream & input_stream, char ch)
1303 {
1306
1307 if (const int c = read_char_from_stream(input_stream); c == ch)
1308 return true;
1309
1311 return false;
1312 }
1313
1314 // ============================================================================
1315 // String Parsing (Extended)
1316 // ============================================================================
1317
/** Process an escape sequence.
 *
 * Maps the character that follows a backslash to the character it denotes.
 * Recognized: `n t r \ " ' 0 a b f v`.
 *
 * @param[in] c Character found after the backslash.
 * @return The denoted character; any unrecognized `c` is returned unchanged
 *         (narrowed to `char`).
 */
[[nodiscard]] constexpr char process_escape(const int c) noexcept
{
  switch (c)
    {
    case 'n': return '\n';
    case 't': return '\t';
    case 'r': return '\r';
    case '\\': return '\\';
    case '"': return '"';
    case '\'': return '\'';
    case '0': return '\0';
    case 'a': return '\a';
    case 'b': return '\b';
    case 'f': return '\f';
    case 'v': return '\v';
    default: return static_cast<char>(c); // Unknown escape, keep as-is
    }
}
1354
/// Load a string with escape sequence processing.
/// Expects an opening '"', then copies characters up to the closing '"',
/// replacing each backslash sequence with the result of process_escape().
/// Throws ParseError when the opening quote is missing or EOF is met before
/// the string (or an escape sequence) is complete.
/// NOTE(review): this listing is lossy -- the declaration of `c` and the
/// statements that fetch the next character (source lines 1379-1382, 1389,
/// 1399) are missing.
1373 inline std::string load_escaped_string(std::ifstream & input_stream)
1374 {
1375 char buffer[Buffer_Size];
1376 char *ptr = buffer;
1377 const char *end = buffer + Buffer_Size;
1378
1381
1383
1384 if (c != '"')
1385 throw ParseError("Expected '\"' to start string");
1386
1387 while (true)
1388 {
1390
1391 if (c == EOF)
1392 throw ParseError("Unterminated string literal");
1393
1394 if (c == '"')
1395 break;
1396
1397 if (c == '\\')
1398 {
1400 if (c == EOF)
1401 throw ParseError("Unterminated escape sequence");
// Going through unsigned char keeps the stored value non-negative.
1402 c = static_cast<unsigned char>(process_escape(c));
1403 }
1404
1405 put_char_in_buffer(ptr, end, c);
1406 }
1407
1408 close_token_scanning(buffer, ptr, end);
// The result is built from the C string, so it ends at the first NUL in the
// buffer.
1409 return {buffer};
1410 }
1411
/// Load a character literal.
/// Expects an opening single quote, one character (or a backslash escape
/// translated by process_escape()) and a closing single quote. Stores the
/// quoted character in token_instance and returns the character.
/// Throws ParseError when either quote is missing.
/// NOTE(review): this listing is lossy -- the declaration of `c` and the
/// statements that fetch the next character (source lines 1430-1433, 1438,
/// 1442, 1448) are missing.
1428 inline char load_char_literal(std::ifstream & input_stream)
1429 {
1432
1434
1435 if (c != '\'')
1436 throw ParseError("Expected single quote to start character literal");
1437
1439
1440 if (c == '\\')
1441 {
1443 c = static_cast<unsigned char>(process_escape(c));
1444 }
1445
1446 char result = static_cast<char>(c);
1447
1449 if (c != '\'')
1450 throw ParseError("Expected single quote to end character literal");
1451
1452 token_instance = "'";
1453 token_instance += result;
1454 token_instance += "'";
1455
1456 return result;
1457 }
1458
1459 // ============================================================================
1460 // Token Types (for lexer support)
1461 // ============================================================================
1462
/// Enumeration of basic token types.
1466 enum class TokenType
1467 {
1468 END_OF_FILE, // End of input.
1469 IDENTIFIER, // Variable/function name.
1470 INTEGER, // Integer literal.
1471 FLOAT, // Floating-point literal.
1472 STRING, // String literal.
1473 CHAR, // Character literal.
1474 OPERATOR, // Operator (+, -, etc.)
1475 PUNCTUATION, // Punctuation (;, {, }, etc.)
1476 KEYWORD, // Reserved keyword.
1477 COMMENT, // Comment (if preserving)
1478 UNKNOWN // Unknown/invalid token.
1479 };
1480
1484 inline std::string token_type_to_string(TokenType type)
1485 {
1486 switch (type)
1487 {
1488 case TokenType::END_OF_FILE: return "EOF";
1489 case TokenType::IDENTIFIER: return "IDENTIFIER";
1490 case TokenType::INTEGER: return "INTEGER";
1491 case TokenType::FLOAT: return "FLOAT";
1492 case TokenType::STRING: return "STRING";
1493 case TokenType::CHAR: return "CHAR";
1494 case TokenType::OPERATOR: return "OPERATOR";
1495 case TokenType::PUNCTUATION: return "PUNCTUATION";
1496 case TokenType::KEYWORD: return "KEYWORD";
1497 case TokenType::COMMENT: return "COMMENT";
1498 case TokenType::UNKNOWN: return "UNKNOWN";
1499 default: return "???";
1500 }
1501 }
1502
/// Structure representing a lexical token.
/// NOTE(review): this listing is lossy -- source lines 1508 and 1510
/// (presumably the `type` and `location` members used below) and line 1526
/// (the tail of to_string()'s return expression) are missing.
1506 struct Token
1507 {
1509 std::string value; // String value/content.
1511
// Default constructor.
1513 Token() = default;
1514
// Construct with all fields.
1516 Token(TokenType t, std::string v, SourceLocation loc)
1517 : type(t), value(std::move(v)), location(std::move(loc)) {}
1518
// Check if this is an end-of-file token.
1520 [[nodiscard]] bool is_eof() const { return type == TokenType::END_OF_FILE; }
1521
// String representation for debugging; starts with TYPE("value") at ...
1523 [[nodiscard]] std::string to_string() const
1524 {
1525 return token_type_to_string(type) + "(\"" + value + "\") at " +
1527 }
1528 };
1529
1530 // ============================================================================
1531 // File Utilities
1532 // ============================================================================
1533
/** Load entire file contents into a string.
 *
 * The file is opened in the stream's default (text) mode and its whole
 * stream buffer is copied into the result.
 *
 * @param[in] filename Path of the file to read.
 * @return The file's contents; empty for an empty file.
 * @throws std::runtime_error ("Cannot open file: <filename>") if the file
 *         cannot be opened.
 */
[[nodiscard]] inline std::string load_file_contents(const std::string & filename)
{
  std::ifstream file(filename);
  if (not file.is_open())
    throw std::runtime_error("Cannot open file: " + filename);

  std::ostringstream contents;
  contents << file.rdbuf();
  return contents.str();
}
1552
/** Load file as a vector of lines.
 *
 * Lines are split with `std::getline`, so the newline terminators are not
 * part of the returned strings.
 *
 * @param[in] filename Path of the file to read.
 * @return One string per line, in file order; empty for an empty file.
 * @throws std::runtime_error ("Cannot open file: <filename>") if the file
 *         cannot be opened.
 */
[[nodiscard]] inline std::vector<std::string> load_file_lines(const std::string & filename)
{
  std::ifstream file(filename);
  if (not file.is_open())
    throw std::runtime_error("Cannot open file: " + filename);

  std::vector<std::string> lines;

  // getline() erases `line` before refilling it, so moving it out is safe.
  for (std::string line; std::getline(file, line); )
    lines.push_back(std::move(line));

  return lines;
}
1575
/** Split a string on a single delimiter character.
 *
 * Splitting follows `std::getline` semantics: empty fields between
 * consecutive delimiters are kept, an empty input yields no fields, and a
 * trailing delimiter does not produce a final empty field.
 *
 * @param[in] str Text to split.
 * @param[in] delimiter Separator character.
 * @return The fields in order of appearance.
 */
[[nodiscard]] inline std::vector<std::string> split_string(const std::string & str, char delimiter)
{
  std::vector<std::string> result;
  std::istringstream iss(str);

  // getline() erases `token` before refilling it, so moving it out is safe.
  for (std::string token; std::getline(iss, token, delimiter); )
    result.push_back(std::move(token));

  return result;
}
1594} // namespace Aleph
1595
1596// ============================================================================
1597// Global namespace compatibility (for existing code)
1598// ============================================================================
1599
1600// Note: input_file_name is NOT exported to global namespace because
1601// many applications define their own variable with this name.
1602// Use Aleph::input_file_name if needed.
1603
1604// Global state
1605using Aleph::Buffer_Size;
1611
1612// Core functions
1618using Aleph::load_number;
1619using Aleph::load_string;
1624
1625// Source location and errors
1627using Aleph::ParseError;
1628
1629// Lookahead and backtracking
1631using Aleph::peek_char;
1634
1635// Comments
1639
1640// Extended numeric parsing
1641using Aleph::load_double;
1645
1646// Identifiers and keywords
1648using Aleph::is_keyword;
1649
1650// Expectations
1651using Aleph::expect_char;
1652using Aleph::expect;
1653using Aleph::try_char;
1654
1655// Extended string parsing
1659
1660// Tokens
1661using Aleph::TokenType;
1663using Aleph::Token;
1664
1665// File utilities
1669using Aleph::trim;
1670
1671// ============================================================================
1672// Legacy Macro (preserved for backward compatibility)
1673// ============================================================================
1674
/* Legacy error-reporting macro.
 *
 * Prints "<input_file_name>(<line>,<col>): " and the last scanned token to
 * std::cout, then forwards `str` and any extra arguments to AH_ERROR.
 * `input_file_name` is not defined here: it must be visible at the point
 * where the macro is expanded.
 *
 * The parameter list uses the standard `...` / `__VA_ARGS__` spelling instead
 * of the GNU-only named variadic form; `##__VA_ARGS__` still drops the comma
 * when no extra arguments are given.
 */
#define PRINT_ERROR(str, ...) (                                        \
    (std::cout << input_file_name << "("                               \
               << Aleph::previous_line_number << ","                   \
               << Aleph::previous_col_number << "): " << '\n'),        \
    (std::cout << "Last token: " << Aleph::token_instance << '\n'),    \
    AH_ERROR(str, ##__VA_ARGS__))
1695
1696#endif // PARSE_UTILS_H
Exception handling system with formatted messages for Aleph-w.
#define ah_out_of_range_error_if(C)
Throws std::out_of_range if condition holds.
Definition ah-errors.H:579
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
Definition ah-errors.H:522
String manipulation utilities.
#define AH_ERROR(format, args...)
Print an error message (always enabled).
Definition ahDefs.H:271
Core header for the Aleph-w library.
string command_line
Definition btreepic.C:403
void empty() noexcept
empty the list
Definition htlist.H:1689
size_t size() const noexcept
Count the number of elements of the list.
Definition htlist.H:1319
Exception class for parsing errors with location information.
ParseError(const std::string &message, const SourceLocation &location)
Construct a parse error with message and location.
ParseError(const std::string &message)
Construct a parse error with message only.
SourceLocation loc
const SourceLocation & location() const
Get the source location of the error.
iterator end() noexcept
Return an STL-compatible end iterator.
iterator begin() noexcept
Return an STL-compatible iterator to the first element.
Main namespace for Aleph-w library functions.
Definition ah-arena.H:89
void restore_position(std::ifstream &input_stream, const StreamPosition &pos)
Restore a previously marked position.
int current_line_number
Current line number in the input stream.
char load_char_literal(std::ifstream &input_stream)
Load a character literal.
bool is_keyword(const std::string &s, const std::vector< std::string > &keywords)
Check if a string is in a list of keywords.
void message(const char *file, int line, const char *format,...)
Print an informational message with file and line info.
Definition ahDefs.C:100
void skip_block_comment(std::ifstream &input_stream, const std::string &open="/" "*", const std::string &close="*" "/")
Skip a block comment (C-style)
std::string load_file_contents(const std::string &filename)
Load entire file contents into a string.
void init_token_scanning()
Initialize token scanning by recording current position.
void close_token_scanning(const char *buffer, char *&start_addr, const char *end_addr)
Finalize token scanning by null-terminating and saving the token.
Container< std::string > split_string(const std::string &s, const std::string &delim)
Split a std::string by a set of delimiter characters.
int read_char_from_stream(std::ifstream &input_stream)
Read a single character from an input stream with position tracking.
void expect_char(std::ifstream &input_stream, char expected)
Expect and consume a specific character.
void expect(std::ifstream &input_stream, const std::string &expected)
Expect and consume a specific string/keyword.
void print_parse_error_and_exit(const std::string &str)
Print a parse error message and terminate the program.
StreamPosition mark_position(std::ifstream &input_stream)
Mark the current position for potential backtracking.
std::vector< std::string > load_file_lines(const std::string &filename)
Load file as a vector of lines.
double load_double(std::ifstream &input_stream)
Load a floating-point number from the input stream.
int current_col_number
Current column number in the input stream.
int previous_col_number
Column number at the start of the current token.
std::string trim(const std::string &s)
Return a trimmed copy of a std::string (leading + trailing whitespace removed).
std::string token_instance
The most recently scanned token.
void put_char_in_buffer(char *&start_addr, const char *end_addr, int c)
Append a character to a buffer with bounds checking.
bool try_char(std::ifstream &input_stream, char ch)
Try to match a character without throwing.
char process_escape(const int c)
Process an escape sequence.
std::string to_string(const time_t t, const std::string &format)
Format a time_t value into a string using format.
Definition ah-date.H:140
long load_binary_number(std::ifstream &input_stream)
Load a binary number from the input stream.
std::string command_line_to_string(int argc, char *argv[])
Convert command line arguments to a single string.
std::string load_identifier(std::ifstream &input_stream)
Load an identifier from the input stream.
void reset_parse_state()
Reset the parsing state to initial values.
int previous_line_number
Line number at the start of the current token.
void skip_white_spaces(std::ifstream &input_stream)
Skip whitespace characters in the input stream.
std::string token_type_to_string(TokenType type)
Convert TokenType to string for debugging.
long load_octal_number(std::ifstream &input_stream)
Load an octal number from the input stream.
void next()
Advance all underlying iterators (bounds-checked).
Definition ah-zip.H:175
std::string load_escaped_string(std::ifstream &input_stream)
Load a string with escape sequence processing.
constexpr size_t Buffer_Size
Default buffer size for token parsing.
TokenType
Enumeration of basic token types.
@ CHAR
Character literal.
@ END_OF_FILE
End of input.
@ INTEGER
Integer literal.
@ STRING
String literal.
@ UNKNOWN
Unknown/invalid token.
@ IDENTIFIER
Variable/function name.
@ KEYWORD
Reserved keyword.
@ PUNCTUATION
Punctuation (;, {, }, etc.)
@ OPERATOR
Operator (+, -, etc.)
@ FLOAT
Floating-point literal.
@ COMMENT
Comment (if preserving)
std::string load_string(std::ifstream &input_stream)
Load a string from the input stream.
void skip_whitespace_and_comments(std::ifstream &input_stream)
Skip whitespace and comments (C/C++ style)
int peek_char(std::ifstream &input_stream)
Peek at the next character without consuming it.
long load_hex_number(std::ifstream &input_stream)
Load a hexadecimal number from the input stream.
DynList< T > maps(const C &c, Op op)
Classic map operation.
void skip_line_comment(std::ifstream &input_stream)
Skip a line comment (// style or # style)
void print_parse_warning(const std::string &str)
Print a parse warning message.
long load_number(std::ifstream &input_stream)
Load an integer number from the input stream.
STL namespace.
Represents a location in source code.
int column
Column number (1-based)
SourceLocation(std::string file, const int ln, const int col)
Construct with all fields.
SourceLocation()=default
Default constructor.
int line
Line number (1-based)
std::string filename
Name of the source file.
std::string to_string() const
Convert to human-readable string.
static SourceLocation previous(const std::string &file="")
Construct from the previous (token start) global parsing state.
static SourceLocation current(const std::string &file="")
Construct from the current global parsing state.
Structure to save stream position for backtracking.
int column
Column number at this position.
int line
Line number at this position.
std::streampos pos
Stream position.
Structure representing a lexical token.
Token(TokenType t, std::string v, SourceLocation loc)
Construct with all fields.
bool is_eof() const
Check if this is an end-of-file token.
std::string value
String value/content.
Token()=default
Default constructor.
std::string to_string() const
String representation for debugging.
SourceLocation location
Where token appeared.
TokenType type
Type of the token.
fstream file[12]
Definition treapObs.C:67