210 AH_ERROR(
"String too long (buffer overflow in parse_utils)");
288 <<
"end of file has been reached";
377 if (
not std::isdigit(c))
378 return std::strtol(buffer,
nullptr, 10);
384 while (std::isdigit(c))
394 return std::strtol(buffer,
nullptr, 10);
400 catch (
const std::out_of_range &)
404 return std::strtol(buffer,
nullptr, 10);
500 std::cout <<
"Parse error "
525 std::cout <<
"Parse warning "
560 for (
int i = 0; i <
argc; ++i)
632 std::ostringstream
oss;
805 const std::string & open =
"/" "*",
806 const std::string & close =
"*" "/")
834 throw ParseError(
"Unterminated block comment");
868 else if (
next ==
'*')
933 if (c ==
'-' or c ==
'+')
942 while (std::isdigit(c))
955 while (std::isdigit(c))
964 throw ParseError(
"Invalid floating-point number: no digits");
967 if (c ==
'e' or c ==
'E')
972 if (c ==
'+' or c ==
'-')
978 if (
not std::isdigit(c))
979 throw ParseError(
"Invalid exponent in floating-point number");
981 while (std::isdigit(c))
994 return std::strtod(buffer,
nullptr);
1024 throw ParseError(
"Hexadecimal number must start with 0x");
1027 if (c !=
'x' and c !=
'X')
1028 throw ParseError(
"Hexadecimal number must start with 0x");
1032 if (
not std::isxdigit(c))
1033 throw ParseError(
"Invalid hexadecimal number");
1035 while (std::isxdigit(c))
1046 return std::strtol(buffer,
nullptr, 16);
1075 throw ParseError(
"Octal number must start with 0");
1080 if (c ==
'x' or c ==
'X')
1081 throw ParseError(
"Expected octal number, got hexadecimal");
1083 while (c >=
'0' and c <=
'7')
1098 return std::strtol(buffer,
nullptr, 8);
1127 throw ParseError(
"Binary number must start with 0b");
1130 if (c !=
'b' and c !=
'B')
1131 throw ParseError(
"Binary number must start with 0b");
1135 if (c !=
'0' and c !=
'1')
1138 while (c ==
'0' or c ==
'1')
1149 return std::strtol(buffer,
nullptr, 2);
1183 if (
not std::isalpha(c)
and c !=
'_')
1184 throw ParseError(
"Identifier must start with letter or underscore");
1192 if (std::isalnum(c)
or c ==
'_')
1253 std::string msg =
"Expected '";
1255 msg +=
"' but found '";
1256 msg +=
static_cast<char>(c);
1340 case 'n':
return '\n';
1341 case 't':
return '\t';
1342 case 'r':
return '\r';
1343 case '\\':
return '\\';
1344 case '"':
return '"';
1345 case '\'':
return '\'';
1346 case '0':
return '\0';
1347 case 'a':
return '\a';
1348 case 'b':
return '\b';
1349 case 'f':
return '\f';
1350 case 'v':
return '\v';
1351 default:
return static_cast<char>(c);
1385 throw ParseError(
"Expected '\"' to start string");
1392 throw ParseError(
"Unterminated string literal");
1401 throw ParseError(
"Unterminated escape sequence");
1436 throw ParseError(
"Expected single quote to start character literal");
1446 char result =
static_cast<char>(c);
1450 throw ParseError(
"Expected single quote to end character literal");
1499 default:
return "???";
1544 std::ifstream
file(filename);
1545 if (!
file.is_open())
1546 throw std::runtime_error(
"Cannot open file: " + filename);
1548 std::ostringstream
ss;
1563 std::ifstream
file(filename);
1565 throw std::runtime_error(
"Cannot open file: " + filename);
1567 std::vector<std::string>
lines;
1570 while (std::getline(
file, line))
1571 lines.push_back(line);
1585 std::vector<std::string> result;
1586 std::istringstream
iss(str);
1590 result.push_back(
token);
/**
 * Report a parse error together with the position of the last scanned token.
 *
 * Expands to one parenthesized comma expression that, in order:
 *   1. writes `input_file_name`, then "(", Aleph::previous_line_number, ",",
 *      Aleph::previous_col_number and "): " followed by a newline to
 *      std::cout;
 *   2. writes "Last token: " followed by Aleph::token_instance and a newline
 *      to std::cout;
 *   3. invokes AH_ERROR(str, args...).
 *
 * `input_file_name` is referenced unqualified, so it must be visible at the
 * point where the macro is expanded.
 *
 * NOTE(review): the named variadic parameter `args...` and the `##args`
 * paste look like GNU preprocessor extensions — confirm that every
 * supported compiler accepts them.
 *
 * @param str  Format string forwarded as the first argument of AH_ERROR.
 * @param args Optional extra arguments forwarded to AH_ERROR after `str`.
 */
1689#define PRINT_ERROR(str, args...) ( \
 1690 (std::cout << input_file_name << "(" \
 1691 << Aleph::previous_line_number << "," \
 1692 << Aleph::previous_col_number << "): " << '\n'), \
 1693 (std::cout << "Last token: " << Aleph::token_instance << '\n'), \
 1694 AH_ERROR(str, ##args))
Exception handling system with formatted messages for Aleph-w.
#define ah_out_of_range_error_if(C)
Throws std::out_of_range if condition holds.
#define ah_domain_error_if(C)
Throws std::domain_error if condition holds.
String manipulation utilities.
#define AH_ERROR(format, args...)
Print an error message (always enabled).
Core header for the Aleph-w library.
void empty() noexcept
Empty the list.
size_t size() const noexcept
Count the number of elements of the list.
Exception class for parsing errors with location information.
ParseError(const std::string &message, const SourceLocation &location)
Construct a parse error with message and location.
ParseError(const std::string &message)
Construct a parse error with message only.
const SourceLocation & location() const
Get the source location of the error.
iterator end() noexcept
Return an STL-compatible end iterator.
iterator begin() noexcept
Return an STL-compatible iterator to the first element.
Main namespace for Aleph-w library functions.
void restore_position(std::ifstream &input_stream, const StreamPosition &pos)
Restore a previously marked position.
int current_line_number
Current line number in the input stream.
char load_char_literal(std::ifstream &input_stream)
Load a character literal.
bool is_keyword(const std::string &s, const std::vector< std::string > &keywords)
Check if a string is in a list of keywords.
void message(const char *file, int line, const char *format,...)
Print an informational message with file and line info.
void skip_block_comment(std::ifstream &input_stream, const std::string &open="/" "*", const std::string &close="*" "/")
Skip a block comment (C-style).
std::string load_file_contents(const std::string &filename)
Load entire file contents into a string.
void init_token_scanning()
Initialize token scanning by recording current position.
void close_token_scanning(const char *buffer, char *&start_addr, const char *end_addr)
Finalize token scanning by null-terminating and saving the token.
Container< std::string > split_string(const std::string &s, const std::string &delim)
Split a std::string by a set of delimiter characters.
int read_char_from_stream(std::ifstream &input_stream)
Read a single character from an input stream with position tracking.
void expect_char(std::ifstream &input_stream, char expected)
Expect and consume a specific character.
void expect(std::ifstream &input_stream, const std::string &expected)
Expect and consume a specific string/keyword.
void print_parse_error_and_exit(const std::string &str)
Print a parse error message and terminate the program.
StreamPosition mark_position(std::ifstream &input_stream)
Mark the current position for potential backtracking.
std::vector< std::string > load_file_lines(const std::string &filename)
Load file as a vector of lines.
double load_double(std::ifstream &input_stream)
Load a floating-point number from the input stream.
int current_col_number
Current column number in the input stream.
int previous_col_number
Column number at the start of the current token.
std::string trim(const std::string &s)
Return a trimmed copy of a std::string (leading + trailing whitespace removed).
std::string token_instance
The most recently scanned token.
void put_char_in_buffer(char *&start_addr, const char *end_addr, int c)
Append a character to a buffer with bounds checking.
bool try_char(std::ifstream &input_stream, char ch)
Try to match a character without throwing.
char process_escape(const int c)
Process an escape sequence.
std::string to_string(const time_t t, const std::string &format)
Format a time_t value into a string using format.
long load_binary_number(std::ifstream &input_stream)
Load a binary number from the input stream.
std::string command_line_to_string(int argc, char *argv[])
Convert command line arguments to a single string.
std::string load_identifier(std::ifstream &input_stream)
Load an identifier from the input stream.
void reset_parse_state()
Reset the parsing state to initial values.
int previous_line_number
Line number at the start of the current token.
void skip_white_spaces(std::ifstream &input_stream)
Skip whitespace characters in the input stream.
std::string token_type_to_string(TokenType type)
Convert TokenType to string for debugging.
long load_octal_number(std::ifstream &input_stream)
Load an octal number from the input stream.
void next()
Advance all underlying iterators (bounds-checked).
std::string load_escaped_string(std::ifstream &input_stream)
Load a string with escape sequence processing.
constexpr size_t Buffer_Size
Default buffer size for token parsing.
TokenType
Enumeration of basic token types.
@ END_OF_FILE
End of input.
@ INTEGER
Integer literal.
@ UNKNOWN
Unknown/invalid token.
@ IDENTIFIER
Variable/function name.
@ KEYWORD
Reserved keyword.
@ PUNCTUATION
Punctuation (;, {, }, etc.).
@ OPERATOR
Operator (+, -, etc.).
@ FLOAT
Floating-point literal.
@ COMMENT
Comment (if comments are being preserved).
std::string load_string(std::ifstream &input_stream)
Load a string from the input stream.
void skip_whitespace_and_comments(std::ifstream &input_stream)
Skip whitespace and comments (C/C++ style).
int peek_char(std::ifstream &input_stream)
Peek at the next character without consuming it.
long load_hex_number(std::ifstream &input_stream)
Load a hexadecimal number from the input stream.
DynList< T > maps(const C &c, Op op)
Classic map operation.
void skip_line_comment(std::ifstream &input_stream)
Skip a line comment (// style or # style).
void print_parse_warning(const std::string &str)
Print a parse warning message.
long load_number(std::ifstream &input_stream)
Load an integer number from the input stream.
Represents a location in source code.
int column
Column number (1-based).
SourceLocation(std::string file, const int ln, const int col)
Construct with all fields.
SourceLocation()=default
Default constructor.
int line
Line number (1-based).
std::string filename
Name of the source file.
std::string to_string() const
Convert to human-readable string.
static SourceLocation previous(const std::string &file="")
Construct from the previous (token start) global parsing state.
static SourceLocation current(const std::string &file="")
Construct from the current global parsing state.
Structure to save stream position for backtracking.
int column
Column number at this position.
int line
Line number at this position.
std::streampos pos
Stream position.
Structure representing a lexical token.
Token(TokenType t, std::string v, SourceLocation loc)
Construct with all fields.
bool is_eof() const
Check if this is an end-of-file token.
std::string value
String value/content.
Token()=default
Default constructor.
std::string to_string() const
String representation for debugging.
SourceLocation location
Where token appeared.
TokenType type
Type of the token.