2023-06-23 06:55:56 +00:00
|
|
|
module nimpc.lexer;
|
|
|
|
|
|
|
|
import streams;
|
|
|
|
import std.stdio;
|
|
|
|
import std.regex;
|
|
|
|
import std.array;
|
2023-06-27 04:21:13 +00:00
|
|
|
import std.typecons;
|
|
|
|
import std.uni;
|
|
|
|
import std.string;
|
2023-06-23 06:55:56 +00:00
|
|
|
|
|
|
|
/**
 * The categories of token that the lexer can produce. Member order is
 * significant: the implicit integer values follow declaration order.
 */
enum TokenType {
    /// A reserved word of the language.
    KEYWORD,
    /// The name of a data type.
    DATA_TYPE,
    /// The marker that terminates a sentence (statement).
    SENTENCE_END,
    /// An integer literal.
    LITERAL_INTEGER,
    /// A floating-point literal.
    LITERAL_FLOAT,
    /// A boolean literal.
    LITERAL_BOOLEAN,
    /// A string literal.
    LITERAL_STRING,
    /// A comment running to the end of its line.
    COMMENT_INLINE,
    /// Any other symbol.
    SYMBOL
}
|
|
|
|
|
2023-06-27 04:21:13 +00:00
|
|
|
/**
 * A single lexical token. The struct is declared immutable, so its fields
 * cannot be modified once a value has been constructed.
 */
immutable struct Token {
    /// The category this token belongs to.
    TokenType type;

    /// The text carried by the token.
    string content;

    /// The source line associated with the token.
    uint line;

    /// The source column associated with the token.
    uint column;
}
|
|
|
|
|
|
|
|
/**
 * Raised when the lexer fails, carrying the source position at which the
 * failure was detected.
 */
class LexerException : Exception {
    /// Line of the source position reported with the failure.
    const uint sourceLine;

    /// Column of the source position reported with the failure.
    const uint sourceColumn;

    /**
     * Creates the exception.
     * Params:
     *   msg = Description of what went wrong.
     *   sourceLine = Line of the offending source position.
     *   sourceColumn = Column of the offending source position.
     */
    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceColumn = sourceColumn;
        this.sourceLine = sourceLine;
    }
}
|
|
|
|
|
2023-06-27 04:21:13 +00:00
|
|
|
/**
 * Signals that the input stream has no more data to offer.
 */
class StreamEndException : Exception {
    /// Creates the exception with its fixed message.
    this() { super("Stream ended."); }
}
|
|
|
|
|
2023-06-23 06:55:56 +00:00
|
|
|
/**
 * Parses a list of tokens from an input stream of lines of code.
 *
 * Only inline comments (`#` up to the end of the line) are currently turned
 * into tokens; whitespace is skipped and any other character is read and
 * discarded.
 *
 * Params:
 *   inputStream = The lines of input to parse.
 * Returns: A list of tokens.
 * Throws: LexerException if a character cannot be read from the stream.
 */
Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;

    try {
        while (true) {
            // Skip any whitespace (including newlines) before the next token.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }

            // Inline comment from here to newline.
            if (c == '#') {
                // Capture the position now: reading the terminating newline
                // advances `line`, so it must not be sampled after the loop.
                const commentStartLine = line;
                const commentStartCol = col;
                char[] commentText;
                bool reachedEnd = false;

                while (true) {
                    try {
                        c = readChar(inputStream, line, col);
                    } catch (StreamEndException e) {
                        // The comment sits on the final line without a
                        // trailing newline; it must still be emitted.
                        reachedEnd = true;
                        break;
                    }
                    if (c == '\n') break;
                    commentText ~= c;
                }

                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(commentText).idup,
                    commentStartLine,
                    commentStartCol
                );

                if (reachedEnd) break;
            }
        }
    } catch (StreamEndException e) {
        // This is expected! Running out of input is how the loop terminates.
    }

    return tokenApp[];
}
|
|
|
|
|
2023-06-27 04:21:13 +00:00
|
|
|
/**
 * Reads a single character from the stream and advances the position
 * counters to account for it.
 *
 * Params:
 *   stream = The stream to read from.
 *   line = Line counter; incremented when a newline is read.
 *   col = Column counter; incremented per character and reset to zero when
 *         a newline is read.
 * Returns: The character that was read.
 * Throws: LexerException if the stream reports an error, or
 *         StreamEndException if the stream yields no data.
 */
private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] oneChar;
    StreamResult outcome = stream.readFromStream(oneChar);

    if (outcome.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (outcome.count == 0) {
        throw new StreamEndException();
    }

    const char ch = oneChar[0];
    if (ch == '\n') {
        // A newline starts a fresh line at column zero.
        line++;
        col = 0;
    } else {
        col++;
    }
    return ch;
}
|