nimp/compiler/source/nimpc/lexer.d

103 lines
2.5 KiB
D

module nimpc.lexer;
import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;
/**
 * The categories of tokens the lexer can produce. Note that in this file's
 * current `tokenize` implementation only COMMENT_INLINE tokens are actually
 * emitted; the remaining categories are presumably produced by lexing logic
 * not yet implemented here — TODO confirm as the lexer grows.
 */
enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE, // an inline `#` comment running to end of line
    SYMBOL
}
/**
 * A single token extracted from the input, together with the source position
 * at which it was found. As tracked by `tokenize`/`readChar`, lines are
 * 0-based and columns are 1-based (the column counter resets to 0 at each
 * newline and is incremented before each character is returned).
 */
immutable struct Token {
    TokenType type;  // category of this token
    string content;  // token text (for comments, the stripped comment body)
    uint line;       // source line where the token starts
    uint column;     // source column where the token starts
}
/**
 * Thrown when lexing fails, e.g. when the underlying stream reports a read
 * error (see `readChar`). Carries the source position the lexer had reached
 * when the failure occurred.
 */
class LexerException : Exception {
    const uint sourceLine;   // line at which the error occurred
    const uint sourceColumn; // column at which the error occurred

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}
/**
 * Internal control-flow exception signalling that the input stream has been
 * exhausted. Thrown by `readChar` when a read returns zero characters and
 * caught by `tokenize`, where it terminates the main loop normally.
 */
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}
/**
 * Parses a list of tokens from an input stream of lines of code.
 *
 * Currently only inline `#` comments are recognized; any other
 * non-whitespace character is consumed and discarded (lexing of keywords,
 * literals, and symbols is not yet implemented here).
 *
 * Params:
 *   inputStream = The lines of input to parse.
 * Returns: A list of tokens.
 * Throws: LexerException if the underlying stream reports a read error.
 */
Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;
    bool streamEnded = false;
    try {
        while (!streamEnded) {
            // Trim whitespace from the start of each line.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }
            // Inline comment from here to newline (or end of stream).
            if (c == '#') {
                // Record the position of the '#' now: readChar advances
                // `line` when it consumes the terminating newline, so reading
                // the position afterwards would report the *next* line.
                const commentLine = line;
                const commentStartCol = col;
                char[] commentText;
                try {
                    c = readChar(inputStream, line, col);
                    while (c != '\n') {
                        commentText ~= c;
                        c = readChar(inputStream, line, col);
                    }
                } catch (StreamEndException e) {
                    // A comment terminated by end-of-stream (no trailing
                    // newline) is still a complete comment; emit it below
                    // instead of dropping it.
                    streamEnded = true;
                }
                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(cast(string) commentText),
                    commentLine,
                    commentStartCol
                );
            }
        }
    } catch (StreamEndException e) {
        // This is expected: the stream ran out between tokens.
    }
    return tokenApp[];
}
/**
 * Reads exactly one character from the stream, keeping the caller's position
 * counters up to date: `col` is incremented for every character consumed,
 * and consuming a `'\n'` additionally advances `line` and resets `col` to 0.
 *
 * Params:
 *   stream = The stream to read from.
 *   line = Line counter, updated in place.
 *   col = Column counter, updated in place.
 * Returns: The character that was read.
 * Throws: LexerException if the stream reports a read error,
 *   StreamEndException if the stream has no more characters.
 */
private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] buf;
    auto readResult = stream.readFromStream(buf);
    if (readResult.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (readResult.count == 0) {
        throw new StreamEndException();
    }
    const ch = buf[0];
    col++;
    if (ch == '\n') {
        line++;
        col = 0;
    }
    return ch;
}