Changed to plain string parsing.

2023-06-27 06:45:28 +02:00 · 2023-06-27 06:45:28 +02:00 · a02d2ded29
parent 175039a639
commit a02d2ded29
4 changed files with 88 additions and 108 deletions
--- a/compiler/source/app.d
+++ b/compiler/source/app.d
@ -1,8 +1,7 @@
 import std.stdio;
 import std.string;
 import std.file;
 import nimpc;
 import streams;
 import streams.types.mapping;
 int main(string[] args) {
 	if (args.length < 2) {
@ -12,9 +11,10 @@ int main(string[] args) {
 	string[] files = args[1 .. $];
 	writefln!"Compiling %s"(files);
 	foreach (filename; files) {
-		auto sIn = FileInputStream(toStringz(filename));
+		auto tokens = tokenize(readText(filename));
-		auto tokens = tokenize(mappingInputStreamFor!((ubyte b) => cast(char) b)(sIn));
+		foreach (token; tokens) {
-		writeln(tokens);
+			writeln(token);
 		}
 	}
 	return 0;
 }
--- a/compiler/source/nimpc/lexer.d
+++ b/compiler/source/nimpc/lexer.d
@ -1,102 +0,0 @@
 module nimpc.lexer;
 import streams;
 import std.stdio;
 import std.regex;
 import std.array;
 import std.typecons;
 import std.uni;
 import std.string;
 /**
 * The category a Token is tagged with.
 * The tokenize function of this module currently only produces
 * COMMENT_INLINE tokens; the remaining members are declared but not yet
 * emitted by any code visible here.
 */
 enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE,
    SYMBOL
 }
 /**
 * A single lexical token. The struct is declared immutable, so all of its
 * fields are fixed once a value has been constructed.
 */
 immutable struct Token {
    // The category of this token.
    TokenType type;
    // The text associated with the token.
    string content;
    // Source line recorded by the lexer for this token.
    uint line;
    // Source column recorded by the lexer for this token.
    uint column;
 }
 /**
 * An exception that carries the source position it was raised for.
 */
 class LexerException : Exception {
    // Line value handed to the constructor.
    const uint sourceLine;
    // Column value handed to the constructor.
    const uint sourceColumn;
    /**
    * Params:
    *   msg = The exception message.
    *   sourceLine = The line to record on the exception.
    *   sourceColumn = The column to record on the exception.
    */
    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
 }
 /**
 * Signals that the input stream has no more characters. It is thrown by
 * readChar when a read yields zero elements, and caught by tokenize as the
 * normal way to finish.
 */
 class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
 }
 /**
 * Parses a list of tokens from an input stream of characters.
 *
 * Only inline comments are recognised so far: a comment starts at '#' and
 * runs up to the next newline or the end of the stream. The token content is
 * the comment text without the '#', stripped of surrounding whitespace. The
 * token position is the line/column state reported by readChar right after
 * the '#' was consumed, i.e. the position of the comment's start and not of
 * the newline that terminates it.
 * Params:
 *   inputStream = The character stream to read source code from.
 * Returns: A list of tokens.
 * Throws: LexerException if the stream reports a read error.
 */
 Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;
    try {
        while (true) {
            // Skip whitespace in front of the next token.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }
            // Inline comment from here to newline (or end of stream).
            if (c == '#') {
                // Capture the position now: reading the terminating newline
                // advances `line`, which would mislabel the comment.
                const commentStartLine = line;
                const commentStartCol = col;
                char[] commentText;
                bool streamEnded = false;
                try {
                    c = readChar(inputStream, line, col);
                    while (c != '\n') {
                        commentText ~= c;
                        c = readChar(inputStream, line, col);
                    }
                } catch (StreamEndException e) {
                    // A comment on the last line without a trailing newline
                    // is still a complete comment; keep what was read.
                    streamEnded = true;
                }
                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(commentText.idup),
                    commentStartLine,
                    commentStartCol
                );
                if (streamEnded) break;
            }
        }
    } catch (StreamEndException e) {
        // This is expected: the stream ended between tokens.
    }
    return tokenApp[];
 }
 /**
 * Reads exactly one character from the stream and keeps the caller's
 * position counters up to date.
 * Params:
 *   stream = The stream to read from.
 *   line = Line counter; incremented when a newline is read.
 *   col = Column counter; incremented for every other character and reset
 *         to zero when a newline is read.
 * Returns: The character that was read.
 * Throws: LexerException if the stream reports an error, and
 *         StreamEndException if the read yields no character.
 */
 private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] cell;
    StreamResult outcome = stream.readFromStream(cell);
    if (outcome.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (outcome.count == 0) {
        throw new StreamEndException();
    }
    const char ch = cell[0];
    if (ch == '\n') {
        // A newline starts a fresh line at column zero.
        line++;
        col = 0;
    } else {
        col++;
    }
    return ch;
 }
--- a/compiler/source/nimpc/package.d
+++ b/compiler/source/nimpc/package.d
@ -5,4 +5,4 @@
 module nimpc;
 public import nimpc.ast;
-public import nimpc.lexer;
+public import nimpc.parser;
--- a/compiler/source/nimpc/parser.d
+++ b/compiler/source/nimpc/parser.d
@ -0,0 +1,82 @@
 module nimpc.parser;
 import streams;
 import std.stdio;
 import std.regex;
 import std.array;
 import std.typecons;
 import std.uni;
 import std.string;
 /**
 * The category a Token is tagged with.
 * The tokenize function of this module currently only produces
 * COMMENT_INLINE and KEYWORD tokens; the remaining members are declared but
 * not yet emitted by any code visible here.
 */
 enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE,
    SYMBOL
 }
 /**
 * A single lexical token. The struct is declared immutable, so all of its
 * fields are fixed once a value has been constructed.
 */
 immutable struct Token {
    // The category of this token.
    TokenType type;
    // The text associated with the token.
    string content;
    // Source line recorded by the tokenizer for this token.
    uint line;
    // Source column recorded by the tokenizer for this token.
    uint column;
 }
 /**
 * An exception that carries the source position it was raised for.
 */
 class LexerException : Exception {
    // Line value handed to the constructor.
    const uint sourceLine;
    // Column value handed to the constructor.
    const uint sourceColumn;
    /**
    * Params:
    *   msg = The exception message.
    *   sourceLine = The line to record on the exception.
    *   sourceColumn = The column to record on the exception.
    */
    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
 }
 /**
 * An exception with the fixed message "Stream ended.".
 * NOTE(review): the string-based tokenize in this module does not throw or
 * catch it; presumably a leftover from the stream-based lexer. Confirm
 * whether it is still needed.
 */
 class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
 }
 /**
 * Splits source text into a list of tokens.
 *
 * Two kinds of token are recognised so far:
 * $(UL
 *   $(LI Inline comments: from '#' up to, but not including, the next newline
 *        or the end of the input. The '#' is part of the token content.)
 *   $(LI The keyword "define", when it stands as a word of its own, i.e. it
 *        is followed by whitespace or the end of the input and is not the
 *        tail of a longer word.)
 * )
 * All other text is skipped without producing a token.
 *
 * Token positions are zero-based. The column is counted in UTF-8 code units
 * from the start of the line.
 * Params:
 *   input = The complete source text to tokenize.
 * Returns: The recognised tokens, in order of appearance.
 */
 Token[] tokenize(string input) {
    enum keywordDefine = "define";
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;

    // Drops the first `count` code units of the input while keeping the
    // line/column counters in step with what was dropped.
    void consume(size_t count) {
        foreach (char c; input[0 .. count]) {
            if (c == '\n') {
                line++;
                col = 0;
            } else {
                col++;
            }
        }
        input = input[count .. $];
    }

    // True for characters that can be part of a word. Bytes of multi-byte
    // UTF-8 sequences count as word characters so they are skipped as a unit.
    static bool isWordChar(char c) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || c == '_'
            || c >= 0x80;
    }

    while (true) {
        // Skip leading whitespace, but account for it in the position.
        consume(input.length - stripLeft(input).length);
        if (input.length == 0) break;

        if (input[0] == '#') {
            // Inline comment: runs to the end of the line or of the input.
            size_t endIdx = 0;
            while (endIdx < input.length && input[endIdx] != '\n') {
                endIdx++;
            }
            tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col);
            consume(endIdx);
            continue;
        }

        if (input.length >= keywordDefine.length && input[0 .. keywordDefine.length] == keywordDefine) {
            const rest = input[keywordDefine.length .. $];
            // The keyword must end here: either the input ends, or whitespace
            // (by the same definition stripLeft uses) follows.
            if (rest.length == 0 || stripLeft(rest).length < rest.length) {
                tokenApp ~= Token(TokenType.KEYWORD, input[0 .. keywordDefine.length], line, col);
                consume(keywordDefine.length);
                continue;
            }
        }

        // Unrecognised text. Skip a whole word at once so that a keyword
        // embedded in a longer word (e.g. "undefine") is not matched later.
        size_t skip = 1;
        if (isWordChar(input[0])) {
            while (skip < input.length && isWordChar(input[skip])) {
                skip++;
            }
        }
        consume(skip);
    }
    return tokenApp[];
 }