Changed to plain string parsing.

This commit is contained in:
Andrew Lalis 2023-06-27 06:45:28 +02:00
parent 175039a639
commit a02d2ded29
4 changed files with 88 additions and 108 deletions

View File

@ -1,8 +1,7 @@
import std.stdio;
import std.string;
import std.file;
import nimpc;
import streams;
import streams.types.mapping;
int main(string[] args) {
if (args.length < 2) {
@ -12,9 +11,10 @@ int main(string[] args) {
string[] files = args[1 .. $];
writefln!"Compiling %s"(files);
foreach (filename; files) {
auto sIn = FileInputStream(toStringz(filename));
auto tokens = tokenize(mappingInputStreamFor!((ubyte b) => cast(char) b)(sIn));
writeln(tokens);
auto tokens = tokenize(readText(filename));
foreach (token; tokens) {
writeln(token);
}
}
return 0;
}

View File

@ -1,102 +0,0 @@
module nimpc.lexer;
import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;
/// The categories of lexical tokens produced by the lexer.
enum TokenType {
    KEYWORD,          // A reserved language word.
    DATA_TYPE,        // A built-in data type name.
    SENTENCE_END,     // The terminator of a statement ("sentence").
    LITERAL_INTEGER,  // An integer literal.
    LITERAL_FLOAT,    // A floating-point literal.
    LITERAL_BOOLEAN,  // A boolean literal.
    LITERAL_STRING,   // A string literal.
    COMMENT_INLINE,   // An inline comment introduced by '#'.
    SYMBOL            // Any other symbol.
}
/// A single lexical token extracted from the source text.
immutable struct Token {
    TokenType type;  // The category of this token.
    string content;  // The raw text content of the token.
    uint line;       // Line number where the token starts (counting from 0).
    uint column;     // Column counter value where the token starts.
}
/// Thrown when the lexer fails while reading or interpreting source text.
/// Carries the source position at which the failure occurred.
class LexerException : Exception {
    const uint sourceLine;    // Line at which the error occurred.
    const uint sourceColumn;  // Column at which the error occurred.

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}
/// Thrown internally to signal that the input stream has no more data;
/// caught by tokenize as the normal end-of-input condition.
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}
/**
 * Parses a list of tokens from an input stream of lines of code.
 *
 * NOTE(review): in its current state only inline '#' comments are emitted
 * as tokens; all other characters are read and discarded.
 *
 * Params:
 *   inputStream = The stream of characters to parse.
 * Returns: A list of tokens.
 */
Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    bool streamEnded = false; // NOTE(review): never written after init — candidate for removal.
    uint line = 0;
    uint col = 0;
    try {
        // This loop only exits via the StreamEndException thrown by readChar.
        while (true) {
            // Trim whitespace from the start of each line.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }
            // Inline comment from here to newline.
            if (c == '#') {
                const commentStartCol = col;
                c = readChar(inputStream, line, col);
                char[] commentText;
                // NOTE(review): a comment terminated by end-of-stream rather
                // than '\n' raises StreamEndException here and the partially
                // collected token is lost — confirm whether that is intended.
                while (c != '\n') {
                    commentText ~= c;
                    c = readChar(inputStream, line, col);
                }
                // The stored content excludes the leading '#' and has
                // surrounding whitespace removed.
                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(cast(string) commentText),
                    line,
                    commentStartCol
                );
            }
        }
    } catch (StreamEndException e) {
        // This is expected! It marks the normal end of input.
    }
    return tokenApp[];
}
/// Reads exactly one character from the stream, updating the caller's
/// line/column counters. Throws LexerException on a read error, and
/// StreamEndException when the stream is exhausted.
private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] buf;
    const StreamResult res = stream.readFromStream(buf);
    if (res.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (res.count == 0) {
        throw new StreamEndException();
    }
    const char ch = buf[0];
    // Advance the column for every character; a newline then resets the
    // column and moves to the next line.
    col++;
    if (ch == '\n') {
        line++;
        col = 0;
    }
    return ch;
}

View File

@ -5,4 +5,4 @@
module nimpc;
public import nimpc.ast;
public import nimpc.lexer;
public import nimpc.parser;

View File

@ -0,0 +1,82 @@
module nimpc.parser;
import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;
/// The categories of lexical tokens produced by the tokenizer.
enum TokenType {
    KEYWORD,          // A reserved language word, e.g. "define".
    DATA_TYPE,        // A built-in data type name.
    SENTENCE_END,     // The terminator of a statement ("sentence").
    LITERAL_INTEGER,  // An integer literal.
    LITERAL_FLOAT,    // A floating-point literal.
    LITERAL_BOOLEAN,  // A boolean literal.
    LITERAL_STRING,   // A string literal.
    COMMENT_INLINE,   // An inline comment introduced by '#'.
    SYMBOL            // Any other symbol.
}
/// A single lexical token extracted from the source text.
immutable struct Token {
    TokenType type;  // The category of this token.
    string content;  // The raw text content of the token.
    uint line;       // Line number where the token starts (counting from 0).
    uint column;     // Column counter value where the token starts.
}
/// Thrown when tokenization fails; carries the source position at which
/// the failure occurred.
class LexerException : Exception {
    const uint sourceLine;    // Line at which the error occurred.
    const uint sourceColumn;  // Column at which the error occurred.

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}
/// Thrown to signal that an input stream has ended.
/// NOTE(review): not referenced by the string-based tokenizer in this
/// module — possibly a leftover from the stream-based lexer; confirm
/// before removing.
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}
/**
 * Parses a list of tokens from a string of source code.
 *
 * Currently recognizes inline '#' comments (token content includes the
 * leading '#') and the "define" keyword; any other character is skipped.
 *
 * Params:
 *   input = The source text to tokenize.
 * Returns: A list of tokens, each tagged with the line and column counter
 *          values at which it starts.
 */
Token[] tokenize(string input) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;

    // Consumes the first `count` characters of `input`, keeping the
    // line/column counters accurate as newlines go by.
    // (Fixes a defect where line/col were never updated, so every token
    // reported position 0,0.)
    void advance(size_t count) {
        foreach (ch; input[0 .. count]) {
            if (ch == '\n') {
                line++;
                col = 0;
            } else {
                col++;
            }
        }
        input = input[count .. $];
    }

    while (input.length > 0) {
        // Skip leading whitespace; unlike stripLeft, this counts the
        // newlines it discards.
        size_t ws = 0;
        while (ws < input.length && isWhite(input[ws])) {
            ws++;
        }
        advance(ws);
        if (input.length == 0) break;

        if (input[0] == '#') {
            // Inline comment: runs to the end of the line (or end of input).
            size_t endIdx = 0;
            while (endIdx < input.length && input[endIdx] != '\n') {
                endIdx++;
            }
            tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col);
            advance(endIdx);
        } else if (input.length >= 7 && input[0 .. 7] == "define ") {
            // "define" keyword followed by a space; the space is consumed
            // but not included in the token content.
            tokenApp ~= Token(TokenType.KEYWORD, input[0 .. 6], line, col);
            advance(7);
        } else {
            // Unrecognized character: skip it for now.
            advance(1);
        }
    }
    return tokenApp[];
}