From a02d2ded29957920008df1adcde0b5d43c78cf93 Mon Sep 17 00:00:00 2001 From: Andrew Lalis Date: Tue, 27 Jun 2023 06:45:28 +0200 Subject: [PATCH] Changed to plain string parsing. --- compiler/source/app.d | 10 ++-- compiler/source/nimpc/lexer.d | 102 -------------------------------- compiler/source/nimpc/package.d | 2 +- compiler/source/nimpc/parser.d | 82 +++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 108 deletions(-) delete mode 100644 compiler/source/nimpc/lexer.d create mode 100644 compiler/source/nimpc/parser.d diff --git a/compiler/source/app.d b/compiler/source/app.d index 15ef846..3afd1ed 100644 --- a/compiler/source/app.d +++ b/compiler/source/app.d @@ -1,8 +1,7 @@ import std.stdio; import std.string; +import std.file; import nimpc; -import streams; -import streams.types.mapping; int main(string[] args) { if (args.length < 2) { @@ -12,9 +11,10 @@ int main(string[] args) { string[] files = args[1 .. $]; writefln!"Compiling %s"(files); foreach (filename; files) { - auto sIn = FileInputStream(toStringz(filename)); - auto tokens = tokenize(mappingInputStreamFor!((ubyte b) => cast(char) b)(sIn)); - writeln(tokens); + auto tokens = tokenize(readText(filename)); + foreach (token; tokens) { + writeln(token); + } } return 0; } diff --git a/compiler/source/nimpc/lexer.d b/compiler/source/nimpc/lexer.d deleted file mode 100644 index 2c0dcec..0000000 --- a/compiler/source/nimpc/lexer.d +++ /dev/null @@ -1,102 +0,0 @@ -module nimpc.lexer; - -import streams; -import std.stdio; -import std.regex; -import std.array; -import std.typecons; -import std.uni; -import std.string; - -enum TokenType { - KEYWORD, - DATA_TYPE, - SENTENCE_END, - LITERAL_INTEGER, - LITERAL_FLOAT, - LITERAL_BOOLEAN, - LITERAL_STRING, - COMMENT_INLINE, - SYMBOL -} - -immutable struct Token { - TokenType type; - string content; - uint line; - uint column; -} - -class LexerException : Exception { - const uint sourceLine; - const uint sourceColumn; - - this(string msg, uint sourceLine, uint sourceColumn) { - super(msg); - this.sourceLine = sourceLine; - this.sourceColumn = sourceColumn; - } -} - -class StreamEndException : Exception { - this() { - super("Stream ended."); - } -} - -/** - * Parses a list of tokens from an input stream of lines of code. - * Params: - * inputStream = The lines of input to parse. - * Returns: A list of tokens. - */ -Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) { - Appender!(Token[]) tokenApp; - bool streamEnded = false; - uint line = 0; - uint col = 0; - - try { - while (true) { - // Trim whitespace from the start of each line. - char c = readChar(inputStream, line, col); - while (isWhite(c)) { - c = readChar(inputStream, line, col); - } - // Inline comment from here to newline. - if (c == '#') { - const commentStartCol = col; - c = readChar(inputStream, line, col); - char[] commentText; - while (c != '\n') { - commentText ~= c; - c = readChar(inputStream, line, col); - } - tokenApp ~= Token( - TokenType.COMMENT_INLINE, - strip(cast(string) commentText), - line, - commentStartCol - ); - } - } - } catch (StreamEndException e) { - // This is expected! - } - return tokenApp[]; -} - -private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) { - char[1] buffer; - StreamResult result = stream.readFromStream(buffer); - if (result.hasError) { - throw new LexerException("Failed to read one more char from stream.", line, col); - } - if (result.count == 0) throw new StreamEndException(); - col++; - if (buffer[0] == '\n') { - line++; - col = 0; - } - return buffer[0]; -} diff --git a/compiler/source/nimpc/package.d b/compiler/source/nimpc/package.d index 95b7a58..f5d76b1 100644 --- a/compiler/source/nimpc/package.d +++ b/compiler/source/nimpc/package.d @@ -5,4 +5,4 @@ module nimpc; public import nimpc.ast; -public import nimpc.lexer; +public import nimpc.parser; diff --git a/compiler/source/nimpc/parser.d b/compiler/source/nimpc/parser.d new file mode 100644 index 0000000..11a7f90 --- /dev/null +++ b/compiler/source/nimpc/parser.d @@ -0,0 +1,82 @@ +module nimpc.parser; + +import streams; +import std.stdio; +import std.regex; +import std.array; +import std.typecons; +import std.uni; +import std.string; + +enum TokenType { + KEYWORD, + DATA_TYPE, + SENTENCE_END, + LITERAL_INTEGER, + LITERAL_FLOAT, + LITERAL_BOOLEAN, + LITERAL_STRING, + COMMENT_INLINE, + SYMBOL +} + +immutable struct Token { + TokenType type; + string content; + uint line; + uint column; +} + +class LexerException : Exception { + const uint sourceLine; + const uint sourceColumn; + + this(string msg, uint sourceLine, uint sourceColumn) { + super(msg); + this.sourceLine = sourceLine; + this.sourceColumn = sourceColumn; + } +} + +class StreamEndException : Exception { + this() { + super("Stream ended."); + } +} + +/** + * Parses a list of tokens from an input stream of lines of code. + * Params: + * input = The lines of input to parse. + * Returns: A list of tokens. + */ +Token[] tokenize(string input) { + Appender!(Token[]) tokenApp; + bool streamEnded = false; + uint line = 0; + uint col = 0; + + while (input.length > 0) { + input = stripLeft(input); + if (input.length == 0) break; + if (input[0] == '#') { + writeln("Parsing inline comment."); + // parse inline comment. + size_t endIdx = 0; + while (!(endIdx >= input.length || input[endIdx] == '\n')) { + endIdx++; + } + tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col); + input = input[endIdx .. $]; + } else if (input.length >= 7 && input[0 .. 7] == "define ") { + tokenApp ~= Token(TokenType.KEYWORD, input[0 .. 6], line, col); + input = input[7 .. $]; + } else { + input = input[1 .. $]; + } + } + + return tokenApp[]; +} + +