nimp/compiler/source/nimpc/parser.d

module nimpc.parser;

import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;

enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE,
    SYMBOL
}

immutable struct Token {
    TokenType type;
    string content;
    uint line;
    uint column;
}

class LexerException : Exception {
    const uint sourceLine;
    const uint sourceColumn;

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}

class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}

/**
 * Parses a list of tokens from a string of source code.
 * Params:
 *   input = The source code to tokenize.
 * Returns: A list of tokens.
 */
Token[] tokenize(string input) {
    Appender!(Token[]) tokenApp;
    bool streamEnded = false; // Currently unused.
    // Note: line and col are never advanced below, so every token currently
    // reports position (0, 0).
    uint line = 0;
    uint col = 0;
    while (input.length > 0) {
        input = stripLeft(input);
        if (input.length == 0) break;
        if (input[0] == '#') {
            writeln("Parsing inline comment.");
            // Parse inline comment: consume everything up to the end of the line.
            size_t endIdx = 0;
            while (!(endIdx >= input.length || input[endIdx] == '\n')) {
                endIdx++;
            }
            tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col);
            input = input[endIdx .. $];
        } else if (input.length >= 7 && input[0 .. 7] == "define ") {
            // "define" keyword: emit the keyword token, then skip past the trailing space.
            tokenApp ~= Token(TokenType.KEYWORD, input[0 .. 6], line, col);
            input = input[7 .. $];
        } else {
            // Unrecognized character: skip it for now.
            input = input[1 .. $];
        }
    }
    return tokenApp[];
}
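
// A minimal usage sketch of tokenize, covering only the two token forms handled
// above (inline comments and the "define " keyword). The sample source string is
// invented for illustration; anything else falls through the character-skipping
// branch and produces no token.
unittest {
    Token[] tokens = tokenize("# greet the user\ndefine x");
    assert(tokens.length == 2);
    assert(tokens[0].type == TokenType.COMMENT_INLINE);
    assert(tokens[0].content == "# greet the user");
    assert(tokens[1].type == TokenType.KEYWORD);
    assert(tokens[1].content == "define");
}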