module nimpc.parser;

import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;

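/**
 * The kinds of tokens that the lexer can produce.
 */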
enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE,
    SYMBOL
}

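/**
 * A single token extracted from the source, with its type, raw content,
 * and the line and column at which it was found.
 */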
immutable struct Token {
    TokenType type;
    string content;
    uint line;
    uint column;
}

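/**
 * Thrown by the lexer to report an error at a specific source position.
 */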
class LexerException : Exception {
    const uint sourceLine;
    const uint sourceColumn;

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}

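/**
 * Thrown to signal that the end of the input stream was reached.
 */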
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}

/**
 * Parses a list of tokens from a string of source code.
 * Params:
 *   input = The source code to tokenize.
 * Returns: A list of tokens.
 */
Token[] tokenize(string input) {
    Appender!(Token[]) tokenApp;
    bool streamEnded = false;
    uint line = 0;
    uint col = 0;

    while (input.length > 0) {
        // Skip leading whitespace, tracking the current line and column.
        auto stripped = stripLeft(input);
        foreach (c; input[0 .. input.length - stripped.length]) {
            if (c == '\n') {
                line++;
                col = 0;
            } else {
                col++;
            }
        }
        input = stripped;
        if (input.length == 0) break;
        if (input[0] == '#') {
            writeln("Parsing inline comment.");
            // Parse an inline comment, which runs to the end of the line.
            size_t endIdx = 0;
            while (endIdx < input.length && input[endIdx] != '\n') {
                endIdx++;
            }
            tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col);
            col += cast(uint) endIdx;
            input = input[endIdx .. $];
        } else if (input.length >= 7 && input[0 .. 7] == "define ") {
            tokenApp ~= Token(TokenType.KEYWORD, input[0 .. 6], line, col);
            col += 7;
            input = input[7 .. $];
        } else {
            // No tokenization rule matched; skip a single character for now.
            col++;
            input = input[1 .. $];
        }
    }

    return tokenApp[];
}

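// A minimal usage sketch for tokenize(): it exercises the two rules the
// lexer currently implements, the "define " keyword and '#' inline comments.
// The sample source string below is illustrative only.
unittest {
    Token[] tokens = tokenize("define x # a comment");
    assert(tokens.length == 2);
    assert(tokens[0].type == TokenType.KEYWORD);
    assert(tokens[0].content == "define");
    assert(tokens[1].type == TokenType.COMMENT_INLINE);
    assert(tokens[1].content == "# a comment");
}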