nimp/compiler/source/nimpc/lexer.d

103 lines
2.5 KiB
D

module nimpc.lexer;
import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;
/**
 * The categories of tokens the lexer can produce. Note that in this file's
 * current `tokenize` implementation only COMMENT_INLINE tokens are actually
 * emitted; the remaining categories are presumably produced by lexing logic
 * not yet implemented here — TODO confirm as the lexer grows.
 */
enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE, // an inline `#` comment running to end of line
    SYMBOL
}
/**
 * A single token extracted from the input, together with the source position
 * at which it was found. As tracked by `tokenize`/`readChar`, lines are
 * 0-based and columns are 1-based (the column counter resets to 0 at each
 * newline and is incremented before each character is returned).
 */
immutable struct Token {
    TokenType type;  // category of this token
    string content;  // token text (for comments, the stripped comment body)
    uint line;       // source line where the token starts
    uint column;     // source column where the token starts
}
/**
 * Thrown when lexing fails, e.g. when the underlying stream reports a read
 * error (see `readChar`). Carries the source position the lexer had reached
 * when the failure occurred.
 */
class LexerException : Exception {
    const uint sourceLine;   // line at which the error occurred
    const uint sourceColumn; // column at which the error occurred

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}
/**
 * Internal control-flow exception signalling that the input stream has been
 * exhausted. Thrown by `readChar` when a read returns zero characters and
 * caught by `tokenize`, where it terminates the main loop normally.
 */
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}
/**
 * Parses a list of tokens from an input stream of lines of code.
 *
 * Currently only inline `#` comments are recognized; any other
 * non-whitespace character is consumed and discarded (lexing of keywords,
 * literals, and symbols is not yet implemented here).
 *
 * Params:
 *   inputStream = The lines of input to parse.
 * Returns: A list of tokens.
 * Throws: LexerException if the underlying stream reports a read error.
 */
Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;
    bool streamEnded = false;
    try {
        while (!streamEnded) {
            // Trim whitespace from the start of each line.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }
            // Inline comment from here to newline (or end of stream).
            if (c == '#') {
                // Record the position of the '#' now: readChar advances
                // `line` when it consumes the terminating newline, so reading
                // the position afterwards would report the *next* line.
                const commentLine = line;
                const commentStartCol = col;
                char[] commentText;
                try {
                    c = readChar(inputStream, line, col);
                    while (c != '\n') {
                        commentText ~= c;
                        c = readChar(inputStream, line, col);
                    }
                } catch (StreamEndException e) {
                    // A comment terminated by end-of-stream (no trailing
                    // newline) is still a complete comment; emit it below
                    // instead of dropping it.
                    streamEnded = true;
                }
                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(cast(string) commentText),
                    commentLine,
                    commentStartCol
                );
            }
        }
    } catch (StreamEndException e) {
        // This is expected: the stream ran out between tokens.
    }
    return tokenApp[];
}
/**
 * Reads exactly one character from the stream, keeping the caller's position
 * counters up to date: `col` is incremented for every character consumed,
 * and consuming a `'\n'` additionally advances `line` and resets `col` to 0.
 *
 * Params:
 *   stream = The stream to read from.
 *   line = Line counter, updated in place.
 *   col = Column counter, updated in place.
 * Returns: The character that was read.
 * Throws: LexerException if the stream reports a read error,
 *   StreamEndException if the stream has no more characters.
 */
private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] buf;
    auto readResult = stream.readFromStream(buf);
    if (readResult.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (readResult.count == 0) {
        throw new StreamEndException();
    }
    const ch = buf[0];
    col++;
    if (ch == '\n') {
        line++;
        col = 0;
    }
    return ch;
}