module nimpc.lexer;

import streams;

import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;

/// The categories of token this lexer can produce.
enum TokenType {
    KEYWORD,
    DATA_TYPE,
    SENTENCE_END,
    LITERAL_INTEGER,
    LITERAL_FLOAT,
    LITERAL_BOOLEAN,
    LITERAL_STRING,
    COMMENT_INLINE,
    SYMBOL
}

/// A single lexed token together with its position in the source text.
immutable struct Token {
    TokenType type;
    string content;
    uint line;   // 0-based line on which the token starts (see readChar).
    uint column; // 1-based column of the token's first character.
}

/// Thrown when the lexer fails to read input; carries the source position.
class LexerException : Exception {
    const uint sourceLine;
    const uint sourceColumn;

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}

/// Thrown internally by readChar to signal that the input stream has ended.
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}

/**
 * Parses a list of tokens from an input stream of lines of code.
 * Params:
 *   inputStream = The lines of input to parse.
 * Returns: A list of tokens.
 * Throws: LexerException if a character cannot be read from the stream.
 */
Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;
    try {
        while (true) {
            // Skip whitespace before the next token.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }
            // Inline comment from here to newline (or end of input).
            if (c == '#') {
                // Record the position of the '#' now: reading the rest of the
                // comment (including the terminating newline) advances `line`
                // and `col` past the token's start, so constructing the Token
                // with the live counters would attribute it to the NEXT line.
                const commentStartLine = line;
                const commentStartCol = col;
                char[] commentText;
                bool streamEnded = false;
                try {
                    c = readChar(inputStream, line, col);
                    while (c != '\n') {
                        commentText ~= c;
                        c = readChar(inputStream, line, col);
                    }
                } catch (StreamEndException e) {
                    // A comment terminated by end-of-input (no trailing
                    // newline) is still a complete token; emit it below
                    // instead of discarding the accumulated text.
                    streamEnded = true;
                }
                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(cast(string) commentText),
                    commentStartLine,
                    commentStartCol
                );
                if (streamEnded) break;
            }
        }
    } catch (StreamEndException e) {
        // Expected: the outer read loop ran off the end of the input.
    }
    return tokenApp[];
}

/**
 * Reads a single character from the stream, keeping the line and column
 * counters in sync with the position of the character just read.
 * Params:
 *   stream = The stream to read from.
 *   line = The current 0-based line number; incremented when '\n' is read.
 *   col = The current column number; incremented per character, reset to 0
 *         when '\n' is read (so the first character of a line is column 1).
 * Returns: The character that was read.
 * Throws: LexerException if the underlying stream reports an error, or
 *   StreamEndException if the stream has no more characters.
 */
private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] buffer;
    StreamResult result = stream.readFromStream(buffer);
    if (result.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (result.count == 0) throw new StreamEndException();
    col++;
    if (buffer[0] == '\n') {
        line++;
        col = 0;
    }
    return buffer[0];
}