Changed to plain string parsing.

This commit is contained in:
Andrew Lalis 2023-06-27 06:45:28 +02:00
parent 175039a639
commit a02d2ded29
4 changed files with 88 additions and 108 deletions

View File

@ -1,8 +1,7 @@
import std.stdio;
import std.string;
import std.file;
import nimpc;
import streams;
import streams.types.mapping;
int main(string[] args) {
if (args.length < 2) {
@ -12,9 +11,10 @@ int main(string[] args) {
string[] files = args[1 .. $];
writefln!"Compiling %s"(files);
foreach (filename; files) {
auto sIn = FileInputStream(toStringz(filename));
auto tokens = tokenize(mappingInputStreamFor!((ubyte b) => cast(char) b)(sIn));
writeln(tokens);
auto tokens = tokenize(readText(filename));
foreach (token; tokens) {
writeln(token);
}
}
return 0;
}

View File

@ -1,102 +0,0 @@
module nimpc.lexer;
import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;
/// The categories of lexical tokens produced by the lexer.
enum TokenType {
    KEYWORD,          // A reserved language word.
    DATA_TYPE,        // A built-in data type name.
    SENTENCE_END,     // The terminator of a statement ("sentence").
    LITERAL_INTEGER,  // An integer literal.
    LITERAL_FLOAT,    // A floating-point literal.
    LITERAL_BOOLEAN,  // A boolean literal.
    LITERAL_STRING,   // A string literal.
    COMMENT_INLINE,   // An inline comment introduced by '#'.
    SYMBOL            // Any other symbol.
}
/// A single lexical token extracted from the source text.
immutable struct Token {
    TokenType type;  // The category of this token.
    string content;  // The raw text content of the token.
    uint line;       // Line number where the token starts (counting from 0).
    uint column;     // Column counter value where the token starts.
}
/// Thrown when the lexer fails while reading or interpreting source text.
/// Carries the source position at which the failure occurred.
class LexerException : Exception {
    const uint sourceLine;    // Line at which the error occurred.
    const uint sourceColumn;  // Column at which the error occurred.

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}
/// Thrown internally to signal that the input stream has no more data;
/// caught by tokenize as the normal end-of-input condition.
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}
/**
 * Parses a list of tokens from an input stream of lines of code.
 *
 * NOTE(review): in its current state only inline '#' comments are emitted
 * as tokens; all other characters are read and discarded.
 *
 * Params:
 *   inputStream = The stream of characters to parse.
 * Returns: A list of tokens.
 */
Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
    Appender!(Token[]) tokenApp;
    bool streamEnded = false; // NOTE(review): never written after init — candidate for removal.
    uint line = 0;
    uint col = 0;
    try {
        // This loop only exits via the StreamEndException thrown by readChar.
        while (true) {
            // Trim whitespace from the start of each line.
            char c = readChar(inputStream, line, col);
            while (isWhite(c)) {
                c = readChar(inputStream, line, col);
            }
            // Inline comment from here to newline.
            if (c == '#') {
                const commentStartCol = col;
                c = readChar(inputStream, line, col);
                char[] commentText;
                // NOTE(review): a comment terminated by end-of-stream rather
                // than '\n' raises StreamEndException here and the partially
                // collected token is lost — confirm whether that is intended.
                while (c != '\n') {
                    commentText ~= c;
                    c = readChar(inputStream, line, col);
                }
                // The stored content excludes the leading '#' and has
                // surrounding whitespace removed.
                tokenApp ~= Token(
                    TokenType.COMMENT_INLINE,
                    strip(cast(string) commentText),
                    line,
                    commentStartCol
                );
            }
        }
    } catch (StreamEndException e) {
        // This is expected! It marks the normal end of input.
    }
    return tokenApp[];
}
/// Reads exactly one character from the stream, updating the caller's
/// line/column counters. Throws LexerException on a read error, and
/// StreamEndException when the stream is exhausted.
private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
    char[1] buf;
    const StreamResult res = stream.readFromStream(buf);
    if (res.hasError) {
        throw new LexerException("Failed to read one more char from stream.", line, col);
    }
    if (res.count == 0) {
        throw new StreamEndException();
    }
    const char ch = buf[0];
    // Advance the column for every character; a newline then resets the
    // column and moves to the next line.
    col++;
    if (ch == '\n') {
        line++;
        col = 0;
    }
    return ch;
}

View File

@ -5,4 +5,4 @@
module nimpc;
public import nimpc.ast;
public import nimpc.lexer;
public import nimpc.parser;

View File

@ -0,0 +1,82 @@
module nimpc.parser;
import streams;
import std.stdio;
import std.regex;
import std.array;
import std.typecons;
import std.uni;
import std.string;
/// The categories of lexical tokens produced by the tokenizer.
enum TokenType {
    KEYWORD,          // A reserved language word, e.g. "define".
    DATA_TYPE,        // A built-in data type name.
    SENTENCE_END,     // The terminator of a statement ("sentence").
    LITERAL_INTEGER,  // An integer literal.
    LITERAL_FLOAT,    // A floating-point literal.
    LITERAL_BOOLEAN,  // A boolean literal.
    LITERAL_STRING,   // A string literal.
    COMMENT_INLINE,   // An inline comment introduced by '#'.
    SYMBOL            // Any other symbol.
}
/// A single lexical token extracted from the source text.
immutable struct Token {
    TokenType type;  // The category of this token.
    string content;  // The raw text content of the token.
    uint line;       // Line number where the token starts (counting from 0).
    uint column;     // Column counter value where the token starts.
}
/// Thrown when tokenization fails; carries the source position at which
/// the failure occurred.
class LexerException : Exception {
    const uint sourceLine;    // Line at which the error occurred.
    const uint sourceColumn;  // Column at which the error occurred.

    this(string msg, uint sourceLine, uint sourceColumn) {
        super(msg);
        this.sourceLine = sourceLine;
        this.sourceColumn = sourceColumn;
    }
}
/// Thrown to signal that an input stream has ended.
/// NOTE(review): not referenced by the string-based tokenizer in this
/// module — possibly a leftover from the stream-based lexer; confirm
/// before removing.
class StreamEndException : Exception {
    this() {
        super("Stream ended.");
    }
}
/**
 * Parses a list of tokens from a string of source code.
 *
 * Currently recognizes inline '#' comments (token content includes the
 * leading '#') and the "define" keyword; any other character is skipped.
 *
 * Params:
 *   input = The source text to tokenize.
 * Returns: A list of tokens, each tagged with the line and column counter
 *          values at which it starts.
 */
Token[] tokenize(string input) {
    Appender!(Token[]) tokenApp;
    uint line = 0;
    uint col = 0;

    // Consumes the first `count` characters of `input`, keeping the
    // line/column counters accurate as newlines go by.
    // (Fixes a defect where line/col were never updated, so every token
    // reported position 0,0.)
    void advance(size_t count) {
        foreach (ch; input[0 .. count]) {
            if (ch == '\n') {
                line++;
                col = 0;
            } else {
                col++;
            }
        }
        input = input[count .. $];
    }

    while (input.length > 0) {
        // Skip leading whitespace; unlike stripLeft, this counts the
        // newlines it discards.
        size_t ws = 0;
        while (ws < input.length && isWhite(input[ws])) {
            ws++;
        }
        advance(ws);
        if (input.length == 0) break;

        if (input[0] == '#') {
            // Inline comment: runs to the end of the line (or end of input).
            size_t endIdx = 0;
            while (endIdx < input.length && input[endIdx] != '\n') {
                endIdx++;
            }
            tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col);
            advance(endIdx);
        } else if (input.length >= 7 && input[0 .. 7] == "define ") {
            // "define" keyword followed by a space; the space is consumed
            // but not included in the token content.
            tokenApp ~= Token(TokenType.KEYWORD, input[0 .. 6], line, col);
            advance(7);
        } else {
            // Unrecognized character: skip it for now.
            advance(1);
        }
    }
    return tokenApp[];
}