Changed to plain string parsing.

parent 175039a639
commit a02d2ded29
@@ -1,8 +1,7 @@
 import std.stdio;
 import std.string;
+import std.file;
 import nimpc;
-import streams;
-import streams.types.mapping;
 
 int main(string[] args) {
     if (args.length < 2) {
@@ -12,9 +11,10 @@ int main(string[] args) {
     string[] files = args[1 .. $];
     writefln!"Compiling %s"(files);
     foreach (filename; files) {
-        auto sIn = FileInputStream(toStringz(filename));
-        auto tokens = tokenize(mappingInputStreamFor!((ubyte b) => cast(char) b)(sIn));
-        writeln(tokens);
+        auto tokens = tokenize(readText(filename));
+        foreach (token; tokens) {
+            writeln(token);
+        }
     }
     return 0;
 }
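The rewritten main loads each file fully into memory with std.file.readText and hands the resulting string to tokenize, instead of pulling bytes through a mapping input stream. A minimal sketch of the new call pattern, assuming a hypothetical wrapper name (compileOne is not part of the commit):

import std.file : readText;
import std.stdio : writeln;
import nimpc; // after this commit, tokenize takes a string

void compileOne(string filename) {
    // readText reads the whole file and validates it as UTF-8.
    string source = readText(filename);
    foreach (token; tokenize(source)) {
        writeln(token);
    }
}

Loading the whole file trades streaming memory use for much simpler slice-based scanning, a reasonable trade for typical source files.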
@@ -1,102 +0,0 @@
-module nimpc.lexer;
-
-import streams;
-import std.stdio;
-import std.regex;
-import std.array;
-import std.typecons;
-import std.uni;
-import std.string;
-
-enum TokenType {
-    KEYWORD,
-    DATA_TYPE,
-    SENTENCE_END,
-    LITERAL_INTEGER,
-    LITERAL_FLOAT,
-    LITERAL_BOOLEAN,
-    LITERAL_STRING,
-    COMMENT_INLINE,
-    SYMBOL
-}
-
-immutable struct Token {
-    TokenType type;
-    string content;
-    uint line;
-    uint column;
-}
-
-class LexerException : Exception {
-    const uint sourceLine;
-    const uint sourceColumn;
-
-    this(string msg, uint sourceLine, uint sourceColumn) {
-        super(msg);
-        this.sourceLine = sourceLine;
-        this.sourceColumn = sourceColumn;
-    }
-}
-
-class StreamEndException : Exception {
-    this() {
-        super("Stream ended.");
-    }
-}
-
-/**
- * Parses a list of tokens from an input stream of lines of code.
- * Params:
- *   inputStream = The lines of input to parse.
- * Returns: A list of tokens.
- */
-Token[] tokenize(S)(S inputStream) if (isInputStream!(S, char)) {
-    Appender!(Token[]) tokenApp;
-    bool streamEnded = false;
-    uint line = 0;
-    uint col = 0;
-
-    try {
-        while (true) {
-            // Trim whitespace from the start of each line.
-            char c = readChar(inputStream, line, col);
-            while (isWhite(c)) {
-                c = readChar(inputStream, line, col);
-            }
-            // Inline comment from here to newline.
-            if (c == '#') {
-                const commentStartCol = col;
-                c = readChar(inputStream, line, col);
-                char[] commentText;
-                while (c != '\n') {
-                    commentText ~= c;
-                    c = readChar(inputStream, line, col);
-                }
-                tokenApp ~= Token(
-                    TokenType.COMMENT_INLINE,
-                    strip(cast(string) commentText),
-                    line,
-                    commentStartCol
-                );
-            }
-        }
-    } catch (StreamEndException e) {
-        // This is expected!
-    }
-    return tokenApp[];
-}
-
-private char readChar(S)(S stream, ref uint line, ref uint col) if (isInputStream!(S, char)) {
-    char[1] buffer;
-    StreamResult result = stream.readFromStream(buffer);
-    if (result.hasError) {
-        throw new LexerException("Failed to read one more char from stream.", line, col);
-    }
-    if (result.count == 0) throw new StreamEndException();
-    col++;
-    if (buffer[0] == '\n') {
-        line++;
-        col = 0;
-    }
-    return buffer[0];
-}
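One thing the deleted stream lexer had that the replacement drops: readChar advanced line and col through its ref parameters as characters were consumed, so each Token carried a real source position. The new tokenize below declares line and col but never updates them, leaving every token at position 0,0. A sketch of how the tracking could be restored over a string input (advance is a hypothetical helper, not in the commit):

// Hypothetical helper: advance line/column counters over a consumed slice,
// mirroring what the deleted readChar did per character.
void advance(string consumed, ref uint line, ref uint col) {
    foreach (char c; consumed) {
        if (c == '\n') {
            line++;
            col = 0;
        } else {
            col++;
        }
    }
}

Calling advance on every slice removed from input, including the whitespace dropped by stripLeft, would keep the counters in step with the scanner.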
@@ -5,4 +5,4 @@
 module nimpc;
 
 public import nimpc.ast;
-public import nimpc.lexer;
+public import nimpc.parser;
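The package module works as a facade: swapping the public import keeps Token, TokenType, and tokenize reachable through a plain import nimpc;, since nimpc.parser re-declares the same names the deleted nimpc.lexer exported. Call sites only need to change what they pass in, as the app.d hunk above shows. For illustration (demo is hypothetical):

import nimpc;

void demo() {
    // Same symbol names as before the commit; the argument is now a
    // string rather than a char input stream.
    Token[] tokens = tokenize("define x");
}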
@@ -0,0 +1,82 @@
+module nimpc.parser;
+
+import streams;
+import std.stdio;
+import std.regex;
+import std.array;
+import std.typecons;
+import std.uni;
+import std.string;
+
+enum TokenType {
+    KEYWORD,
+    DATA_TYPE,
+    SENTENCE_END,
+    LITERAL_INTEGER,
+    LITERAL_FLOAT,
+    LITERAL_BOOLEAN,
+    LITERAL_STRING,
+    COMMENT_INLINE,
+    SYMBOL
+}
+
+immutable struct Token {
+    TokenType type;
+    string content;
+    uint line;
+    uint column;
+}
+
+class LexerException : Exception {
+    const uint sourceLine;
+    const uint sourceColumn;
+
+    this(string msg, uint sourceLine, uint sourceColumn) {
+        super(msg);
+        this.sourceLine = sourceLine;
+        this.sourceColumn = sourceColumn;
+    }
+}
+
+class StreamEndException : Exception {
+    this() {
+        super("Stream ended.");
+    }
+}
+
+/**
+ * Parses a list of tokens from an input stream of lines of code.
+ * Params:
+ *   input = The lines of input to parse.
+ * Returns: A list of tokens.
+ */
+Token[] tokenize(string input) {
+    Appender!(Token[]) tokenApp;
+    bool streamEnded = false;
+    uint line = 0;
+    uint col = 0;
+
+    while (input.length > 0) {
+        input = stripLeft(input);
+        if (input.length == 0) break;
+        if (input[0] == '#') {
+            writeln("Parsing inline comment.");
+            // parse inline comment.
+            size_t endIdx = 0;
+            while (!(endIdx >= input.length || input[endIdx] == '\n')) {
+                endIdx++;
+            }
+            tokenApp ~= Token(TokenType.COMMENT_INLINE, input[0 .. endIdx], line, col);
+            input = input[endIdx .. $];
+        } else if (input.length >= 7 && input[0 .. 7] == "define ") {
+            tokenApp ~= Token(TokenType.KEYWORD, input[0 .. 6], line, col);
+            input = input[7 .. $];
+        } else {
+            input = input[1 .. $];
+        }
+    }
+
+    return tokenApp[];
+}
+
+
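A quick check of what the new tokenize actually produces on the two constructs it recognizes so far, as a hedged unittest sketch (the input string is made up for illustration):

unittest {
    import nimpc.parser : tokenize, TokenType;

    auto tokens = tokenize("# a comment\ndefine x\n");
    // The '#' branch keeps the leading '#', unlike the old lexer,
    // which stripped the marker and surrounding whitespace.
    assert(tokens[0].type == TokenType.COMMENT_INLINE);
    assert(tokens[0].content == "# a comment");
    // The "define " branch emits the bare keyword without the space.
    assert(tokens[1].type == TokenType.KEYWORD);
    assert(tokens[1].content == "define");
}

Note also that the final else branch silently skips any character it does not recognize, so the lone x in the sample input produces no token at all.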