diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..62622c5
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,25 @@
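+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>nl.andrewlalis</groupId>
+    <artifactId>grammar-tool</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>15</maven.compiler.source>
+        <maven.compiler.target>15</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.18</version>
+            <scope>provided</scope>
+            <optional>true</optional>
+        </dependency>
+    </dependencies>
+</project>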
\ No newline at end of file
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/GrammarTool.java b/src/main/java/nl/andrewlalis/grammar_tool/GrammarTool.java
new file mode 100644
index 0000000..d167c2d
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/GrammarTool.java
@@ -0,0 +1,20 @@
+package nl.andrewlalis.grammar_tool;
+
+import nl.andrewlalis.grammar_tool.grammar.ContextFreeGrammar;
+
+public class GrammarTool {
+ public static void main(String[] args) {
+ ContextFreeGrammar g2 = ContextFreeGrammar.fromProductionRules(
+ "S",
+ "S, A, B, C",
+ "a, b, c",
+ "S -> A,B",
+ "A -> a,S | ε",
+ "B -> b,B | ε",
+ "C -> c,C | ε"
+ );
+ System.out.println(g2);
+ ContextFreeGrammar productive = g2.toProductiveForm();
+ System.out.println(productive);
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/grammar/ContextFreeGrammar.java b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ContextFreeGrammar.java
new file mode 100644
index 0000000..93fa338
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ContextFreeGrammar.java
@@ -0,0 +1,163 @@
+package nl.andrewlalis.grammar_tool.grammar;
+
+import lombok.Getter;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+@Getter
+public class ContextFreeGrammar {
+ private final Set nonTerminalSymbols;
+ private final Set terminalSymbols;
+ private final Set productionRules;
+ private final Symbol startSymbol;
+
+ public ContextFreeGrammar(Set nonTerminalSymbols, Set terminalSymbols, Set productionRules, Symbol startSymbol) {
+ this.nonTerminalSymbols = Objects.requireNonNull(nonTerminalSymbols);
+ this.terminalSymbols = Objects.requireNonNull(terminalSymbols);
+ this.productionRules = Objects.requireNonNull(productionRules);
+ this.startSymbol = Objects.requireNonNull(startSymbol);
+ this.ensureValidElements();
+ }
+
+ private void ensureValidElements() {
+ if (!nonTerminalSymbols.contains(startSymbol)) {
+ throw new IllegalArgumentException("Start symbol must be an element of the set of non-terminal symbols.");
+ }
+ Set overlaps = new HashSet<>(this.terminalSymbols);
+ overlaps.retainAll(this.nonTerminalSymbols);
+ if (!overlaps.isEmpty()) {
+ throw new IllegalArgumentException("Terminal and non-terminal symbols are overlapping: " + overlaps);
+ }
+ for (ProductionRule rule : productionRules) {
+ if (!nonTerminalSymbols.contains(rule.getBeginSymbol())) {
+ throw new IllegalArgumentException("Production rule " + rule.toString() + " must begin with a symbol from the set of non-terminals.");
+ }
+ for (Symbol s : rule.getProducedSymbols()) {
+ if (!nonTerminalSymbols.contains(s) && !terminalSymbols.contains(s)) {
+ throw new IllegalArgumentException("Production rule " + rule.toString() + " must produce a string containing symbols that are elements of either terminals or non-terminals.");
+ }
+ }
+ }
+ }
+
+ public boolean isSymbolTerminal(Symbol s) {
+ return this.terminalSymbols.contains(s);
+ }
+
+ public boolean isSymbolNonTerminal(Symbol s) {
+ return this.nonTerminalSymbols.contains(s);
+ }
+
+ public Set findRulesByStartingSymbol(Symbol s) {
+ Set rules = new HashSet<>();
+ for (var rule : this.productionRules) {
+ if (rule.getBeginSymbol().equals(s)) rules.add(rule);
+ }
+ return rules;
+ }
+
+ /**
+ * Determines if a symbol is recursive in the grammar. A symbol is defined
+ * as recursive if it is a non-terminal that begins at least one production
+ * rule, and by following the non-terminals that rule produces, the symbol
+ * is again encountered as the result of another production rule.
+ * @param s The symbol to check.
+ * @return True if the symbol is recursive, or false otherwise.
+ */
+ public boolean isSymbolRecursive(Symbol s) {
+ Set symbolsToCheck = new HashSet<>();
+ symbolsToCheck.add(s);
+ Set rulesToCheck = new HashSet<>(this.productionRules);
+ while (!rulesToCheck.isEmpty()) {
+ Set rulesToAdd = new HashSet<>();
+ Set rulesToRemove = new HashSet<>();
+ for (var rule : rulesToCheck) {
+ if (rule.getProducedSymbols().contains(s)) return true;
+ if (symbolsToCheck.contains(rule.getBeginSymbol())) {
+ for (var symbol : rule.getProducedSymbols()) {
+ if (this.isSymbolNonTerminal(symbol)) {
+ rulesToAdd.addAll(this.productionRules);
+ }
+ }
+ }
+ rulesToRemove.add(rule);
+ }
+ rulesToCheck.removeAll(rulesToRemove);
+ rulesToCheck.addAll(rulesToAdd);
+ }
+ return false;
+ }
+
+ public ContextFreeGrammar toProductiveForm() {
+ Symbol newStart = this.startSymbol;
+ Set nonTerminals = new HashSet<>(this.nonTerminalSymbols);
+ Set terminals = new HashSet<>(this.terminalSymbols);
+ Set rules = new HashSet<>(this.productionRules);
+ if (this.isSymbolRecursive(this.startSymbol)) {
+ newStart = Symbol.of("_T");
+ nonTerminals.add(newStart);
+ rules.add(ProductionRule.of(newStart, this.startSymbol));
+ }
+
+ // Find all nullables.
+
+
+ return new ContextFreeGrammar(nonTerminals, terminals, rules, newStart);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ ContextFreeGrammar that = (ContextFreeGrammar) o;
+ return getNonTerminalSymbols().equals(that.getNonTerminalSymbols())
+ && getTerminalSymbols().equals(that.getTerminalSymbols())
+ && getProductionRules().equals(that.getProductionRules())
+ && getStartSymbol().equals(that.getStartSymbol());
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(getNonTerminalSymbols(), getTerminalSymbols(), getProductionRules(), getStartSymbol());
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ Map> rulesMap = new HashMap<>();
+ SortedSet beginSymbols = new TreeSet<>();
+ for (ProductionRule rule : this.productionRules) {
+ beginSymbols.add(rule.getBeginSymbol());
+ if (!rulesMap.containsKey(rule.getBeginSymbol())) {
+ rulesMap.put(rule.getBeginSymbol(), new TreeSet<>());
+ }
+ rulesMap.get(rule.getBeginSymbol()).add(rule);
+ }
+ // Do start symbol explicitly at the beginning.
+ beginSymbols.remove(this.startSymbol);
+ sb.append(this.startSymbol.getIdentifier()).append(" -> ");
+ sb.append(rulesMap.get(this.getStartSymbol()).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
+ sb.append("\n");
+
+ for (Symbol s : beginSymbols) {
+ sb.append(s.getIdentifier()).append(" -> ");
+ sb.append(rulesMap.get(s).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
+ sb.append("\n");
+ }
+ return sb.toString();
+ }
+
+ public static ContextFreeGrammar fromProductionRules(String start, String nonTerminals, String terminals, String... ruleExpressions) {
+ Set rules = new HashSet<>();
+ for (String ruleExpr : ruleExpressions) {
+ rules.addAll(ProductionRule.of(ruleExpr));
+ }
+ return new ContextFreeGrammar(
+ Symbol.setOf(nonTerminals.split(",")),
+ Symbol.setOf(terminals.split(",")),
+ rules,
+ Symbol.of(start)
+ );
+ }
+}
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/grammar/ProductionRule.java b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ProductionRule.java
new file mode 100644
index 0000000..110d7f9
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ProductionRule.java
@@ -0,0 +1,119 @@
+package nl.andrewlalis.grammar_tool.grammar;
+
+import lombok.Getter;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * A single production rule of a grammar: one begin symbol and the ordered
+ * list of symbols it produces. An empty list represents the empty string (ε).
+ */
+@Getter
+public class ProductionRule implements Comparable<ProductionRule> {
+    private final Symbol beginSymbol;
+    private final List<Symbol> producedSymbols;
+
+    /**
+     * @param beginSymbol The symbol this rule begins with.
+     * @param producedSymbols The symbols this rule produces, in order.
+     * @throws NullPointerException If either argument is null.
+     */
+    public ProductionRule(Symbol beginSymbol, List<Symbol> producedSymbols) {
+        this.beginSymbol = Objects.requireNonNull(beginSymbol);
+        // Keep a private unmodifiable copy: equals(), hashCode() and
+        // compareTo() read this list, so it must not change after construction.
+        this.producedSymbols = Collections.unmodifiableList(new ArrayList<>(Objects.requireNonNull(producedSymbols)));
+    }
+
+    /** @return True if this rule produces no symbols. */
+    public boolean isEmpty() {
+        return this.producedSymbols.isEmpty();
+    }
+
+    /** @return The produced identifiers joined by commas, or "ε" for an empty rule. */
+    public String getProducedSymbolsString() {
+        if (this.producedSymbols.isEmpty()) return "ε";
+        return this.producedSymbols.stream().map(Symbol::getIdentifier).collect(Collectors.joining(","));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        ProductionRule that = (ProductionRule) o;
+        return getBeginSymbol().equals(that.getBeginSymbol()) && getProducedSymbols().equals(that.getProducedSymbols());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getBeginSymbol(), getProducedSymbols());
+    }
+
+    @Override
+    public String toString() {
+        return this.beginSymbol + " -> " + this.getProducedSymbolsString();
+    }
+
+    /**
+     * Orders rules by begin symbol, then by number of produced symbols
+     * (longest first), then symbol by symbol. The last step keeps the ordering
+     * consistent with {@link #equals(Object)}, so sorted sets do not treat two
+     * different rules of the same length as duplicates.
+     */
+    @Override
+    public int compareTo(ProductionRule o) {
+        int beginComparison = this.beginSymbol.compareTo(o.beginSymbol);
+        if (beginComparison != 0) return beginComparison;
+        int sizeComparison = Integer.compare(o.producedSymbols.size(), this.producedSymbols.size());
+        if (sizeComparison != 0) return sizeComparison;
+        for (int i = 0; i < this.producedSymbols.size(); i++) {
+            int symbolComparison = this.producedSymbols.get(i).compareTo(o.producedSymbols.get(i));
+            if (symbolComparison != 0) return symbolComparison;
+        }
+        return 0;
+    }
+
+    /** Creates a rule from a begin symbol and the symbols it produces, in order. */
+    public static ProductionRule of(Symbol beginSymbol, Symbol... producedSymbols) {
+        return new ProductionRule(beginSymbol, Arrays.asList(producedSymbols));
+    }
+
+    /** Creates one rule per given list of produced symbols, all sharing the begin symbol. */
+    @SafeVarargs
+    public static Set<ProductionRule> setOf(Symbol beginSymbol, List<Symbol>... producedSymbols) {
+        Set<ProductionRule> rules = new HashSet<>();
+        for (var symbolsList : producedSymbols) {
+            rules.add(new ProductionRule(beginSymbol, symbolsList));
+        }
+        return rules;
+    }
+
+    /**
+     * Parses an expression such as {@code "A -> a,S | ε"} into one rule per
+     * {@code |}-separated alternative. Symbols within an alternative are
+     * separated by commas; an alternative consisting only of "ε" yields an
+     * empty rule.
+     * @param expression The text to parse; the begin symbol and the arrow must
+     * be separated by whitespace.
+     * @return The parsed rules.
+     * @throws NoSuchElementException If the expression does not start with a
+     * word token followed by a {@code ->} token and some remaining text.
+     */
+    public static Set<ProductionRule> of(String expression) {
+        Scanner scanner = new Scanner(expression);
+        Symbol beginSymbol = Symbol.of(scanner.next("\\w+"));
+        scanner.next("->");
+        String[] productions = scanner.nextLine().split("\\|");
+        Set<ProductionRule> rules = new HashSet<>();
+        for (String productionExpr : productions) {
+            String[] symbolNames = productionExpr.split(",");
+            if (symbolNames.length == 1 && symbolNames[0].trim().equalsIgnoreCase("ε")) {
+                rules.add(ProductionRule.of(beginSymbol));
+                continue;
+            }
+            rules.add(ProductionRule.of(beginSymbol, Symbol.arrayOf(symbolNames)));
+        }
+        return rules;
+    }
+}
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/grammar/Symbol.java b/src/main/java/nl/andrewlalis/grammar_tool/grammar/Symbol.java
new file mode 100644
index 0000000..bcfda51
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/grammar/Symbol.java
@@ -0,0 +1,60 @@
+package nl.andrewlalis.grammar_tool.grammar;
+
+import lombok.Getter;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/** A named grammar symbol, identified by its whitespace-trimmed identifier. */
+public class Symbol implements Comparable<Symbol> {
+    @Getter
+    private final String identifier;
+
+    /** Creates a symbol; the identifier is stored with surrounding whitespace trimmed. */
+    public Symbol(String identifier) {
+        this.identifier = Objects.requireNonNull(identifier, "identifier").trim();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        return this.identifier.equals(((Symbol) o).identifier);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(this.identifier);
+    }
+
+    @Override
+    public String toString() {
+        return this.identifier;
+    }
+
+    /** Orders symbols by the natural {@link String} order of their identifiers. */
+    @Override
+    public int compareTo(Symbol o) {
+        return this.identifier.compareTo(o.identifier);
+    }
+
+    /** Creates a symbol with the given identifier. */
+    public static Symbol of(String identifier) {
+        return new Symbol(identifier);
+    }
+
+    /** Creates a set with one symbol per identifier; equal symbols collapse into one. */
+    public static Set<Symbol> setOf(String... identifiers) {
+        Set<Symbol> symbols = new HashSet<>();
+        for (String id : identifiers) symbols.add(new Symbol(id));
+        return symbols;
+    }
+
+    /** Creates an array with one symbol per identifier, in the same order. */
+    public static Symbol[] arrayOf(String... identifiers) {
+        Symbol[] symbols = new Symbol[identifiers.length];
+        for (int i = 0; i < identifiers.length; i++) symbols[i] = new Symbol(identifiers[i]);
+        return symbols;
+    }
+}