diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..62622c5
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>nl.andrewlalis</groupId>
+    <artifactId>grammar-tool</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>15</maven.compiler.source>
+        <maven.compiler.target>15</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.18</version>
+            <scope>provided</scope>
+            <optional>true</optional>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/GrammarTool.java b/src/main/java/nl/andrewlalis/grammar_tool/GrammarTool.java
new file mode 100644
index 0000000..d167c2d
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/GrammarTool.java
@@ -0,0 +1,23 @@
+package nl.andrewlalis.grammar_tool;
+
+import nl.andrewlalis.grammar_tool.grammar.ContextFreeGrammar;
+
+/**
+ * Demo entry point: builds a sample grammar, prints it, then prints its productive form.
+ */
+public class GrammarTool {
+    public static void main(String[] args) {
+        ContextFreeGrammar g2 = ContextFreeGrammar.fromProductionRules(
+                "S",
+                "S, A, B, C",
+                "a, b, c",
+                "S -> A,B",
+                "A -> a,S | ε",
+                "B -> b,B | ε",
+                "C -> c,C | ε"
+        );
+        System.out.println(g2);
+        ContextFreeGrammar productive = g2.toProductiveForm();
+        System.out.println(productive);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/grammar/ContextFreeGrammar.java b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ContextFreeGrammar.java
new file mode 100644
index 0000000..93fa338
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ContextFreeGrammar.java
@@ -0,0 +1,202 @@
+package nl.andrewlalis.grammar_tool.grammar;
+
+import lombok.Getter;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * A context-free grammar: sets of non-terminal and terminal symbols, a set of
+ * production rules, and a start symbol. Instances are validated on creation.
+ */
+@Getter
+public class ContextFreeGrammar {
+    private final Set<Symbol> nonTerminalSymbols;
+    private final Set<Symbol> terminalSymbols;
+    private final Set<ProductionRule> productionRules;
+    private final Symbol startSymbol;
+
+    /**
+     * Creates a grammar from its four components.
+     * @throws NullPointerException If any argument is null.
+     * @throws IllegalArgumentException If the start symbol is not a
+     * non-terminal, if terminals and non-terminals overlap, or if a rule uses
+     * a symbol that is not part of the grammar.
+     */
+    public ContextFreeGrammar(Set<Symbol> nonTerminalSymbols, Set<Symbol> terminalSymbols, Set<ProductionRule> productionRules, Symbol startSymbol) {
+        this.nonTerminalSymbols = Objects.requireNonNull(nonTerminalSymbols);
+        this.terminalSymbols = Objects.requireNonNull(terminalSymbols);
+        this.productionRules = Objects.requireNonNull(productionRules);
+        this.startSymbol = Objects.requireNonNull(startSymbol);
+        this.ensureValidElements();
+    }
+
+    /** Checks the invariants listed on the constructor; throws if one is violated. */
+    private void ensureValidElements() {
+        if (!nonTerminalSymbols.contains(startSymbol)) {
+            throw new IllegalArgumentException("Start symbol must be an element of the set of non-terminal symbols.");
+        }
+        Set<Symbol> overlaps = new HashSet<>(this.terminalSymbols);
+        overlaps.retainAll(this.nonTerminalSymbols);
+        if (!overlaps.isEmpty()) {
+            throw new IllegalArgumentException("Terminal and non-terminal symbols are overlapping: " + overlaps);
+        }
+        for (ProductionRule rule : productionRules) {
+            if (!nonTerminalSymbols.contains(rule.getBeginSymbol())) {
+                throw new IllegalArgumentException("Production rule " + rule.toString() + " must begin with a symbol from the set of non-terminals.");
+            }
+            for (Symbol s : rule.getProducedSymbols()) {
+                if (!nonTerminalSymbols.contains(s) && !terminalSymbols.contains(s)) {
+                    throw new IllegalArgumentException("Production rule " + rule.toString() + " must produce a string containing symbols that are elements of either terminals or non-terminals.");
+                }
+            }
+        }
+    }
+
+    public boolean isSymbolTerminal(Symbol s) {
+        return this.terminalSymbols.contains(s);
+    }
+
+    public boolean isSymbolNonTerminal(Symbol s) {
+        return this.nonTerminalSymbols.contains(s);
+    }
+
+    /**
+     * Collects every production rule whose left-hand side is the given symbol.
+     * @param s The symbol to look up.
+     * @return A new, possibly empty, set of matching rules.
+     */
+    public Set<ProductionRule> findRulesByStartingSymbol(Symbol s) {
+        Set<ProductionRule> rules = new HashSet<>();
+        for (var rule : this.productionRules) {
+            if (rule.getBeginSymbol().equals(s)) rules.add(rule);
+        }
+        return rules;
+    }
+
+    /**
+     * Determines if a symbol is recursive in the grammar. A symbol is defined
+     * as recursive if it is a non-terminal that begins at least one production
+     * rule, and by following the non-terminals that rule produces, the symbol
+     * is again encountered as the result of another production rule.
+     * <p>
+     * Implemented as a worklist traversal over the non-terminals reachable
+     * from {@code s}; each non-terminal is expanded at most once, so the
+     * search always terminates.
+     * @param s The symbol to check.
+     * @return True if the symbol is recursive, or false otherwise.
+     */
+    public boolean isSymbolRecursive(Symbol s) {
+        Set<Symbol> visited = new HashSet<>();
+        Deque<Symbol> pending = new ArrayDeque<>();
+        visited.add(s);
+        pending.push(s);
+        while (!pending.isEmpty()) {
+            Symbol current = pending.pop();
+            for (var rule : this.findRulesByStartingSymbol(current)) {
+                for (var produced : rule.getProducedSymbols()) {
+                    if (produced.equals(s)) return true;
+                    // Set.add returns false for symbols already queued once.
+                    if (this.isSymbolNonTerminal(produced) && visited.add(produced)) {
+                        pending.push(produced);
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Builds a copy of this grammar in which the start symbol is not
+     * recursive: if it is, a fresh start symbol producing the old one is
+     * added. Further steps (such as removing nullable productions) are not
+     * implemented yet.
+     * @return The new grammar.
+     */
+    public ContextFreeGrammar toProductiveForm() {
+        Symbol newStart = this.startSymbol;
+        Set<Symbol> nonTerminals = new HashSet<>(this.nonTerminalSymbols);
+        Set<Symbol> terminals = new HashSet<>(this.terminalSymbols);
+        Set<ProductionRule> rules = new HashSet<>(this.productionRules);
+        if (this.isSymbolRecursive(this.startSymbol)) {
+            // Prefix underscores until the name collides with no existing symbol.
+            String name = "_T";
+            while (nonTerminals.contains(Symbol.of(name)) || terminals.contains(Symbol.of(name))) {
+                name = "_" + name;
+            }
+            newStart = Symbol.of(name);
+            nonTerminals.add(newStart);
+            rules.add(ProductionRule.of(newStart, this.startSymbol));
+        }
+
+        // TODO: Find all nullables.
+
+        return new ContextFreeGrammar(nonTerminals, terminals, rules, newStart);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        ContextFreeGrammar that = (ContextFreeGrammar) o;
+        return getNonTerminalSymbols().equals(that.getNonTerminalSymbols())
+                && getTerminalSymbols().equals(that.getTerminalSymbols())
+                && getProductionRules().equals(that.getProductionRules())
+                && getStartSymbol().equals(that.getStartSymbol());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getNonTerminalSymbols(), getTerminalSymbols(), getProductionRules(), getStartSymbol());
+    }
+
+    /**
+     * Renders one line per symbol that begins at least one rule, in the form
+     * {@code X -> alternative | alternative}. The start symbol comes first,
+     * the remaining symbols follow in their natural order.
+     */
+    @Override
+    public String toString() {
+        SortedMap<Symbol, SortedSet<ProductionRule>> rulesMap = new TreeMap<>();
+        for (ProductionRule rule : this.productionRules) {
+            rulesMap.computeIfAbsent(rule.getBeginSymbol(), k -> new TreeSet<>()).add(rule);
+        }
+        StringBuilder sb = new StringBuilder();
+        // Do start symbol explicitly at the beginning; it may have no rules at all.
+        SortedSet<ProductionRule> startRules = rulesMap.remove(this.startSymbol);
+        if (startRules != null) appendRulesLine(sb, this.startSymbol, startRules);
+        for (var entry : rulesMap.entrySet()) {
+            appendRulesLine(sb, entry.getKey(), entry.getValue());
+        }
+        return sb.toString();
+    }
+
+    /** Appends a single {@code X -> a | b} line for the given symbol. */
+    private static void appendRulesLine(StringBuilder sb, Symbol begin, Collection<ProductionRule> rules) {
+        sb.append(begin.getIdentifier()).append(" -> ");
+        sb.append(rules.stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
+        sb.append("\n");
+    }
+
+    /**
+     * Parses a grammar from textual parts.
+     * @param start Identifier of the start symbol.
+     * @param nonTerminals Comma-separated non-terminal identifiers.
+     * @param terminals Comma-separated terminal identifiers.
+     * @param ruleExpressions Rules such as {@code "A -> a,S | ε"}; see
+     * {@link ProductionRule#of(String)}.
+     * @return The validated grammar.
+     */
+    public static ContextFreeGrammar fromProductionRules(String start, String nonTerminals, String terminals, String... ruleExpressions) {
+        Set<ProductionRule> rules = new HashSet<>();
+        for (String ruleExpr : ruleExpressions) {
+            rules.addAll(ProductionRule.of(ruleExpr));
+        }
+        return new ContextFreeGrammar(
+                Symbol.setOf(nonTerminals.split(",")),
+                Symbol.setOf(terminals.split(",")),
+                rules,
+                Symbol.of(start)
+        );
+    }
+}
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/grammar/ProductionRule.java b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ProductionRule.java
new file mode 100644
index 0000000..110d7f9
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/grammar/ProductionRule.java
@@ -0,0 +1,117 @@
+package nl.andrewlalis.grammar_tool.grammar;
+
+import lombok.Getter;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * A single production {@code begin -> s1,s2,...}. An empty list of produced
+ * symbols represents the empty string (ε).
+ */
+@Getter
+public class ProductionRule implements Comparable<ProductionRule> {
+    private final Symbol beginSymbol;
+    private final List<Symbol> producedSymbols;
+
+    /**
+     * @param beginSymbol The left-hand side of the rule.
+     * @param producedSymbols The right-hand side, in order. The list is copied
+     * so that later changes by the caller cannot alter this rule.
+     * @throws NullPointerException If an argument or a produced symbol is null.
+     */
+    public ProductionRule(Symbol beginSymbol, List<Symbol> producedSymbols) {
+        this.beginSymbol = Objects.requireNonNull(beginSymbol);
+        this.producedSymbols = List.copyOf(Objects.requireNonNull(producedSymbols));
+    }
+
+    public boolean isEmpty() {
+        return this.producedSymbols.isEmpty();
+    }
+
+    /**
+     * @return The produced symbols joined by commas, or "ε" for an empty rule.
+     */
+    public String getProducedSymbolsString() {
+        if (this.producedSymbols.isEmpty()) return "ε";
+        return this.producedSymbols.stream().map(Symbol::getIdentifier).collect(Collectors.joining(","));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        ProductionRule that = (ProductionRule) o;
+        return getBeginSymbol().equals(that.getBeginSymbol()) && getProducedSymbols().equals(that.getProducedSymbols());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getBeginSymbol(), getProducedSymbols());
+    }
+
+    @Override
+    public String toString() {
+        return this.beginSymbol + " -> " + this.getProducedSymbolsString();
+    }
+
+    /**
+     * Orders rules by begin symbol, then longest right-hand side first, then
+     * symbol by symbol. The last step keeps the ordering consistent with
+     * {@link #equals(Object)}; without it a sorted set treats two different
+     * rules of equal length as duplicates and silently drops one.
+     */
+    @Override
+    public int compareTo(ProductionRule o) {
+        int beginComparison = this.beginSymbol.compareTo(o.beginSymbol);
+        if (beginComparison != 0) return beginComparison;
+        int sizeComparison = Integer.compare(o.producedSymbols.size(), this.producedSymbols.size());
+        if (sizeComparison != 0) return sizeComparison;
+        for (int i = 0; i < this.producedSymbols.size(); i++) {
+            int symbolComparison = this.producedSymbols.get(i).compareTo(o.producedSymbols.get(i));
+            if (symbolComparison != 0) return symbolComparison;
+        }
+        return 0;
+    }
+
+    public static ProductionRule of(Symbol beginSymbol, Symbol... producedSymbols) {
+        return new ProductionRule(beginSymbol, Arrays.asList(producedSymbols));
+    }
+
+    @SafeVarargs
+    public static Set<ProductionRule> setOf(Symbol beginSymbol, List<Symbol>... producedSymbols) {
+        Set<ProductionRule> rules = new HashSet<>();
+        for (var symbolsList : producedSymbols) {
+            rules.add(new ProductionRule(beginSymbol, symbolsList));
+        }
+        return rules;
+    }
+
+    /**
+     * Parses an expression such as {@code "A -> a,S | ε"} into one rule per
+     * alternative. Alternatives are separated by "|", symbols by ",".
+     * @param expression The rule expression.
+     * @return The set of parsed rules.
+     * @throws NoSuchElementException If the expression does not have the
+     * form {@code begin -> alternatives}.
+     */
+    public static Set<ProductionRule> of(String expression) {
+        String[] productions;
+        Symbol beginSymbol;
+        try (Scanner scanner = new Scanner(expression)) {
+            beginSymbol = Symbol.of(scanner.next("\\w+"));
+            scanner.next("->");
+            productions = scanner.nextLine().split("\\|");
+        }
+        Set<ProductionRule> rules = new HashSet<>();
+        for (String productionExpr : productions) {
+            String[] symbolNames = productionExpr.split(",");
+            if (symbolNames.length == 1 && symbolNames[0].trim().equalsIgnoreCase("ε")) {
+                rules.add(ProductionRule.of(beginSymbol));
+                continue;
+            }
+            rules.add(ProductionRule.of(beginSymbol, Symbol.arrayOf(symbolNames)));
+        }
+        return rules;
+    }
+}
diff --git a/src/main/java/nl/andrewlalis/grammar_tool/grammar/Symbol.java b/src/main/java/nl/andrewlalis/grammar_tool/grammar/Symbol.java
new file mode 100644
index 0000000..bcfda51
--- /dev/null
+++ b/src/main/java/nl/andrewlalis/grammar_tool/grammar/Symbol.java
@@ -0,0 +1,64 @@
+package nl.andrewlalis.grammar_tool.grammar;
+
+import lombok.Getter;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * A grammar symbol, identified by its name. Surrounding whitespace in the
+ * name is trimmed, so {@code " A"} and {@code "A"} denote the same symbol.
+ */
+public class Symbol implements Comparable<Symbol> {
+    @Getter
+    private final String identifier;
+
+    public Symbol(String identifier) {
+        this.identifier = Objects.requireNonNull(identifier).trim();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        Symbol symbol = (Symbol) o;
+        return getIdentifier().equals(symbol.getIdentifier());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getIdentifier());
+    }
+
+    @Override
+    public String toString() {
+        return this.identifier;
+    }
+
+    @Override
+    public int compareTo(Symbol o) {
+        return this.identifier.compareTo(o.identifier);
+    }
+
+    public static Symbol of(String identifier) {
+        return new Symbol(identifier);
+    }
+
+    public static Set<Symbol> setOf(String... identifiers) {
+        Set<Symbol> symbols = new HashSet<>();
+        for (String i : identifiers) {
+            symbols.add(new Symbol(i));
+        }
+        return symbols;
+    }
+
+    public static Symbol[] arrayOf(String... identifiers) {
+        Symbol[] symbols = new Symbol[identifiers.length];
+        int index = 0;
+        for (String i : identifiers) {
+            symbols[index++] = new Symbol(i);
+        }
+        return symbols;
+    }
+}