Added files.
This commit is contained in:
parent
536a19d150
commit
48a69e4bb3
|
@ -0,0 +1,25 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>nl.andrewlalis</groupId>
    <artifactId>grammar-tool</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>15</maven.compiler.source>
        <maven.compiler.target>15</maven.compiler.target>
        <!--
            The Java sources contain non-ASCII string literals (the epsilon
            character used for empty productions), so the source encoding is
            pinned instead of being left to the platform default.
        -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Lombok is only needed at compile time to generate getters. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.18</version>
            <scope>provided</scope>
            <optional>true</optional>
        </dependency>
    </dependencies>
</project>
|
|
@ -0,0 +1,20 @@
|
||||||
|
package nl.andrewlalis.grammar_tool;
|
||||||
|
|
||||||
|
import nl.andrewlalis.grammar_tool.grammar.ContextFreeGrammar;
|
||||||
|
|
||||||
|
public class GrammarTool {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
ContextFreeGrammar g2 = ContextFreeGrammar.fromProductionRules(
|
||||||
|
"S",
|
||||||
|
"S, A, B, C",
|
||||||
|
"a, b, c",
|
||||||
|
"S -> A,B",
|
||||||
|
"A -> a,S | ε",
|
||||||
|
"B -> b,B | ε",
|
||||||
|
"C -> c,C | ε"
|
||||||
|
);
|
||||||
|
System.out.println(g2);
|
||||||
|
ContextFreeGrammar productive = g2.toProductiveForm();
|
||||||
|
System.out.println(productive);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,163 @@
|
||||||
|
package nl.andrewlalis.grammar_tool.grammar;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
public class ContextFreeGrammar {
|
||||||
|
private final Set<Symbol> nonTerminalSymbols;
|
||||||
|
private final Set<Symbol> terminalSymbols;
|
||||||
|
private final Set<ProductionRule> productionRules;
|
||||||
|
private final Symbol startSymbol;
|
||||||
|
|
||||||
|
public ContextFreeGrammar(Set<Symbol> nonTerminalSymbols, Set<Symbol> terminalSymbols, Set<ProductionRule> productionRules, Symbol startSymbol) {
|
||||||
|
this.nonTerminalSymbols = Objects.requireNonNull(nonTerminalSymbols);
|
||||||
|
this.terminalSymbols = Objects.requireNonNull(terminalSymbols);
|
||||||
|
this.productionRules = Objects.requireNonNull(productionRules);
|
||||||
|
this.startSymbol = Objects.requireNonNull(startSymbol);
|
||||||
|
this.ensureValidElements();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ensureValidElements() {
|
||||||
|
if (!nonTerminalSymbols.contains(startSymbol)) {
|
||||||
|
throw new IllegalArgumentException("Start symbol must be an element of the set of non-terminal symbols.");
|
||||||
|
}
|
||||||
|
Set<Symbol> overlaps = new HashSet<>(this.terminalSymbols);
|
||||||
|
overlaps.retainAll(this.nonTerminalSymbols);
|
||||||
|
if (!overlaps.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("Terminal and non-terminal symbols are overlapping: " + overlaps);
|
||||||
|
}
|
||||||
|
for (ProductionRule rule : productionRules) {
|
||||||
|
if (!nonTerminalSymbols.contains(rule.getBeginSymbol())) {
|
||||||
|
throw new IllegalArgumentException("Production rule " + rule.toString() + " must begin with a symbol from the set of non-terminals.");
|
||||||
|
}
|
||||||
|
for (Symbol s : rule.getProducedSymbols()) {
|
||||||
|
if (!nonTerminalSymbols.contains(s) && !terminalSymbols.contains(s)) {
|
||||||
|
throw new IllegalArgumentException("Production rule " + rule.toString() + " must produce a string containing symbols that are elements of either terminals or non-terminals.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSymbolTerminal(Symbol s) {
|
||||||
|
return this.terminalSymbols.contains(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSymbolNonTerminal(Symbol s) {
|
||||||
|
return this.nonTerminalSymbols.contains(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<ProductionRule> findRulesByStartingSymbol(Symbol s) {
|
||||||
|
Set<ProductionRule> rules = new HashSet<>();
|
||||||
|
for (var rule : this.productionRules) {
|
||||||
|
if (rule.getBeginSymbol().equals(s)) rules.add(rule);
|
||||||
|
}
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines if a symbol is recursive in the grammar. A symbol is defined
|
||||||
|
* as recursive if it is a non-terminal that begins at least one production
|
||||||
|
* rule, and by following the non-terminals that rule produces, the symbol
|
||||||
|
* is again encountered as the result of another production rule.
|
||||||
|
* @param s The symbol to check.
|
||||||
|
* @return True if the symbol is recursive, or false otherwise.
|
||||||
|
*/
|
||||||
|
public boolean isSymbolRecursive(Symbol s) {
|
||||||
|
Set<Symbol> symbolsToCheck = new HashSet<>();
|
||||||
|
symbolsToCheck.add(s);
|
||||||
|
Set<ProductionRule> rulesToCheck = new HashSet<>(this.productionRules);
|
||||||
|
while (!rulesToCheck.isEmpty()) {
|
||||||
|
Set<ProductionRule> rulesToAdd = new HashSet<>();
|
||||||
|
Set<ProductionRule> rulesToRemove = new HashSet<>();
|
||||||
|
for (var rule : rulesToCheck) {
|
||||||
|
if (rule.getProducedSymbols().contains(s)) return true;
|
||||||
|
if (symbolsToCheck.contains(rule.getBeginSymbol())) {
|
||||||
|
for (var symbol : rule.getProducedSymbols()) {
|
||||||
|
if (this.isSymbolNonTerminal(symbol)) {
|
||||||
|
rulesToAdd.addAll(this.productionRules);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rulesToRemove.add(rule);
|
||||||
|
}
|
||||||
|
rulesToCheck.removeAll(rulesToRemove);
|
||||||
|
rulesToCheck.addAll(rulesToAdd);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ContextFreeGrammar toProductiveForm() {
|
||||||
|
Symbol newStart = this.startSymbol;
|
||||||
|
Set<Symbol> nonTerminals = new HashSet<>(this.nonTerminalSymbols);
|
||||||
|
Set<Symbol> terminals = new HashSet<>(this.terminalSymbols);
|
||||||
|
Set<ProductionRule> rules = new HashSet<>(this.productionRules);
|
||||||
|
if (this.isSymbolRecursive(this.startSymbol)) {
|
||||||
|
newStart = Symbol.of("_T");
|
||||||
|
nonTerminals.add(newStart);
|
||||||
|
rules.add(ProductionRule.of(newStart, this.startSymbol));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find all nullables.
|
||||||
|
|
||||||
|
|
||||||
|
return new ContextFreeGrammar(nonTerminals, terminals, rules, newStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
ContextFreeGrammar that = (ContextFreeGrammar) o;
|
||||||
|
return getNonTerminalSymbols().equals(that.getNonTerminalSymbols())
|
||||||
|
&& getTerminalSymbols().equals(that.getTerminalSymbols())
|
||||||
|
&& getProductionRules().equals(that.getProductionRules())
|
||||||
|
&& getStartSymbol().equals(that.getStartSymbol());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(getNonTerminalSymbols(), getTerminalSymbols(), getProductionRules(), getStartSymbol());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
Map<Symbol, SortedSet<ProductionRule>> rulesMap = new HashMap<>();
|
||||||
|
SortedSet<Symbol> beginSymbols = new TreeSet<>();
|
||||||
|
for (ProductionRule rule : this.productionRules) {
|
||||||
|
beginSymbols.add(rule.getBeginSymbol());
|
||||||
|
if (!rulesMap.containsKey(rule.getBeginSymbol())) {
|
||||||
|
rulesMap.put(rule.getBeginSymbol(), new TreeSet<>());
|
||||||
|
}
|
||||||
|
rulesMap.get(rule.getBeginSymbol()).add(rule);
|
||||||
|
}
|
||||||
|
// Do start symbol explicitly at the beginning.
|
||||||
|
beginSymbols.remove(this.startSymbol);
|
||||||
|
sb.append(this.startSymbol.getIdentifier()).append(" -> ");
|
||||||
|
sb.append(rulesMap.get(this.getStartSymbol()).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
|
||||||
|
sb.append("\n");
|
||||||
|
|
||||||
|
for (Symbol s : beginSymbols) {
|
||||||
|
sb.append(s.getIdentifier()).append(" -> ");
|
||||||
|
sb.append(rulesMap.get(s).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
|
||||||
|
sb.append("\n");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ContextFreeGrammar fromProductionRules(String start, String nonTerminals, String terminals, String... ruleExpressions) {
|
||||||
|
Set<ProductionRule> rules = new HashSet<>();
|
||||||
|
for (String ruleExpr : ruleExpressions) {
|
||||||
|
rules.addAll(ProductionRule.of(ruleExpr));
|
||||||
|
}
|
||||||
|
return new ContextFreeGrammar(
|
||||||
|
Symbol.setOf(nonTerminals.split(",")),
|
||||||
|
Symbol.setOf(terminals.split(",")),
|
||||||
|
rules,
|
||||||
|
Symbol.of(start)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
package nl.andrewlalis.grammar_tool.grammar;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
public class ProductionRule implements Comparable<ProductionRule> {
|
||||||
|
private final Symbol beginSymbol;
|
||||||
|
private final List<Symbol> producedSymbols;
|
||||||
|
|
||||||
|
public ProductionRule(Symbol beginSymbol, List<Symbol> producedSymbols) {
|
||||||
|
this.beginSymbol = Objects.requireNonNull(beginSymbol);
|
||||||
|
this.producedSymbols = Objects.requireNonNull(producedSymbols);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return this.producedSymbols.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProducedSymbolsString() {
|
||||||
|
if (this.producedSymbols.isEmpty()) return "ε";
|
||||||
|
return this.producedSymbols.stream().map(Symbol::getIdentifier).collect(Collectors.joining(","));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
ProductionRule that = (ProductionRule) o;
|
||||||
|
return getBeginSymbol().equals(that.getBeginSymbol()) && getProducedSymbols().equals(that.getProducedSymbols());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(getBeginSymbol(), getProducedSymbols());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return this.beginSymbol + " -> " + this.getProducedSymbolsString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(ProductionRule o) {
|
||||||
|
int beginComparison = this.beginSymbol.compareTo(o.beginSymbol);
|
||||||
|
if (beginComparison != 0) return beginComparison;
|
||||||
|
return -1 * Integer.compare(this.producedSymbols.size(), o.producedSymbols.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ProductionRule of(Symbol beginSymbol, Symbol... producedSymbols) {
|
||||||
|
return new ProductionRule(beginSymbol, Arrays.asList(producedSymbols));
|
||||||
|
}
|
||||||
|
|
||||||
|
@SafeVarargs
|
||||||
|
public static Set<ProductionRule> setOf(Symbol beginSymbol, List<Symbol>... producedSymbols) {
|
||||||
|
Set<ProductionRule> rules = new HashSet<>();
|
||||||
|
for (var symbolsList : producedSymbols) {
|
||||||
|
rules.add(new ProductionRule(beginSymbol, symbolsList));
|
||||||
|
}
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<ProductionRule> of(String expression) {
|
||||||
|
Scanner scanner = new Scanner(expression);
|
||||||
|
Symbol beginSymbol = Symbol.of(scanner.next("\\w+"));
|
||||||
|
scanner.next("->");
|
||||||
|
String[] productions = scanner.nextLine().split("\\|");
|
||||||
|
Set<ProductionRule> rules = new HashSet<>();
|
||||||
|
for (String productionExpr : productions) {
|
||||||
|
String[] symbolNames = productionExpr.split(",");
|
||||||
|
if (symbolNames.length == 1 && symbolNames[0].trim().equalsIgnoreCase("ε")) {
|
||||||
|
rules.add(ProductionRule.of(beginSymbol));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rules.add(ProductionRule.of(beginSymbol, Symbol.arrayOf(symbolNames)));
|
||||||
|
}
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
package nl.andrewlalis.grammar_tool.grammar;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class Symbol implements Comparable<Symbol> {
|
||||||
|
@Getter
|
||||||
|
private final String identifier;
|
||||||
|
|
||||||
|
public Symbol(String identifier) {
|
||||||
|
this.identifier = Objects.requireNonNull(identifier).trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
Symbol symbol = (Symbol) o;
|
||||||
|
return getIdentifier().equals(symbol.getIdentifier());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Objects.hash(getIdentifier());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return this.identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(Symbol o) {
|
||||||
|
return this.identifier.compareTo(o.identifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Symbol of(String identifier) {
|
||||||
|
return new Symbol(identifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<Symbol> setOf(String... identifiers) {
|
||||||
|
Set<Symbol> symbols = new HashSet<>();
|
||||||
|
for (String i : identifiers) {
|
||||||
|
symbols.add(new Symbol(i));
|
||||||
|
}
|
||||||
|
return symbols;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Symbol[] arrayOf(String... identifiers) {
|
||||||
|
Symbol[] symbols = new Symbol[identifiers.length];
|
||||||
|
int index = 0;
|
||||||
|
for (String i : identifiers) {
|
||||||
|
symbols[index++] = new Symbol(i);
|
||||||
|
}
|
||||||
|
return symbols;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue