Added files.
This commit is contained in:
parent
536a19d150
commit
48a69e4bb3
|
@ -0,0 +1,25 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>nl.andrewlalis</groupId>
    <artifactId>grammar-tool</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>15</maven.compiler.source>
        <maven.compiler.target>15</maven.compiler.target>
        <!--
            The Java sources contain non-ASCII string literals (the epsilon
            character), so the source encoding is pinned here instead of being
            left to the platform default. Assumes the sources are saved as
            UTF-8, like this POM.
        -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Lombok is only needed at compile time to generate the getters. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.18</version>
            <scope>provided</scope>
            <optional>true</optional>
        </dependency>
    </dependencies>
</project>
|
|
@ -0,0 +1,20 @@
|
|||
package nl.andrewlalis.grammar_tool;
|
||||
|
||||
import nl.andrewlalis.grammar_tool.grammar.ContextFreeGrammar;
|
||||
|
||||
public class GrammarTool {
|
||||
public static void main(String[] args) {
|
||||
ContextFreeGrammar g2 = ContextFreeGrammar.fromProductionRules(
|
||||
"S",
|
||||
"S, A, B, C",
|
||||
"a, b, c",
|
||||
"S -> A,B",
|
||||
"A -> a,S | ε",
|
||||
"B -> b,B | ε",
|
||||
"C -> c,C | ε"
|
||||
);
|
||||
System.out.println(g2);
|
||||
ContextFreeGrammar productive = g2.toProductiveForm();
|
||||
System.out.println(productive);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
package nl.andrewlalis.grammar_tool.grammar;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Getter
|
||||
public class ContextFreeGrammar {
|
||||
private final Set<Symbol> nonTerminalSymbols;
|
||||
private final Set<Symbol> terminalSymbols;
|
||||
private final Set<ProductionRule> productionRules;
|
||||
private final Symbol startSymbol;
|
||||
|
||||
public ContextFreeGrammar(Set<Symbol> nonTerminalSymbols, Set<Symbol> terminalSymbols, Set<ProductionRule> productionRules, Symbol startSymbol) {
|
||||
this.nonTerminalSymbols = Objects.requireNonNull(nonTerminalSymbols);
|
||||
this.terminalSymbols = Objects.requireNonNull(terminalSymbols);
|
||||
this.productionRules = Objects.requireNonNull(productionRules);
|
||||
this.startSymbol = Objects.requireNonNull(startSymbol);
|
||||
this.ensureValidElements();
|
||||
}
|
||||
|
||||
private void ensureValidElements() {
|
||||
if (!nonTerminalSymbols.contains(startSymbol)) {
|
||||
throw new IllegalArgumentException("Start symbol must be an element of the set of non-terminal symbols.");
|
||||
}
|
||||
Set<Symbol> overlaps = new HashSet<>(this.terminalSymbols);
|
||||
overlaps.retainAll(this.nonTerminalSymbols);
|
||||
if (!overlaps.isEmpty()) {
|
||||
throw new IllegalArgumentException("Terminal and non-terminal symbols are overlapping: " + overlaps);
|
||||
}
|
||||
for (ProductionRule rule : productionRules) {
|
||||
if (!nonTerminalSymbols.contains(rule.getBeginSymbol())) {
|
||||
throw new IllegalArgumentException("Production rule " + rule.toString() + " must begin with a symbol from the set of non-terminals.");
|
||||
}
|
||||
for (Symbol s : rule.getProducedSymbols()) {
|
||||
if (!nonTerminalSymbols.contains(s) && !terminalSymbols.contains(s)) {
|
||||
throw new IllegalArgumentException("Production rule " + rule.toString() + " must produce a string containing symbols that are elements of either terminals or non-terminals.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isSymbolTerminal(Symbol s) {
|
||||
return this.terminalSymbols.contains(s);
|
||||
}
|
||||
|
||||
public boolean isSymbolNonTerminal(Symbol s) {
|
||||
return this.nonTerminalSymbols.contains(s);
|
||||
}
|
||||
|
||||
public Set<ProductionRule> findRulesByStartingSymbol(Symbol s) {
|
||||
Set<ProductionRule> rules = new HashSet<>();
|
||||
for (var rule : this.productionRules) {
|
||||
if (rule.getBeginSymbol().equals(s)) rules.add(rule);
|
||||
}
|
||||
return rules;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if a symbol is recursive in the grammar. A symbol is defined
|
||||
* as recursive if it is a non-terminal that begins at least one production
|
||||
* rule, and by following the non-terminals that rule produces, the symbol
|
||||
* is again encountered as the result of another production rule.
|
||||
* @param s The symbol to check.
|
||||
* @return True if the symbol is recursive, or false otherwise.
|
||||
*/
|
||||
public boolean isSymbolRecursive(Symbol s) {
|
||||
Set<Symbol> symbolsToCheck = new HashSet<>();
|
||||
symbolsToCheck.add(s);
|
||||
Set<ProductionRule> rulesToCheck = new HashSet<>(this.productionRules);
|
||||
while (!rulesToCheck.isEmpty()) {
|
||||
Set<ProductionRule> rulesToAdd = new HashSet<>();
|
||||
Set<ProductionRule> rulesToRemove = new HashSet<>();
|
||||
for (var rule : rulesToCheck) {
|
||||
if (rule.getProducedSymbols().contains(s)) return true;
|
||||
if (symbolsToCheck.contains(rule.getBeginSymbol())) {
|
||||
for (var symbol : rule.getProducedSymbols()) {
|
||||
if (this.isSymbolNonTerminal(symbol)) {
|
||||
rulesToAdd.addAll(this.productionRules);
|
||||
}
|
||||
}
|
||||
}
|
||||
rulesToRemove.add(rule);
|
||||
}
|
||||
rulesToCheck.removeAll(rulesToRemove);
|
||||
rulesToCheck.addAll(rulesToAdd);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public ContextFreeGrammar toProductiveForm() {
|
||||
Symbol newStart = this.startSymbol;
|
||||
Set<Symbol> nonTerminals = new HashSet<>(this.nonTerminalSymbols);
|
||||
Set<Symbol> terminals = new HashSet<>(this.terminalSymbols);
|
||||
Set<ProductionRule> rules = new HashSet<>(this.productionRules);
|
||||
if (this.isSymbolRecursive(this.startSymbol)) {
|
||||
newStart = Symbol.of("_T");
|
||||
nonTerminals.add(newStart);
|
||||
rules.add(ProductionRule.of(newStart, this.startSymbol));
|
||||
}
|
||||
|
||||
// Find all nullables.
|
||||
|
||||
|
||||
return new ContextFreeGrammar(nonTerminals, terminals, rules, newStart);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
ContextFreeGrammar that = (ContextFreeGrammar) o;
|
||||
return getNonTerminalSymbols().equals(that.getNonTerminalSymbols())
|
||||
&& getTerminalSymbols().equals(that.getTerminalSymbols())
|
||||
&& getProductionRules().equals(that.getProductionRules())
|
||||
&& getStartSymbol().equals(that.getStartSymbol());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(getNonTerminalSymbols(), getTerminalSymbols(), getProductionRules(), getStartSymbol());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Map<Symbol, SortedSet<ProductionRule>> rulesMap = new HashMap<>();
|
||||
SortedSet<Symbol> beginSymbols = new TreeSet<>();
|
||||
for (ProductionRule rule : this.productionRules) {
|
||||
beginSymbols.add(rule.getBeginSymbol());
|
||||
if (!rulesMap.containsKey(rule.getBeginSymbol())) {
|
||||
rulesMap.put(rule.getBeginSymbol(), new TreeSet<>());
|
||||
}
|
||||
rulesMap.get(rule.getBeginSymbol()).add(rule);
|
||||
}
|
||||
// Do start symbol explicitly at the beginning.
|
||||
beginSymbols.remove(this.startSymbol);
|
||||
sb.append(this.startSymbol.getIdentifier()).append(" -> ");
|
||||
sb.append(rulesMap.get(this.getStartSymbol()).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
|
||||
sb.append("\n");
|
||||
|
||||
for (Symbol s : beginSymbols) {
|
||||
sb.append(s.getIdentifier()).append(" -> ");
|
||||
sb.append(rulesMap.get(s).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
|
||||
sb.append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static ContextFreeGrammar fromProductionRules(String start, String nonTerminals, String terminals, String... ruleExpressions) {
|
||||
Set<ProductionRule> rules = new HashSet<>();
|
||||
for (String ruleExpr : ruleExpressions) {
|
||||
rules.addAll(ProductionRule.of(ruleExpr));
|
||||
}
|
||||
return new ContextFreeGrammar(
|
||||
Symbol.setOf(nonTerminals.split(",")),
|
||||
Symbol.setOf(terminals.split(",")),
|
||||
rules,
|
||||
Symbol.of(start)
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
package nl.andrewlalis.grammar_tool.grammar;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Getter
|
||||
public class ProductionRule implements Comparable<ProductionRule> {
|
||||
private final Symbol beginSymbol;
|
||||
private final List<Symbol> producedSymbols;
|
||||
|
||||
public ProductionRule(Symbol beginSymbol, List<Symbol> producedSymbols) {
|
||||
this.beginSymbol = Objects.requireNonNull(beginSymbol);
|
||||
this.producedSymbols = Objects.requireNonNull(producedSymbols);
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return this.producedSymbols.isEmpty();
|
||||
}
|
||||
|
||||
public String getProducedSymbolsString() {
|
||||
if (this.producedSymbols.isEmpty()) return "ε";
|
||||
return this.producedSymbols.stream().map(Symbol::getIdentifier).collect(Collectors.joining(","));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
ProductionRule that = (ProductionRule) o;
|
||||
return getBeginSymbol().equals(that.getBeginSymbol()) && getProducedSymbols().equals(that.getProducedSymbols());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(getBeginSymbol(), getProducedSymbols());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.beginSymbol + " -> " + this.getProducedSymbolsString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(ProductionRule o) {
|
||||
int beginComparison = this.beginSymbol.compareTo(o.beginSymbol);
|
||||
if (beginComparison != 0) return beginComparison;
|
||||
return -1 * Integer.compare(this.producedSymbols.size(), o.producedSymbols.size());
|
||||
}
|
||||
|
||||
public static ProductionRule of(Symbol beginSymbol, Symbol... producedSymbols) {
|
||||
return new ProductionRule(beginSymbol, Arrays.asList(producedSymbols));
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
public static Set<ProductionRule> setOf(Symbol beginSymbol, List<Symbol>... producedSymbols) {
|
||||
Set<ProductionRule> rules = new HashSet<>();
|
||||
for (var symbolsList : producedSymbols) {
|
||||
rules.add(new ProductionRule(beginSymbol, symbolsList));
|
||||
}
|
||||
return rules;
|
||||
}
|
||||
|
||||
public static Set<ProductionRule> of(String expression) {
|
||||
Scanner scanner = new Scanner(expression);
|
||||
Symbol beginSymbol = Symbol.of(scanner.next("\\w+"));
|
||||
scanner.next("->");
|
||||
String[] productions = scanner.nextLine().split("\\|");
|
||||
Set<ProductionRule> rules = new HashSet<>();
|
||||
for (String productionExpr : productions) {
|
||||
String[] symbolNames = productionExpr.split(",");
|
||||
if (symbolNames.length == 1 && symbolNames[0].trim().equalsIgnoreCase("ε")) {
|
||||
rules.add(ProductionRule.of(beginSymbol));
|
||||
continue;
|
||||
}
|
||||
rules.add(ProductionRule.of(beginSymbol, Symbol.arrayOf(symbolNames)));
|
||||
}
|
||||
return rules;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
package nl.andrewlalis.grammar_tool.grammar;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
public class Symbol implements Comparable<Symbol> {
|
||||
@Getter
|
||||
private final String identifier;
|
||||
|
||||
public Symbol(String identifier) {
|
||||
this.identifier = Objects.requireNonNull(identifier).trim();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
Symbol symbol = (Symbol) o;
|
||||
return getIdentifier().equals(symbol.getIdentifier());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(getIdentifier());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.identifier;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Symbol o) {
|
||||
return this.identifier.compareTo(o.identifier);
|
||||
}
|
||||
|
||||
public static Symbol of(String identifier) {
|
||||
return new Symbol(identifier);
|
||||
}
|
||||
|
||||
public static Set<Symbol> setOf(String... identifiers) {
|
||||
Set<Symbol> symbols = new HashSet<>();
|
||||
for (String i : identifiers) {
|
||||
symbols.add(new Symbol(i));
|
||||
}
|
||||
return symbols;
|
||||
}
|
||||
|
||||
public static Symbol[] arrayOf(String... identifiers) {
|
||||
Symbol[] symbols = new Symbol[identifiers.length];
|
||||
int index = 0;
|
||||
for (String i : identifiers) {
|
||||
symbols[index++] = new Symbol(i);
|
||||
}
|
||||
return symbols;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue