Added files.

This commit is contained in:
Andrew Lalis 2021-04-23 11:37:52 +02:00
parent 536a19d150
commit 48a69e4bb3
5 changed files with 349 additions and 0 deletions

25
pom.xml Normal file
View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the grammar-tool project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>nl.andrewlalis</groupId>
<artifactId>grammar-tool</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile sources as Java 15 and emit Java 15 bytecode. -->
<maven.compiler.source>15</maven.compiler.source>
<maven.compiler.target>15</maven.compiler.target>
</properties>
<dependencies>
<!--
Lombok supplies the @Getter annotation used by the grammar classes.
"provided" scope keeps it on the compile classpath without packaging it,
and "optional" stops it from being passed on to projects that depend on
this artifact.
-->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.18</version>
<scope>provided</scope>
<optional>true</optional>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,20 @@
package nl.andrewlalis.grammar_tool;
import nl.andrewlalis.grammar_tool.grammar.ContextFreeGrammar;
/**
 * Demo entry point: builds a small sample grammar, prints it, and then prints
 * the grammar returned by {@link ContextFreeGrammar#toProductiveForm()}.
 */
public class GrammarTool {
    /**
     * Constructs the sample grammar from textual rule expressions and writes
     * both the original grammar and its converted form to standard output.
     *
     * @param args Command-line arguments; not used.
     */
    public static void main(String[] args) {
        // One expression per non-terminal; alternatives are separated by '|'.
        final String[] ruleExpressions = {
            "S -> A,B",
            "A -> a,S | ε",
            "B -> b,B | ε",
            "C -> c,C | ε"
        };
        ContextFreeGrammar grammar = ContextFreeGrammar.fromProductionRules(
            "S",
            "S, A, B, C",
            "a, b, c",
            ruleExpressions
        );
        System.out.println(grammar);
        System.out.println(grammar.toProductiveForm());
    }
}

View File

@ -0,0 +1,163 @@
package nl.andrewlalis.grammar_tool.grammar;
import lombok.Getter;
import java.util.*;
import java.util.stream.Collectors;
@Getter
public class ContextFreeGrammar {
private final Set<Symbol> nonTerminalSymbols;
private final Set<Symbol> terminalSymbols;
private final Set<ProductionRule> productionRules;
private final Symbol startSymbol;
public ContextFreeGrammar(Set<Symbol> nonTerminalSymbols, Set<Symbol> terminalSymbols, Set<ProductionRule> productionRules, Symbol startSymbol) {
this.nonTerminalSymbols = Objects.requireNonNull(nonTerminalSymbols);
this.terminalSymbols = Objects.requireNonNull(terminalSymbols);
this.productionRules = Objects.requireNonNull(productionRules);
this.startSymbol = Objects.requireNonNull(startSymbol);
this.ensureValidElements();
}
private void ensureValidElements() {
if (!nonTerminalSymbols.contains(startSymbol)) {
throw new IllegalArgumentException("Start symbol must be an element of the set of non-terminal symbols.");
}
Set<Symbol> overlaps = new HashSet<>(this.terminalSymbols);
overlaps.retainAll(this.nonTerminalSymbols);
if (!overlaps.isEmpty()) {
throw new IllegalArgumentException("Terminal and non-terminal symbols are overlapping: " + overlaps);
}
for (ProductionRule rule : productionRules) {
if (!nonTerminalSymbols.contains(rule.getBeginSymbol())) {
throw new IllegalArgumentException("Production rule " + rule.toString() + " must begin with a symbol from the set of non-terminals.");
}
for (Symbol s : rule.getProducedSymbols()) {
if (!nonTerminalSymbols.contains(s) && !terminalSymbols.contains(s)) {
throw new IllegalArgumentException("Production rule " + rule.toString() + " must produce a string containing symbols that are elements of either terminals or non-terminals.");
}
}
}
}
public boolean isSymbolTerminal(Symbol s) {
return this.terminalSymbols.contains(s);
}
public boolean isSymbolNonTerminal(Symbol s) {
return this.nonTerminalSymbols.contains(s);
}
public Set<ProductionRule> findRulesByStartingSymbol(Symbol s) {
Set<ProductionRule> rules = new HashSet<>();
for (var rule : this.productionRules) {
if (rule.getBeginSymbol().equals(s)) rules.add(rule);
}
return rules;
}
/**
* Determines if a symbol is recursive in the grammar. A symbol is defined
* as recursive if it is a non-terminal that begins at least one production
* rule, and by following the non-terminals that rule produces, the symbol
* is again encountered as the result of another production rule.
* @param s The symbol to check.
* @return True if the symbol is recursive, or false otherwise.
*/
public boolean isSymbolRecursive(Symbol s) {
Set<Symbol> symbolsToCheck = new HashSet<>();
symbolsToCheck.add(s);
Set<ProductionRule> rulesToCheck = new HashSet<>(this.productionRules);
while (!rulesToCheck.isEmpty()) {
Set<ProductionRule> rulesToAdd = new HashSet<>();
Set<ProductionRule> rulesToRemove = new HashSet<>();
for (var rule : rulesToCheck) {
if (rule.getProducedSymbols().contains(s)) return true;
if (symbolsToCheck.contains(rule.getBeginSymbol())) {
for (var symbol : rule.getProducedSymbols()) {
if (this.isSymbolNonTerminal(symbol)) {
rulesToAdd.addAll(this.productionRules);
}
}
}
rulesToRemove.add(rule);
}
rulesToCheck.removeAll(rulesToRemove);
rulesToCheck.addAll(rulesToAdd);
}
return false;
}
public ContextFreeGrammar toProductiveForm() {
Symbol newStart = this.startSymbol;
Set<Symbol> nonTerminals = new HashSet<>(this.nonTerminalSymbols);
Set<Symbol> terminals = new HashSet<>(this.terminalSymbols);
Set<ProductionRule> rules = new HashSet<>(this.productionRules);
if (this.isSymbolRecursive(this.startSymbol)) {
newStart = Symbol.of("_T");
nonTerminals.add(newStart);
rules.add(ProductionRule.of(newStart, this.startSymbol));
}
// Find all nullables.
return new ContextFreeGrammar(nonTerminals, terminals, rules, newStart);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ContextFreeGrammar that = (ContextFreeGrammar) o;
return getNonTerminalSymbols().equals(that.getNonTerminalSymbols())
&& getTerminalSymbols().equals(that.getTerminalSymbols())
&& getProductionRules().equals(that.getProductionRules())
&& getStartSymbol().equals(that.getStartSymbol());
}
@Override
public int hashCode() {
return Objects.hash(getNonTerminalSymbols(), getTerminalSymbols(), getProductionRules(), getStartSymbol());
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Map<Symbol, SortedSet<ProductionRule>> rulesMap = new HashMap<>();
SortedSet<Symbol> beginSymbols = new TreeSet<>();
for (ProductionRule rule : this.productionRules) {
beginSymbols.add(rule.getBeginSymbol());
if (!rulesMap.containsKey(rule.getBeginSymbol())) {
rulesMap.put(rule.getBeginSymbol(), new TreeSet<>());
}
rulesMap.get(rule.getBeginSymbol()).add(rule);
}
// Do start symbol explicitly at the beginning.
beginSymbols.remove(this.startSymbol);
sb.append(this.startSymbol.getIdentifier()).append(" -> ");
sb.append(rulesMap.get(this.getStartSymbol()).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
sb.append("\n");
for (Symbol s : beginSymbols) {
sb.append(s.getIdentifier()).append(" -> ");
sb.append(rulesMap.get(s).stream().map(ProductionRule::getProducedSymbolsString).collect(Collectors.joining(" | ")));
sb.append("\n");
}
return sb.toString();
}
public static ContextFreeGrammar fromProductionRules(String start, String nonTerminals, String terminals, String... ruleExpressions) {
Set<ProductionRule> rules = new HashSet<>();
for (String ruleExpr : ruleExpressions) {
rules.addAll(ProductionRule.of(ruleExpr));
}
return new ContextFreeGrammar(
Symbol.setOf(nonTerminals.split(",")),
Symbol.setOf(terminals.split(",")),
rules,
Symbol.of(start)
);
}
}

View File

@ -0,0 +1,81 @@
package nl.andrewlalis.grammar_tool.grammar;
import lombok.Getter;
import java.util.*;
import java.util.stream.Collectors;
/**
 * A single production rule of a grammar: a begin symbol and the ordered list
 * of symbols it produces. An empty list represents the empty production (ε).
 * <p>
 * The produced symbols are copied on construction and exposed as an
 * unmodifiable list, so a rule's equality and hash code never change after it
 * has been created.
 */
@Getter
public class ProductionRule implements Comparable<ProductionRule> {
    private final Symbol beginSymbol;
    private final List<Symbol> producedSymbols;

    /**
     * @param beginSymbol The symbol on the left-hand side of the rule.
     * @param producedSymbols The symbols on the right-hand side, in order.
     * @throws NullPointerException If either argument is null.
     */
    public ProductionRule(Symbol beginSymbol, List<Symbol> producedSymbols) {
        this.beginSymbol = Objects.requireNonNull(beginSymbol);
        // Snapshot the list: rules are stored in hash-based sets, so their
        // contents must not change through the caller's list or array.
        this.producedSymbols = Collections.unmodifiableList(new ArrayList<>(Objects.requireNonNull(producedSymbols)));
    }

    /**
     * @return True if this rule produces no symbols.
     */
    public boolean isEmpty() {
        return this.producedSymbols.isEmpty();
    }

    /**
     * @return The produced symbols' identifiers joined by commas, or "ε" if
     * this rule produces no symbols.
     */
    public String getProducedSymbolsString() {
        if (this.producedSymbols.isEmpty()) return "ε";
        return this.producedSymbols.stream().map(Symbol::getIdentifier).collect(Collectors.joining(","));
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        ProductionRule that = (ProductionRule) o;
        return getBeginSymbol().equals(that.getBeginSymbol()) && getProducedSymbols().equals(that.getProducedSymbols());
    }

    @Override
    public int hashCode() {
        return Objects.hash(getBeginSymbol(), getProducedSymbols());
    }

    @Override
    public String toString() {
        return this.beginSymbol + " -> " + this.getProducedSymbolsString();
    }

    /**
     * Orders rules by begin symbol, then by number of produced symbols with
     * longer productions first, then symbol by symbol. The last step makes
     * the ordering consistent with {@link #equals(Object)}, so sorted sets do
     * not treat distinct rules of equal length as duplicates.
     */
    @Override
    public int compareTo(ProductionRule o) {
        int beginComparison = this.beginSymbol.compareTo(o.beginSymbol);
        if (beginComparison != 0) return beginComparison;
        // Arguments are swapped on purpose: longer productions sort first.
        int sizeComparison = Integer.compare(o.producedSymbols.size(), this.producedSymbols.size());
        if (sizeComparison != 0) return sizeComparison;
        for (int i = 0; i < this.producedSymbols.size(); i++) {
            int symbolComparison = this.producedSymbols.get(i).compareTo(o.producedSymbols.get(i));
            if (symbolComparison != 0) return symbolComparison;
        }
        return 0;
    }

    /**
     * Creates a rule from a begin symbol and the symbols it produces.
     *
     * @param beginSymbol The symbol on the left-hand side of the rule.
     * @param producedSymbols The produced symbols; none for an empty rule.
     * @return The new rule.
     */
    public static ProductionRule of(Symbol beginSymbol, Symbol... producedSymbols) {
        return new ProductionRule(beginSymbol, Arrays.asList(producedSymbols));
    }

    /**
     * Creates one rule per given list of produced symbols, all sharing the
     * same begin symbol.
     *
     * @param beginSymbol The symbol on the left-hand side of every rule.
     * @param producedSymbols One list of produced symbols per rule.
     * @return The set of created rules.
     */
    @SafeVarargs
    public static Set<ProductionRule> setOf(Symbol beginSymbol, List<Symbol>... producedSymbols) {
        Set<ProductionRule> rules = new HashSet<>();
        for (var symbolsList : producedSymbols) {
            rules.add(new ProductionRule(beginSymbol, symbolsList));
        }
        return rules;
    }

    /**
     * Parses an expression of the form {@code A -> x,y | z | ε} into one rule
     * per alternative. The begin symbol and the arrow must be separated by
     * whitespace; alternatives are separated by {@code |} and the symbols
     * inside an alternative by commas. An alternative consisting solely of
     * "ε" yields an empty rule.
     *
     * @param expression The rule expression to parse.
     * @return The rules described by the expression.
     * @throws NoSuchElementException If the expression does not start with a
     * word followed by {@code ->}, or if nothing follows the arrow.
     */
    public static Set<ProductionRule> of(String expression) {
        try (Scanner scanner = new Scanner(expression)) {
            Symbol beginSymbol = Symbol.of(scanner.next("\\w+"));
            scanner.next("->");
            String[] productions = scanner.nextLine().split("\\|");
            Set<ProductionRule> rules = new HashSet<>();
            for (String productionExpr : productions) {
                String[] symbolNames = productionExpr.split(",");
                if (symbolNames.length == 1 && symbolNames[0].trim().equalsIgnoreCase("ε")) {
                    rules.add(ProductionRule.of(beginSymbol));
                    continue;
                }
                rules.add(ProductionRule.of(beginSymbol, Symbol.arrayOf(symbolNames)));
            }
            return rules;
        }
    }
}

View File

@ -0,0 +1,60 @@
package nl.andrewlalis.grammar_tool.grammar;
import lombok.Getter;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
/**
 * A grammar symbol identified by a name. The name is trimmed of surrounding
 * whitespace on construction, and equality, hashing, ordering and the string
 * form are all based on it.
 */
public class Symbol implements Comparable<Symbol> {
    /** The trimmed name of this symbol. */
    @Getter
    private final String identifier;

    /**
     * @param identifier The symbol's name; surrounding whitespace is removed.
     * @throws NullPointerException If the identifier is null.
     */
    public Symbol(String identifier) {
        Objects.requireNonNull(identifier);
        this.identifier = identifier.trim();
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null) {
            return false;
        }
        if (o.getClass() != getClass()) {
            return false;
        }
        Symbol other = (Symbol) o;
        return getIdentifier().equals(other.getIdentifier());
    }

    @Override
    public int hashCode() {
        return Objects.hash(getIdentifier());
    }

    @Override
    public String toString() {
        return this.identifier;
    }

    /**
     * Orders symbols by their identifiers, using string comparison.
     */
    @Override
    public int compareTo(Symbol o) {
        return this.identifier.compareTo(o.identifier);
    }

    /**
     * @param identifier The symbol's name.
     * @return A new symbol with the given name.
     */
    public static Symbol of(String identifier) {
        return new Symbol(identifier);
    }

    /**
     * @param identifiers The names of the symbols to create.
     * @return A set containing one symbol per distinct trimmed name.
     */
    public static Set<Symbol> setOf(String... identifiers) {
        Set<Symbol> result = new HashSet<>();
        for (String name : identifiers) {
            result.add(Symbol.of(name));
        }
        return result;
    }

    /**
     * @param identifiers The names of the symbols to create.
     * @return An array with one symbol per name, in the given order.
     */
    public static Symbol[] arrayOf(String... identifiers) {
        Symbol[] result = new Symbol[identifiers.length];
        for (int position = 0; position < identifiers.length; position++) {
            result[position] = Symbol.of(identifiers[position]);
        }
        return result;
    }
}