Added implementation.
This commit is contained in:
parent
1ca30f1c06
commit
c91c116d4e
|
@ -0,0 +1,4 @@
|
|||
target/
|
||||
.idea/
|
||||
airports-index/
|
||||
*.iml
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,31 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>io.github.andrewlalis</groupId>
|
||||
<artifactId>SampleLuceneSearch</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>17</maven.compiler.source>
|
||||
<maven.compiler.target>17</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>9.5.0</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-csv</artifactId>
|
||||
<version>1.10.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,24 @@
|
|||
package io.github.andrewlalis.sample_lucene_search;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
public record Airport(
|
||||
long id,
|
||||
String ident,
|
||||
String type,
|
||||
String name,
|
||||
double latitude,
|
||||
double longitude,
|
||||
Optional<Integer> elevationFt,
|
||||
String continent,
|
||||
String isoCountry,
|
||||
String isoRegion,
|
||||
String municipality,
|
||||
boolean scheduledService,
|
||||
Optional<String> gpsCode,
|
||||
Optional<String> iataCode,
|
||||
Optional<String> localCode,
|
||||
Optional<String> homeLink,
|
||||
Optional<String> wikipediaLink,
|
||||
Optional<String> keywords
|
||||
) {}
|
|
@ -0,0 +1,66 @@
|
|||
package io.github.andrewlalis.sample_lucene_search;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
public final class AirportParser {
|
||||
private AirportParser() {}
|
||||
|
||||
public static List<Airport> parseAirports(Path filePath) {
|
||||
CSVFormat format = CSVFormat.DEFAULT.builder()
|
||||
.setHeader()
|
||||
.setSkipHeaderRecord(true)
|
||||
.build();
|
||||
try (
|
||||
var reader = Files.newBufferedReader(filePath);
|
||||
var parser = format.parse(reader)
|
||||
) {
|
||||
var it = parser.iterator();
|
||||
List<Airport> airports = new ArrayList<>();
|
||||
while (it.hasNext()) {
|
||||
airports.add(parseAirport(it.next()));
|
||||
}
|
||||
return airports;
|
||||
} catch (IOException e) {
|
||||
System.err.println("Error reading airports.");
|
||||
e.printStackTrace();
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
private static Airport parseAirport(CSVRecord r) {
|
||||
return new Airport(
|
||||
Long.parseLong(r.get("id")),
|
||||
r.get("ident"),
|
||||
r.get("type"),
|
||||
r.get("name"),
|
||||
Double.parseDouble(r.get("latitude_deg")),
|
||||
Double.parseDouble(r.get("longitude_deg")),
|
||||
getOptionalString(r, "elevation_ft").map(Integer::parseInt),
|
||||
r.get("continent"),
|
||||
r.get("iso_country"),
|
||||
r.get("iso_region"),
|
||||
r.get("municipality"),
|
||||
r.get("scheduled_service").equalsIgnoreCase("yes"),
|
||||
getOptionalString(r, "gps_code"),
|
||||
getOptionalString(r, "iata_code"),
|
||||
getOptionalString(r, "local_code"),
|
||||
getOptionalString(r, "home_link"),
|
||||
getOptionalString(r, "wikipedia_link"),
|
||||
getOptionalString(r, "keywords")
|
||||
);
|
||||
}
|
||||
|
||||
private static Optional<String> getOptionalString(CSVRecord r, String key) {
|
||||
String value = r.get(key);
|
||||
if (value.isBlank()) value = null;
|
||||
return Optional.ofNullable(value);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
package io.github.andrewlalis.sample_lucene_search;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.file.FileVisitResult;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.SimpleFileVisitor;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class SampleSearch {
|
||||
public static void main(String[] args) throws IOException {
|
||||
List<Airport> airports = AirportParser.parseAirports(Path.of("airports.csv"));
|
||||
System.out.println("Read " + airports.size() + " airports.");
|
||||
buildIndex(airports);
|
||||
System.out.println("Built index.");
|
||||
System.out.println("Entering search-cli mode. Type a query.");
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
String rawQuery = line.strip().toLowerCase();
|
||||
if (rawQuery.equals("exit")) break;
|
||||
var results = searchAirports(rawQuery);
|
||||
int i = 1;
|
||||
for (var name : results) {
|
||||
System.out.println(" " + i++ + ". " + name);
|
||||
}
|
||||
}
|
||||
System.out.println("Done!");
|
||||
}
|
||||
|
||||
public static void buildIndex(List<Airport> airports) throws IOException {
|
||||
Path indexDir = Path.of("airports-index");
|
||||
deleteDirRecursive(indexDir);
|
||||
Files.createDirectories(indexDir);
|
||||
|
||||
try (
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
Directory luceneDir = FSDirectory.open(indexDir);
|
||||
IndexWriter indexWriter = new IndexWriter(luceneDir, new IndexWriterConfig(analyzer))
|
||||
) {
|
||||
for (var airport : airports) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("id", airport.id()));
|
||||
doc.add(new TextField("ident", airport.ident(), Field.Store.YES));
|
||||
doc.add(new TextField("type", airport.type(), Field.Store.YES));
|
||||
doc.add(new TextField("name", airport.name(), Field.Store.YES));
|
||||
doc.add(new TextField("continent", airport.continent(), Field.Store.YES));
|
||||
doc.add(new TextField("isoCountry", airport.isoCountry(), Field.Store.YES));
|
||||
doc.add(new TextField("municipality", airport.municipality(), Field.Store.YES));
|
||||
doc.add(new IntPoint("elevationFt", airport.elevationFt().orElse(0)));
|
||||
doc.add(new StoredField("elevationFt", airport.elevationFt().orElse(0)));
|
||||
if (airport.wikipediaLink().isPresent()) {
|
||||
doc.add(new StoredField("wikipediaLink", airport.wikipediaLink().get()));
|
||||
}
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> searchAirports(String rawQuery) {
|
||||
Path indexDir = Path.of("airports-index");
|
||||
// If the query is empty or there's no index, quit right away.
|
||||
if (rawQuery == null || rawQuery.isBlank() || Files.notExists(indexDir)) return new ArrayList<>();
|
||||
|
||||
// Prepare a weight for each of the fields we want to search on.
|
||||
Map<String, Float> fieldWeights = Map.of(
|
||||
"name", 3f,
|
||||
"municipality", 2f,
|
||||
"ident", 2f,
|
||||
"type", 1f,
|
||||
"continent", 0.25f
|
||||
);
|
||||
|
||||
// Build a boolean query made up of "boosted" wildcard term queries, that'll match any term.
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
String[] terms = rawQuery.toLowerCase().split("\\s+");
|
||||
for (String term : terms) {
|
||||
String wildcardTerm = term + "*";
|
||||
for (var entry : fieldWeights.entrySet()) {
|
||||
String fieldName = entry.getKey();
|
||||
float weight = entry.getValue();
|
||||
Query baseQuery = new WildcardQuery(new Term(fieldName, wildcardTerm));
|
||||
queryBuilder.add(new BoostQuery(baseQuery, weight), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
Query query = queryBuilder.build();
|
||||
|
||||
// Use the query we built to fetch up to 10 results.
|
||||
try (var reader = DirectoryReader.open(FSDirectory.open(indexDir))) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
List<String> results = new ArrayList<>(10);
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.RELEVANCE, false);
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
Document doc = searcher.storedFields().document(scoreDoc.doc);
|
||||
results.add(doc.get("name"));
|
||||
}
|
||||
return results;
|
||||
} catch (IOException e) {
|
||||
System.err.println("Failed to search index.");
|
||||
e.printStackTrace();
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that removes a directory and its contents recursively.
|
||||
* @param dir The directory to remove.
|
||||
* @throws IOException If an error occurs.
|
||||
*/
|
||||
private static void deleteDirRecursive(Path dir) throws IOException {
|
||||
if (Files.notExists(dir)) return;
|
||||
Files.walkFileTree(dir, new SimpleFileVisitor<>() {
|
||||
@Override
|
||||
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
|
||||
Files.delete(file);
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
|
||||
Files.delete(dir);
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue