Added web server and query searching (very basic).
This commit is contained in:
parent
eb4a66e039
commit
ddc69c1c68
|
@ -1,2 +1,7 @@
|
||||||
# dub-registry-search
|
# D Package Search
|
||||||
A search implementation for code.dlang.org
|
|
||||||
|
An indexer and search API for D programming language packages as registered on https://code.dlang.org, using Apache Lucene.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
To set up and run the program, install Java 21 or higher and run the project from your favorite IDE. This boots a web server that you can use to search for packages at http://localhost:8080/search?query=test; replace `test` with whatever you want to search for.
|
||||||
|
|
20
pom.xml
20
pom.xml
|
@ -4,13 +4,13 @@
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<groupId>io.github.andrewlalis</groupId>
|
<groupId>com.andrewlalis</groupId>
|
||||||
<artifactId>dub-registry-search</artifactId>
|
<artifactId>d-package-search</artifactId>
|
||||||
<version>1.0.0-SNAPSHOT</version>
|
<version>1.0.0-SNAPSHOT</version>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<maven.compiler.source>17</maven.compiler.source>
|
<maven.compiler.source>21</maven.compiler.source>
|
||||||
<maven.compiler.target>17</maven.compiler.target>
|
<maven.compiler.target>21</maven.compiler.target>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
|
@ -19,15 +19,23 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene</groupId>
|
<groupId>org.apache.lucene</groupId>
|
||||||
<artifactId>lucene-core</artifactId>
|
<artifactId>lucene-core</artifactId>
|
||||||
<version>9.5.0</version>
|
<version>9.8.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
|
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
<version>2.14.2</version>
|
<version>2.15.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- https://mvnrepository.com/artifact/org.eclipse.jetty/jetty-server -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.eclipse.jetty</groupId>
|
||||||
|
<artifactId>jetty-server</artifactId>
|
||||||
|
<version>12.0.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -0,0 +1,39 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
import com.andrewlalis.d_package_search.impl.DubRegistryPackageFetcher;
|
||||||
|
import com.andrewlalis.d_package_search.impl.LucenePackageIndexer;
|
||||||
|
import com.andrewlalis.d_package_search.impl.LucenePackageSearcher;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class DPackageSearch {
|
||||||
|
public static void main(String[] args) {
|
||||||
|
Path indexPath = Path.of("package-index");
|
||||||
|
startIndexerThread(new IndexGenerator(
|
||||||
|
new DubRegistryPackageFetcher(),
|
||||||
|
() -> new LucenePackageIndexer(indexPath)
|
||||||
|
));
|
||||||
|
new WebApiRunner(new LucenePackageSearcher(indexPath)).run();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts a new (virtual) thread that periodically re-generates the package
|
||||||
|
* index.
|
||||||
|
* @param indexGenerator The index generator to use.
|
||||||
|
*/
|
||||||
|
public static void startIndexerThread(IndexGenerator indexGenerator) {
|
||||||
|
Thread.ofVirtual().start(() -> {
|
||||||
|
while (true) {
|
||||||
|
indexGenerator.run();
|
||||||
|
try {
|
||||||
|
Thread.sleep(Duration.ofMinutes(1));
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
System.err.println("Indexing thread interrupted: " + e.getMessage());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The index generator is a component that pieces all the parts of building an
|
||||||
|
* index together into one runnable. It fetches packages using a fetcher, then
|
||||||
|
* indexes them using an indexer obtained from the given supplier.
|
||||||
|
* @param fetcher The fetcher to use to get packages.
|
||||||
|
* @param indexerSupplier A supplier for a package indexer.
|
||||||
|
*/
|
||||||
|
public record IndexGenerator(
|
||||||
|
PackageFetcher fetcher,
|
||||||
|
ThrowableSupplier<PackageIndexer> indexerSupplier
|
||||||
|
) implements Runnable {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
System.out.println("Generating index...");
|
||||||
|
Instant start;
|
||||||
|
Duration dur;
|
||||||
|
start = Instant.now();
|
||||||
|
Collection<PackageInfo> packages;
|
||||||
|
try {
|
||||||
|
packages = fetcher.fetch();
|
||||||
|
} catch (IOException e) {
|
||||||
|
System.err.println("Failed to fetch packages: " + e.getMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try (PackageIndexer indexer = indexerSupplier.get()) {
|
||||||
|
dur = Duration.between(start, Instant.now());
|
||||||
|
System.out.println("Fetched " + packages.size() + " in " + dur.toMillis() + " ms.");
|
||||||
|
start = Instant.now();
|
||||||
|
for (var pkg : packages) {
|
||||||
|
indexer.addToIndex(pkg);
|
||||||
|
}
|
||||||
|
dur = Duration.between(start, Instant.now());
|
||||||
|
System.out.println("Indexed all packages in " + dur.toMillis() + " ms.");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A component responsible for fetching up-to-date information about packages.
|
||||||
|
*/
|
||||||
|
public interface PackageFetcher {
|
||||||
|
Collection<PackageInfo> fetch() throws IOException;
|
||||||
|
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A package indexer writes information from a given JSON package object to an
|
||||||
|
* index for searching later.
|
||||||
|
*/
|
||||||
|
public interface PackageIndexer extends AutoCloseable {
|
||||||
|
void addToIndex(PackageInfo info) throws Exception;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
default void close() throws Exception {}
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Information about a D package that is ready for indexing.
|
||||||
|
* @param name The name of the package.
|
||||||
|
* @param categories The list of categories the package is in.
|
||||||
|
* @param versions The known list of versions for this package.
|
||||||
|
*/
|
||||||
|
public record PackageInfo(
|
||||||
|
String name,
|
||||||
|
String[] categories,
|
||||||
|
VersionInfo[] versions
|
||||||
|
) {
|
||||||
|
/**
|
||||||
|
* Information about a specific version of a D package.
|
||||||
|
* @param timestamp The timestamp (in UTC) when the version was published.
|
||||||
|
* @param versionTag The version tag string (e.g. "1.2.3").
|
||||||
|
* @param description The version's description, or null.
|
||||||
|
* @param license The version's license name (like "MIT" or "LGPL"), or null.
|
||||||
|
* @param authors The list of authors for this version.
|
||||||
|
* @param readmeText The text content of this version's README file.
|
||||||
|
*/
|
||||||
|
public record VersionInfo(
|
||||||
|
LocalDateTime timestamp,
|
||||||
|
String versionTag,
|
||||||
|
String description,
|
||||||
|
String license,
|
||||||
|
String[] authors,
|
||||||
|
String readmeText
|
||||||
|
) {}
|
||||||
|
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
/**
 * A single hit returned from a package search.
 * @param name The name of the matching package.
 * @param url A URL associated with the matching package.
 */
public record PackageSearchResult(String name, String url) {}
|
|
@ -0,0 +1,7 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
import java.util.SequencedCollection;
|
||||||
|
|
||||||
|
public interface PackageSearcher {
|
||||||
|
SequencedCollection<PackageSearchResult> search(String query);
|
||||||
|
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
/**
 * A supplier of values whose {@link #get()} is allowed to throw a checked
 * exception.
 * @param <T> The type of value that is supplied.
 */
@FunctionalInterface
public interface ThrowableSupplier<T> {
    /**
     * Produces a value.
     * @return The supplied value.
     * @throws Exception If the value could not be produced.
     */
    T get() throws Exception;
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.eclipse.jetty.http.HttpStatus;
|
||||||
|
import org.eclipse.jetty.http.HttpURI;
|
||||||
|
import org.eclipse.jetty.server.*;
|
||||||
|
import org.eclipse.jetty.util.Callback;
|
||||||
|
import org.eclipse.jetty.util.thread.QueuedThreadPool;
|
||||||
|
|
||||||
|
import java.net.URLDecoder;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.SequencedCollection;
|
||||||
|
import java.util.concurrent.Executor;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Component that runs a simple HTTP endpoint, defaulting to localhost:8080/search?query=...
|
||||||
|
* that allows clients to search the index via an HTTP request.
|
||||||
|
*/
|
||||||
|
public final class WebApiRunner extends Handler.Abstract implements Runnable {
|
||||||
|
private final PackageSearcher packageSearcher;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
private final Executor threadPoolExecutor;
|
||||||
|
|
||||||
|
public WebApiRunner(PackageSearcher packageSearcher) {
|
||||||
|
this.packageSearcher = packageSearcher;
|
||||||
|
this.objectMapper = new ObjectMapper();
|
||||||
|
this.threadPoolExecutor = Executors.newVirtualThreadPerTaskExecutor();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
QueuedThreadPool threadPool = new QueuedThreadPool();
|
||||||
|
threadPool.setVirtualThreadsExecutor(threadPoolExecutor);
|
||||||
|
threadPool.setName("http-server");
|
||||||
|
Server server = new Server(threadPool);
|
||||||
|
ServerConnector connector = new ServerConnector(server);
|
||||||
|
connector.setPort(8080);
|
||||||
|
server.addConnector(connector);
|
||||||
|
server.setHandler(this);
|
||||||
|
try {
|
||||||
|
server.start();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean handle(Request request, Response response, Callback callback) throws Exception {
|
||||||
|
if (request.getMethod().equalsIgnoreCase("GET")) {
|
||||||
|
HttpURI uri = request.getHttpURI();
|
||||||
|
if (uri.getPath().equalsIgnoreCase("/search")) {
|
||||||
|
String query = uri.getQuery() == null ? null : parseQuery(uri);
|
||||||
|
if (query == null || query.isBlank()) {
|
||||||
|
response.setStatus(HttpStatus.BAD_REQUEST_400);
|
||||||
|
response.write(true, ByteBuffer.wrap("Missing required \"query\" parameter.".getBytes(StandardCharsets.UTF_8)), callback);
|
||||||
|
} else {
|
||||||
|
System.out.println("Searching with query \"" + query + "\".");
|
||||||
|
SequencedCollection<PackageSearchResult> results = packageSearcher.search(query);
|
||||||
|
response.setStatus(HttpStatus.OK_200);
|
||||||
|
response.getHeaders().add("Content-Type", "application/json; charset=utf-8");
|
||||||
|
byte[] responseBody = objectMapper.writeValueAsBytes(results);
|
||||||
|
response.write(true, ByteBuffer.wrap(responseBody), callback);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
response.setStatus(HttpStatus.NOT_FOUND_404);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
response.setStatus(HttpStatus.METHOD_NOT_ALLOWED_405);
|
||||||
|
}
|
||||||
|
callback.succeeded();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String parseQuery(HttpURI uri) {
|
||||||
|
for (String pair : URLDecoder.decode(uri.getQuery(), StandardCharsets.UTF_8).split("&")) {
|
||||||
|
int idx = pair.indexOf('=');
|
||||||
|
if (idx != -1) {
|
||||||
|
String key = pair.substring(0, idx);
|
||||||
|
if (key.trim().equalsIgnoreCase("query")) {
|
||||||
|
return pair.substring(idx + 1).trim().toUpperCase();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,107 @@
|
||||||
|
package com.andrewlalis.d_package_search.impl;
|
||||||
|
|
||||||
|
import com.andrewlalis.d_package_search.PackageFetcher;
|
||||||
|
import com.andrewlalis.d_package_search.PackageInfo;
|
||||||
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.net.http.HttpRequest;
|
||||||
|
import java.net.http.HttpResponse;
|
||||||
|
import java.time.*;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A package fetcher that pulls directly from the Dub registry's JSON dump.
|
||||||
|
*/
|
||||||
|
public class DubRegistryPackageFetcher implements PackageFetcher {
|
||||||
|
private final HttpClient httpClient = HttpClient.newBuilder()
|
||||||
|
.connectTimeout(Duration.ofSeconds(3))
|
||||||
|
.followRedirects(HttpClient.Redirect.NORMAL)
|
||||||
|
.build();
|
||||||
|
private static final String API_URL = "https://code.dlang.org/api/packages/dump";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<PackageInfo> fetch() throws IOException {
|
||||||
|
HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
|
||||||
|
.GET()
|
||||||
|
.timeout(Duration.ofSeconds(60))
|
||||||
|
.header("Accept", "application/json")
|
||||||
|
.header("Accept-Encoding", "gzip")
|
||||||
|
.build();
|
||||||
|
try {
|
||||||
|
HttpResponse<InputStream> response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
|
||||||
|
if (response.statusCode() != 200) {
|
||||||
|
throw new IOException("Response status code " + response.statusCode());
|
||||||
|
}
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
try (var in = new GZIPInputStream(response.body())) {
|
||||||
|
ArrayNode array = mapper.readValue(in, ArrayNode.class);
|
||||||
|
Collection<PackageInfo> packages = new ArrayList<>();
|
||||||
|
for (JsonNode node : array) {
|
||||||
|
if (node.isObject()) {
|
||||||
|
try {
|
||||||
|
packages.add(parsePackage((ObjectNode) node));
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return packages;
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private PackageInfo parsePackage(ObjectNode obj) {
|
||||||
|
return new PackageInfo(
|
||||||
|
obj.get("name").asText(),
|
||||||
|
mapJsonArray(obj.withArray("categories"), JsonNode::asText).toArray(new String[0]),
|
||||||
|
mapJsonArray(obj.withArray("versions"), this::parseVersion).toArray(new PackageInfo.VersionInfo[0])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private PackageInfo.VersionInfo parseVersion(JsonNode node) {
|
||||||
|
String description = null;
|
||||||
|
String license = null;
|
||||||
|
String[] authors = new String[0];
|
||||||
|
if (node.hasNonNull("info")) {
|
||||||
|
JsonNode infoNode = node.get("info");
|
||||||
|
if (infoNode.hasNonNull("description")) {
|
||||||
|
description = infoNode.get("description").asText();
|
||||||
|
}
|
||||||
|
if (infoNode.hasNonNull("license")) {
|
||||||
|
license = infoNode.get("license").asText();
|
||||||
|
}
|
||||||
|
if (infoNode.hasNonNull("authors")) {
|
||||||
|
authors = mapJsonArray(infoNode.withArray("authors"), JsonNode::asText).toArray(authors);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new PackageInfo.VersionInfo(
|
||||||
|
OffsetDateTime.parse(node.get("date").asText()).atZoneSameInstant(ZoneOffset.UTC).toLocalDateTime(),
|
||||||
|
node.get("version").asText(),
|
||||||
|
description,
|
||||||
|
license,
|
||||||
|
authors,
|
||||||
|
node.get("readme").asText()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> List<T> mapJsonArray(ArrayNode array, Function<JsonNode, T> mapper) {
|
||||||
|
List<T> list = new ArrayList<>(array.size());
|
||||||
|
for (JsonNode node : array) {
|
||||||
|
list.add(mapper.apply(node));
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
package io.github.andrewlalis.dub_registry_search;
|
package com.andrewlalis.d_package_search.impl;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.andrewlalis.d_package_search.PackageIndexer;
|
||||||
|
import com.andrewlalis.d_package_search.PackageInfo;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -15,7 +16,7 @@ import org.apache.lucene.store.FSDirectory;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
|
public class LucenePackageIndexer implements PackageIndexer {
|
||||||
private final IndexWriter indexWriter;
|
private final IndexWriter indexWriter;
|
||||||
private final Directory dir;
|
private final Directory dir;
|
||||||
private final Analyzer analyzer;
|
private final Analyzer analyzer;
|
||||||
|
@ -29,17 +30,14 @@ public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
|
||||||
this.indexWriter = new IndexWriter(dir, config);
|
this.indexWriter = new IndexWriter(dir, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addToIndex(ObjectNode packageJson) throws IOException {
|
public void addToIndex(PackageInfo info) throws IOException {
|
||||||
String registryId = packageJson.get("_id").asText();
|
String dubUrl = "https://code.dlang.org/packages/" + info.name();
|
||||||
String name = packageJson.get("name").asText();
|
|
||||||
String dubUrl = "https://code.dlang.org/packages/" + name;
|
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new StoredField("registryId", registryId));
|
doc.add(new TextField("name", info.name(), Field.Store.YES));
|
||||||
doc.add(new TextField("name", name, Field.Store.YES));
|
doc.add(new StoredField("url", dubUrl));
|
||||||
doc.add(new StoredField("dubUrl", dubUrl));
|
indexWriter.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
|
@ -0,0 +1,69 @@
|
||||||
|
package com.andrewlalis.d_package_search.impl;
|
||||||
|
|
||||||
|
import com.andrewlalis.d_package_search.PackageSearchResult;
|
||||||
|
import com.andrewlalis.d_package_search.PackageSearcher;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.SequencedCollection;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
|
public class LucenePackageSearcher implements PackageSearcher {
|
||||||
|
private final Path indexPath;
|
||||||
|
|
||||||
|
public LucenePackageSearcher(Path indexPath) {
|
||||||
|
this.indexPath = indexPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SequencedCollection<PackageSearchResult> search(String query) {
|
||||||
|
if (query == null || query.isBlank() || Files.notExists(indexPath)) return Collections.emptyList();
|
||||||
|
Query luceneQuery = buildQuery(query);
|
||||||
|
|
||||||
|
try (DirectoryReader dirReader = DirectoryReader.open(FSDirectory.open(indexPath))) {
|
||||||
|
IndexSearcher searcher = new IndexSearcher(dirReader, Executors.newVirtualThreadPerTaskExecutor());
|
||||||
|
TopDocs topDocs = searcher.search(luceneQuery, 25, Sort.RELEVANCE, false);
|
||||||
|
List<PackageSearchResult> results = new ArrayList<>(25);
|
||||||
|
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||||
|
Document doc = searcher.storedFields().document(scoreDoc.doc);
|
||||||
|
results.add(prepareResult(doc));
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
} catch (IOException e) {
|
||||||
|
System.err.println("An IOException occurred while reading index: " + e.getMessage());
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds the Lucene search query for a given textual query string.
|
||||||
|
* @param queryText The query text to use.
|
||||||
|
* @return The query to use.
|
||||||
|
*/
|
||||||
|
private Query buildQuery(String queryText) {
|
||||||
|
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||||
|
String[] searchTerms = queryText.toLowerCase().split("\\s+");
|
||||||
|
for (String searchTerm : searchTerms) {
|
||||||
|
String wildcardTerm = searchTerm + "*";
|
||||||
|
Query basicQuery = new WildcardQuery(new Term("name", wildcardTerm));
|
||||||
|
queryBuilder.add(new BoostQuery(basicQuery, 1f), BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
return queryBuilder.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private PackageSearchResult prepareResult(Document doc) {
|
||||||
|
return new PackageSearchResult(
|
||||||
|
doc.get("name"),
|
||||||
|
doc.get("url")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,43 +0,0 @@
|
||||||
package io.github.andrewlalis.dub_registry_search;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.net.http.HttpClient;
|
|
||||||
import java.net.http.HttpRequest;
|
|
||||||
import java.net.http.HttpResponse;
|
|
||||||
import java.time.Duration;
|
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
|
|
||||||
public class DubPackageFetcher implements PackageFetcher {
|
|
||||||
private final HttpClient httpClient = HttpClient.newBuilder()
|
|
||||||
.connectTimeout(Duration.ofSeconds(3))
|
|
||||||
.followRedirects(HttpClient.Redirect.NORMAL)
|
|
||||||
.build();
|
|
||||||
private static final String API_URL = "https://code.dlang.org/api/packages/dump";
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ArrayNode fetch() throws IOException {
|
|
||||||
HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
|
|
||||||
.GET()
|
|
||||||
.timeout(Duration.ofSeconds(60))
|
|
||||||
.header("Accept", "application/json")
|
|
||||||
.header("Accept-Encoding", "gzip")
|
|
||||||
.build();
|
|
||||||
try {
|
|
||||||
HttpResponse<InputStream> response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
|
|
||||||
if (response.statusCode() != 200) {
|
|
||||||
throw new IOException("Response status code " + response.statusCode());
|
|
||||||
}
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
|
||||||
try (var in = new GZIPInputStream(response.body())) {
|
|
||||||
return mapper.readValue(in, ArrayNode.class);
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,43 +0,0 @@
|
||||||
package io.github.andrewlalis.dub_registry_search;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.JsonNode;
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.time.Duration;
|
|
||||||
import java.time.Instant;
|
|
||||||
|
|
||||||
public class DubRegistrySearch {
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
if (args.length == 1 && args[0].strip().equalsIgnoreCase("index")) {
|
|
||||||
buildIndex();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void buildIndex() throws Exception {
|
|
||||||
System.out.println("Building package index.");
|
|
||||||
PackageFetcher fetcher = new DubPackageFetcher();
|
|
||||||
System.out.println("Fetching packages...");
|
|
||||||
ArrayNode packagesArray = fetcher.fetch();
|
|
||||||
int docCount = 0;
|
|
||||||
Duration indexDuration;
|
|
||||||
try (var indexer = new LucenePackageIndexer(Path.of("package-index"))) {
|
|
||||||
Instant start = Instant.now();
|
|
||||||
for (JsonNode node : packagesArray) {
|
|
||||||
if (node.isObject()) {
|
|
||||||
try {
|
|
||||||
indexer.addToIndex((ObjectNode) node);
|
|
||||||
docCount++;
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Instant end = Instant.now();
|
|
||||||
indexDuration = Duration.between(start, end);
|
|
||||||
}
|
|
||||||
System.out.println("Done! Added " + docCount + " packages to the index in " + indexDuration.toMillis() + " ms.");
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,9 +0,0 @@
|
||||||
package io.github.andrewlalis.dub_registry_search;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ArrayNode;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public interface PackageFetcher {
|
|
||||||
ArrayNode fetch() throws IOException;
|
|
||||||
}
|
|
|
@ -1,9 +0,0 @@
|
||||||
package io.github.andrewlalis.dub_registry_search;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public interface PackageIndexer {
|
|
||||||
void addToIndex(ObjectNode packageJson) throws IOException;
|
|
||||||
}
|
|
Loading…
Reference in New Issue