Added initial index impl.

This commit is contained in:
Andrew Lalis 2023-03-19 09:59:42 +01:00
parent 5a15c4d618
commit eb4a66e039
7 changed files with 192 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
# IDE project files (IntelliJ IDEA module files and settings directory).
*.iml
.idea/
# Maven build output directory.
target/
# Generated search index; DubRegistrySearch.buildIndex() writes it to this relative path.
package-index/

33
pom.xml Normal file
View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the dub-registry-search project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.github.andrewlalis</groupId>
<artifactId>dub-registry-search</artifactId>
<version>1.0.0-SNAPSHOT</version>
<properties>
<!-- Sources are compiled for Java 17 and read as UTF-8. -->
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Lucene core: index writer, directory and analyzer used by LucenePackageIndexer. -->
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>9.5.0</version>
</dependency>
<!-- Jackson databind: JSON tree model (ArrayNode/ObjectNode) used to parse the registry dump. -->
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.14.2</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,43 @@
package io.github.andrewlalis.dub_registry_search;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.zip.GZIPInputStream;
/**
 * {@link PackageFetcher} that downloads the complete package dump from the
 * DUB registry's HTTP API and parses it into a JSON array.
 */
public class DubPackageFetcher implements PackageFetcher {
    private static final String API_URL = "https://code.dlang.org/api/packages/dump";

    /** Shared mapper, created once instead of on every fetch. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    private final HttpClient httpClient = HttpClient.newBuilder()
            .connectTimeout(Duration.ofSeconds(3))
            .followRedirects(HttpClient.Redirect.NORMAL)
            .build();

    /**
     * Requests the package dump and parses the response body as a JSON array.
     *
     * @return the parsed JSON array of packages
     * @throws IOException if the request fails, the response status is not 200,
     *         or the body cannot be read or parsed as a JSON array
     * @throws RuntimeException if the calling thread is interrupted while waiting
     *         for the response; the thread's interrupt flag is restored first
     */
    @Override
    public ArrayNode fetch() throws IOException {
        HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
                .GET()
                .timeout(Duration.ofSeconds(60))
                .header("Accept", "application/json")
                .header("Accept-Encoding", "gzip")
                .build();

        HttpResponse<InputStream> response;
        try {
            response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }

        // Always close the body stream, including on the error path, so the
        // underlying connection is released.
        try (InputStream body = response.body()) {
            if (response.statusCode() != 200) {
                throw new IOException("Response status code " + response.statusCode());
            }
            // "Accept-Encoding: gzip" is only a request; the server may still answer
            // uncompressed. Only gunzip when the response says it is gzip-encoded.
            boolean gzipped = response.headers()
                    .firstValue("Content-Encoding")
                    .map(encoding -> encoding.strip().equalsIgnoreCase("gzip"))
                    .orElse(false);
            try (InputStream in = gzipped ? new GZIPInputStream(body) : body) {
                return MAPPER.readValue(in, ArrayNode.class);
            }
        }
    }
}

View File

@ -0,0 +1,43 @@
package io.github.andrewlalis.dub_registry_search;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.IOException;
import java.nio.file.Path;
import java.time.Duration;
import java.time.Instant;
/**
 * Command-line entry point for the DUB registry search tool.
 */
public class DubRegistrySearch {
    /**
     * Runs the tool. When invoked with the single argument {@code index}
     * (case-insensitive, surrounding whitespace ignored) the package index is
     * rebuilt; any other invocation does nothing.
     *
     * @param args command-line arguments
     * @throws Exception if building the index fails
     */
    public static void main(String[] args) throws Exception {
        boolean indexRequested = args.length == 1 && "index".equalsIgnoreCase(args[0].strip());
        if (!indexRequested) {
            return;
        }
        buildIndex();
    }

    /**
     * Fetches all packages and feeds every JSON object among them to a
     * {@link LucenePackageIndexer} opened on the {@code package-index} directory,
     * then prints how many packages were handed to the indexer and how long the
     * loop took.
     *
     * @throws Exception if fetching the packages or opening/closing the indexer fails
     */
    public static void buildIndex() throws Exception {
        System.out.println("Building package index.");
        PackageFetcher fetcher = new DubPackageFetcher();
        System.out.println("Fetching packages...");
        ArrayNode packages = fetcher.fetch();

        int indexedCount = 0;
        Instant startedAt;
        Instant finishedAt;
        try (LucenePackageIndexer indexer = new LucenePackageIndexer(Path.of("package-index"))) {
            startedAt = Instant.now();
            for (JsonNode entry : packages) {
                if (!entry.isObject()) {
                    continue;
                }
                try {
                    indexer.addToIndex((ObjectNode) entry);
                    indexedCount++;
                } catch (IOException e) {
                    // A failure on one package is reported and the loop moves on.
                    e.printStackTrace();
                }
            }
            finishedAt = Instant.now();
        }

        long elapsedMillis = Duration.between(startedAt, finishedAt).toMillis();
        System.out.println("Done! Added " + indexedCount + " packages to the index in " + elapsedMillis + " ms.");
    }
}

View File

@ -0,0 +1,51 @@
package io.github.andrewlalis.dub_registry_search;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.nio.file.Path;
/**
 * {@link PackageIndexer} that writes packages into a Lucene index stored in a
 * filesystem directory. Instances hold open resources and must be closed.
 */
public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
    private final IndexWriter indexWriter;
    private final Directory dir;
    private final Analyzer analyzer;

    /**
     * Opens an index writer on the given directory. The writer is configured
     * with {@code OpenMode.CREATE} and commit-on-close enabled.
     *
     * @param indexPath directory that holds the index
     * @throws IOException if the directory or the index writer cannot be opened
     */
    public LucenePackageIndexer(Path indexPath) throws IOException {
        this.dir = FSDirectory.open(indexPath);
        this.analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setCommitOnClose(true);
        IndexWriter writer;
        try {
            writer = new IndexWriter(dir, config);
        } catch (IOException | RuntimeException e) {
            // The caller never receives an instance to close, so release what was opened so far.
            analyzer.close();
            try {
                dir.close();
            } catch (IOException closeError) {
                e.addSuppressed(closeError);
            }
            throw e;
        }
        this.indexWriter = writer;
    }

    /**
     * Adds one package to the index. The document stores the registry id, the
     * package name (also indexed as text) and the package's registry page URL.
     *
     * @param packageJson package JSON object; must contain non-null {@code _id}
     *        and {@code name} fields
     * @throws IOException if a required field is missing or the document cannot
     *         be written to the index
     */
    @Override
    public void addToIndex(ObjectNode packageJson) throws IOException {
        // Report malformed records through the declared exception instead of a NullPointerException.
        if (!packageJson.hasNonNull("_id") || !packageJson.hasNonNull("name")) {
            throw new IOException("Package JSON is missing a required \"_id\" or \"name\" field.");
        }
        String registryId = packageJson.get("_id").asText();
        String name = packageJson.get("name").asText();
        String dubUrl = "https://code.dlang.org/packages/" + name;

        Document doc = new Document();
        doc.add(new StoredField("registryId", registryId));
        doc.add(new TextField("name", name, Field.Store.YES));
        doc.add(new StoredField("dubUrl", dubUrl));
        // Actually hand the document to the writer; without this call nothing is indexed.
        indexWriter.addDocument(doc);
    }

    /**
     * Closes the index writer, the analyzer and the directory, in that order.
     * Each resource is closed even if closing an earlier one fails.
     *
     * @throws Exception if closing any of the resources fails
     */
    @Override
    public void close() throws Exception {
        try {
            indexWriter.close();
        } finally {
            try {
                analyzer.close();
            } finally {
                dir.close();
            }
        }
    }
}

View File

@ -0,0 +1,9 @@
package io.github.andrewlalis.dub_registry_search;
import com.fasterxml.jackson.databind.node.ArrayNode;
import java.io.IOException;
/**
 * A source of package data, delivered as a JSON array.
 */
public interface PackageFetcher {
/**
 * Fetches the packages.
 *
 * @return the fetched packages as a JSON array
 * @throws IOException if the packages cannot be fetched
 */
ArrayNode fetch() throws IOException;
}

View File

@ -0,0 +1,9 @@
package io.github.andrewlalis.dub_registry_search;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.IOException;
/**
 * A sink that adds packages, given as JSON objects, to a search index.
 */
public interface PackageIndexer {
/**
 * Adds a single package to the index.
 *
 * @param packageJson the package's JSON object
 * @throws IOException if the package cannot be added to the index
 */
void addToIndex(ObjectNode packageJson) throws IOException;
}