Added initial index impl.
This commit is contained in:
parent
5a15c4d618
commit
eb4a66e039
|
@ -0,0 +1,4 @@
|
||||||
|
*.iml
|
||||||
|
.idea/
|
||||||
|
target/
|
||||||
|
package-index/
|
|
@ -0,0 +1,33 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<groupId>io.github.andrewlalis</groupId>
|
||||||
|
<artifactId>dub-registry-search</artifactId>
|
||||||
|
<version>1.0.0-SNAPSHOT</version>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>17</maven.compiler.source>
|
||||||
|
<maven.compiler.target>17</maven.compiler.target>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.lucene</groupId>
|
||||||
|
<artifactId>lucene-core</artifactId>
|
||||||
|
<version>9.5.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-databind</artifactId>
|
||||||
|
<version>2.14.2</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
|
@ -0,0 +1,43 @@
|
||||||
|
package io.github.andrewlalis.dub_registry_search;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.net.http.HttpRequest;
|
||||||
|
import java.net.http.HttpResponse;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
public class DubPackageFetcher implements PackageFetcher {
|
||||||
|
private final HttpClient httpClient = HttpClient.newBuilder()
|
||||||
|
.connectTimeout(Duration.ofSeconds(3))
|
||||||
|
.followRedirects(HttpClient.Redirect.NORMAL)
|
||||||
|
.build();
|
||||||
|
private static final String API_URL = "https://code.dlang.org/api/packages/dump";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ArrayNode fetch() throws IOException {
|
||||||
|
HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
|
||||||
|
.GET()
|
||||||
|
.timeout(Duration.ofSeconds(60))
|
||||||
|
.header("Accept", "application/json")
|
||||||
|
.header("Accept-Encoding", "gzip")
|
||||||
|
.build();
|
||||||
|
try {
|
||||||
|
HttpResponse<InputStream> response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
|
||||||
|
if (response.statusCode() != 200) {
|
||||||
|
throw new IOException("Response status code " + response.statusCode());
|
||||||
|
}
|
||||||
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
|
try (var in = new GZIPInputStream(response.body())) {
|
||||||
|
return mapper.readValue(in, ArrayNode.class);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
package io.github.andrewlalis.dub_registry_search;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
|
||||||
|
public class DubRegistrySearch {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
if (args.length == 1 && args[0].strip().equalsIgnoreCase("index")) {
|
||||||
|
buildIndex();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void buildIndex() throws Exception {
|
||||||
|
System.out.println("Building package index.");
|
||||||
|
PackageFetcher fetcher = new DubPackageFetcher();
|
||||||
|
System.out.println("Fetching packages...");
|
||||||
|
ArrayNode packagesArray = fetcher.fetch();
|
||||||
|
int docCount = 0;
|
||||||
|
Duration indexDuration;
|
||||||
|
try (var indexer = new LucenePackageIndexer(Path.of("package-index"))) {
|
||||||
|
Instant start = Instant.now();
|
||||||
|
for (JsonNode node : packagesArray) {
|
||||||
|
if (node.isObject()) {
|
||||||
|
try {
|
||||||
|
indexer.addToIndex((ObjectNode) node);
|
||||||
|
docCount++;
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Instant end = Instant.now();
|
||||||
|
indexDuration = Duration.between(start, end);
|
||||||
|
}
|
||||||
|
System.out.println("Done! Added " + docCount + " packages to the index in " + indexDuration.toMillis() + " ms.");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,51 @@
|
||||||
|
package io.github.andrewlalis.dub_registry_search;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StoredField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
|
||||||
|
private final IndexWriter indexWriter;
|
||||||
|
private final Directory dir;
|
||||||
|
private final Analyzer analyzer;
|
||||||
|
|
||||||
|
public LucenePackageIndexer(Path indexPath) throws IOException {
|
||||||
|
this.dir = FSDirectory.open(indexPath);
|
||||||
|
this.analyzer = new StandardAnalyzer();
|
||||||
|
IndexWriterConfig config = new IndexWriterConfig(analyzer);
|
||||||
|
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
||||||
|
config.setCommitOnClose(true);
|
||||||
|
this.indexWriter = new IndexWriter(dir, config);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addToIndex(ObjectNode packageJson) throws IOException {
|
||||||
|
String registryId = packageJson.get("_id").asText();
|
||||||
|
String name = packageJson.get("name").asText();
|
||||||
|
String dubUrl = "https://code.dlang.org/packages/" + name;
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new StoredField("registryId", registryId));
|
||||||
|
doc.add(new TextField("name", name, Field.Store.YES));
|
||||||
|
doc.add(new StoredField("dubUrl", dubUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws Exception {
|
||||||
|
indexWriter.close();
|
||||||
|
analyzer.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
package io.github.andrewlalis.dub_registry_search;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.node.ArrayNode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public interface PackageFetcher {
|
||||||
|
ArrayNode fetch() throws IOException;
|
||||||
|
}
|
|
@ -0,0 +1,9 @@
|
||||||
|
package io.github.andrewlalis.dub_registry_search;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public interface PackageIndexer {
|
||||||
|
void addToIndex(ObjectNode packageJson) throws IOException;
|
||||||
|
}
|
Loading…
Reference in New Issue