Added initial index impl.
This commit is contained in:
		
							parent
							
								
									5a15c4d618
								
							
						
					
					
						commit
						eb4a66e039
					
				| 
						 | 
				
			
			@ -0,0 +1,4 @@
 | 
			
		|||
*.iml
 | 
			
		||||
.idea/
 | 
			
		||||
target/
 | 
			
		||||
package-index/
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,33 @@
 | 
			
		|||
<?xml version="1.0" encoding="UTF-8"?>
 | 
			
		||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
 | 
			
		||||
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 | 
			
		||||
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 | 
			
		||||
    <modelVersion>4.0.0</modelVersion>
 | 
			
		||||
 | 
			
		||||
    <groupId>io.github.andrewlalis</groupId>
 | 
			
		||||
    <artifactId>dub-registry-search</artifactId>
 | 
			
		||||
    <version>1.0.0-SNAPSHOT</version>
 | 
			
		||||
 | 
			
		||||
    <properties>
 | 
			
		||||
        <maven.compiler.source>17</maven.compiler.source>
 | 
			
		||||
        <maven.compiler.target>17</maven.compiler.target>
 | 
			
		||||
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
			
		||||
    </properties>
 | 
			
		||||
 | 
			
		||||
    <dependencies>
 | 
			
		||||
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
 | 
			
		||||
        <dependency>
 | 
			
		||||
            <groupId>org.apache.lucene</groupId>
 | 
			
		||||
            <artifactId>lucene-core</artifactId>
 | 
			
		||||
            <version>9.5.0</version>
 | 
			
		||||
        </dependency>
 | 
			
		||||
 | 
			
		||||
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
 | 
			
		||||
        <dependency>
 | 
			
		||||
            <groupId>com.fasterxml.jackson.core</groupId>
 | 
			
		||||
            <artifactId>jackson-databind</artifactId>
 | 
			
		||||
            <version>2.14.2</version>
 | 
			
		||||
        </dependency>
 | 
			
		||||
    </dependencies>
 | 
			
		||||
 | 
			
		||||
</project>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,43 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.ObjectMapper;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.io.InputStream;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.net.http.HttpClient;
 | 
			
		||||
import java.net.http.HttpRequest;
 | 
			
		||||
import java.net.http.HttpResponse;
 | 
			
		||||
import java.time.Duration;
 | 
			
		||||
import java.util.zip.GZIPInputStream;
 | 
			
		||||
 | 
			
		||||
public class DubPackageFetcher implements PackageFetcher {
 | 
			
		||||
	private final HttpClient httpClient = HttpClient.newBuilder()
 | 
			
		||||
			.connectTimeout(Duration.ofSeconds(3))
 | 
			
		||||
			.followRedirects(HttpClient.Redirect.NORMAL)
 | 
			
		||||
			.build();
 | 
			
		||||
	private static final String API_URL = "https://code.dlang.org/api/packages/dump";
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public ArrayNode fetch() throws IOException {
 | 
			
		||||
		HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
 | 
			
		||||
				.GET()
 | 
			
		||||
				.timeout(Duration.ofSeconds(60))
 | 
			
		||||
				.header("Accept", "application/json")
 | 
			
		||||
				.header("Accept-Encoding", "gzip")
 | 
			
		||||
				.build();
 | 
			
		||||
		try {
 | 
			
		||||
			HttpResponse<InputStream> response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
 | 
			
		||||
			if (response.statusCode() != 200) {
 | 
			
		||||
				throw new IOException("Response status code " + response.statusCode());
 | 
			
		||||
			}
 | 
			
		||||
			ObjectMapper mapper = new ObjectMapper();
 | 
			
		||||
			try (var in = new GZIPInputStream(response.body())) {
 | 
			
		||||
				return mapper.readValue(in, ArrayNode.class);
 | 
			
		||||
			}
 | 
			
		||||
		} catch (InterruptedException e) {
 | 
			
		||||
			throw new RuntimeException(e);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,43 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.JsonNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
import java.time.Duration;
 | 
			
		||||
import java.time.Instant;
 | 
			
		||||
 | 
			
		||||
public class DubRegistrySearch {
 | 
			
		||||
	public static void main(String[] args) throws Exception {
 | 
			
		||||
		if (args.length == 1 && args[0].strip().equalsIgnoreCase("index")) {
 | 
			
		||||
			buildIndex();
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	public static void buildIndex() throws Exception {
 | 
			
		||||
		System.out.println("Building package index.");
 | 
			
		||||
		PackageFetcher fetcher = new DubPackageFetcher();
 | 
			
		||||
		System.out.println("Fetching packages...");
 | 
			
		||||
		ArrayNode packagesArray = fetcher.fetch();
 | 
			
		||||
		int docCount = 0;
 | 
			
		||||
		Duration indexDuration;
 | 
			
		||||
		try (var indexer = new LucenePackageIndexer(Path.of("package-index"))) {
 | 
			
		||||
			Instant start = Instant.now();
 | 
			
		||||
			for (JsonNode node : packagesArray) {
 | 
			
		||||
				if (node.isObject()) {
 | 
			
		||||
					try {
 | 
			
		||||
						indexer.addToIndex((ObjectNode) node);
 | 
			
		||||
						docCount++;
 | 
			
		||||
					} catch (IOException e) {
 | 
			
		||||
						e.printStackTrace();
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			Instant end = Instant.now();
 | 
			
		||||
			indexDuration = Duration.between(start, end);
 | 
			
		||||
		}
 | 
			
		||||
		System.out.println("Done! Added " + docCount + " packages to the index in " + indexDuration.toMillis() + " ms.");
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,51 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
import org.apache.lucene.analysis.Analyzer;
 | 
			
		||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
 | 
			
		||||
import org.apache.lucene.document.Document;
 | 
			
		||||
import org.apache.lucene.document.Field;
 | 
			
		||||
import org.apache.lucene.document.StoredField;
 | 
			
		||||
import org.apache.lucene.document.TextField;
 | 
			
		||||
import org.apache.lucene.index.IndexWriter;
 | 
			
		||||
import org.apache.lucene.index.IndexWriterConfig;
 | 
			
		||||
import org.apache.lucene.store.Directory;
 | 
			
		||||
import org.apache.lucene.store.FSDirectory;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
 | 
			
		||||
public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
 | 
			
		||||
	private final IndexWriter indexWriter;
 | 
			
		||||
	private final Directory dir;
 | 
			
		||||
	private final Analyzer analyzer;
 | 
			
		||||
 | 
			
		||||
	public LucenePackageIndexer(Path indexPath) throws IOException {
 | 
			
		||||
		this.dir = FSDirectory.open(indexPath);
 | 
			
		||||
		this.analyzer = new StandardAnalyzer();
 | 
			
		||||
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
 | 
			
		||||
		config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
 | 
			
		||||
		config.setCommitOnClose(true);
 | 
			
		||||
		this.indexWriter = new IndexWriter(dir, config);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void addToIndex(ObjectNode packageJson) throws IOException {
 | 
			
		||||
		String registryId = packageJson.get("_id").asText();
 | 
			
		||||
		String name = packageJson.get("name").asText();
 | 
			
		||||
		String dubUrl = "https://code.dlang.org/packages/" + name;
 | 
			
		||||
 | 
			
		||||
		Document doc = new Document();
 | 
			
		||||
		doc.add(new StoredField("registryId", registryId));
 | 
			
		||||
		doc.add(new TextField("name", name, Field.Store.YES));
 | 
			
		||||
		doc.add(new StoredField("dubUrl", dubUrl));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void close() throws Exception {
 | 
			
		||||
		indexWriter.close();
 | 
			
		||||
		analyzer.close();
 | 
			
		||||
		dir.close();
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,9 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
 | 
			
		||||
public interface PackageFetcher {
 | 
			
		||||
	ArrayNode fetch() throws IOException;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,9 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
 | 
			
		||||
public interface PackageIndexer {
 | 
			
		||||
	void addToIndex(ObjectNode packageJson) throws IOException;
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue