Added web server and query searching (very basic).
This commit is contained in:
		
							parent
							
								
									eb4a66e039
								
							
						
					
					
						commit
						ddc69c1c68
					
				| 
						 | 
				
			
			@ -1,2 +1,7 @@
 | 
			
		|||
# dub-registry-search
 | 
			
		||||
A search implementation for code.dlang.org
 | 
			
		||||
# D Package Search
 | 
			
		||||
 | 
			
		||||
An indexer and search API for D programming language packages as registered on https://code.dlang.org, using Apache Lucene.
 | 
			
		||||
 | 
			
		||||
## Setup
 | 
			
		||||
 | 
			
		||||
To set up and run the program, install Java 21 or higher and run the project from your favorite IDE. This boots up a web server that you can use to search for packages at http://localhost:8080/search?query=test, replacing `test` with whatever you want to search for.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										20
									
								
								pom.xml
								
								
								
								
							
							
						
						
									
										20
									
								
								pom.xml
								
								
								
								
							| 
						 | 
				
			
			@ -4,13 +4,13 @@
 | 
			
		|||
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 | 
			
		||||
    <modelVersion>4.0.0</modelVersion>
 | 
			
		||||
 | 
			
		||||
    <groupId>io.github.andrewlalis</groupId>
 | 
			
		||||
    <artifactId>dub-registry-search</artifactId>
 | 
			
		||||
    <groupId>com.andrewlalis</groupId>
 | 
			
		||||
    <artifactId>d-package-search</artifactId>
 | 
			
		||||
    <version>1.0.0-SNAPSHOT</version>
 | 
			
		||||
 | 
			
		||||
    <properties>
 | 
			
		||||
        <maven.compiler.source>17</maven.compiler.source>
 | 
			
		||||
        <maven.compiler.target>17</maven.compiler.target>
 | 
			
		||||
        <maven.compiler.source>21</maven.compiler.source>
 | 
			
		||||
        <maven.compiler.target>21</maven.compiler.target>
 | 
			
		||||
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 | 
			
		||||
    </properties>
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -19,15 +19,23 @@
 | 
			
		|||
        <dependency>
 | 
			
		||||
            <groupId>org.apache.lucene</groupId>
 | 
			
		||||
            <artifactId>lucene-core</artifactId>
 | 
			
		||||
            <version>9.5.0</version>
 | 
			
		||||
            <version>9.8.0</version>
 | 
			
		||||
        </dependency>
 | 
			
		||||
 | 
			
		||||
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
 | 
			
		||||
        <dependency>
 | 
			
		||||
            <groupId>com.fasterxml.jackson.core</groupId>
 | 
			
		||||
            <artifactId>jackson-databind</artifactId>
 | 
			
		||||
            <version>2.14.2</version>
 | 
			
		||||
            <version>2.15.1</version>
 | 
			
		||||
        </dependency>
 | 
			
		||||
 | 
			
		||||
        <!-- https://mvnrepository.com/artifact/org.eclipse.jetty/jetty-server -->
 | 
			
		||||
        <dependency>
 | 
			
		||||
            <groupId>org.eclipse.jetty</groupId>
 | 
			
		||||
            <artifactId>jetty-server</artifactId>
 | 
			
		||||
            <version>12.0.1</version>
 | 
			
		||||
        </dependency>
 | 
			
		||||
 | 
			
		||||
    </dependencies>
 | 
			
		||||
 | 
			
		||||
</project>
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,39 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
import com.andrewlalis.d_package_search.impl.DubRegistryPackageFetcher;
 | 
			
		||||
import com.andrewlalis.d_package_search.impl.LucenePackageIndexer;
 | 
			
		||||
import com.andrewlalis.d_package_search.impl.LucenePackageSearcher;
 | 
			
		||||
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
import java.time.Duration;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
 | 
			
		||||
public class DPackageSearch {
 | 
			
		||||
	public static void main(String[] args) {
 | 
			
		||||
		Path indexPath = Path.of("package-index");
 | 
			
		||||
		startIndexerThread(new IndexGenerator(
 | 
			
		||||
				new DubRegistryPackageFetcher(),
 | 
			
		||||
				() -> new LucenePackageIndexer(indexPath)
 | 
			
		||||
		));
 | 
			
		||||
		new WebApiRunner(new LucenePackageSearcher(indexPath)).run();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Starts a new (virtual) thread that periodically re-generates the package
 | 
			
		||||
	 * index.
 | 
			
		||||
	 * @param indexGenerator The index generator to use.
 | 
			
		||||
	 */
 | 
			
		||||
	public static void startIndexerThread(IndexGenerator indexGenerator) {
 | 
			
		||||
		Thread.ofVirtual().start(() -> {
 | 
			
		||||
			while (true) {
 | 
			
		||||
				indexGenerator.run();
 | 
			
		||||
				try {
 | 
			
		||||
					Thread.sleep(Duration.ofMinutes(1));
 | 
			
		||||
				} catch (InterruptedException e) {
 | 
			
		||||
					System.err.println("Indexing thread interrupted: " + e.getMessage());
 | 
			
		||||
					break;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		});
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,45 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.time.Duration;
 | 
			
		||||
import java.time.Instant;
 | 
			
		||||
import java.util.Collection;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * The index generator is a component that pieces all the parts of building an
 | 
			
		||||
 * index together into one runnable. It fetches packages using a fetcher, then
 | 
			
		||||
 * indexes them using an indexer obtained from the given supplier.
 | 
			
		||||
 * @param fetcher The fetcher to use to get packages.
 | 
			
		||||
 * @param indexerSupplier A supplier for a package indexer.
 | 
			
		||||
 */
 | 
			
		||||
public record IndexGenerator(
 | 
			
		||||
        PackageFetcher fetcher,
 | 
			
		||||
        ThrowableSupplier<PackageIndexer> indexerSupplier
 | 
			
		||||
) implements Runnable {
 | 
			
		||||
    @Override
 | 
			
		||||
    public void run() {
 | 
			
		||||
        System.out.println("Generating index...");
 | 
			
		||||
        Instant start;
 | 
			
		||||
        Duration dur;
 | 
			
		||||
        start = Instant.now();
 | 
			
		||||
        Collection<PackageInfo> packages;
 | 
			
		||||
        try {
 | 
			
		||||
            packages = fetcher.fetch();
 | 
			
		||||
        } catch (IOException e) {
 | 
			
		||||
            System.err.println("Failed to fetch packages: " + e.getMessage());
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        try (PackageIndexer indexer = indexerSupplier.get()) {
 | 
			
		||||
            dur = Duration.between(start, Instant.now());
 | 
			
		||||
            System.out.println("Fetched " + packages.size() + " in " + dur.toMillis() + " ms.");
 | 
			
		||||
            start = Instant.now();
 | 
			
		||||
            for (var pkg : packages) {
 | 
			
		||||
                indexer.addToIndex(pkg);
 | 
			
		||||
            }
 | 
			
		||||
            dur = Duration.between(start, Instant.now());
 | 
			
		||||
            System.out.println("Indexed all packages in " + dur.toMillis() + " ms.");
 | 
			
		||||
        } catch (Exception e) {
 | 
			
		||||
            throw new RuntimeException(e);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.util.Collection;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A component responsible for fetching up-to-date information about packages.
 | 
			
		||||
 */
 | 
			
		||||
public interface PackageFetcher {
 | 
			
		||||
	Collection<PackageInfo> fetch() throws IOException;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,12 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A package indexer writes information from a given JSON package object to an
 | 
			
		||||
 * index for searching later.
 | 
			
		||||
 */
 | 
			
		||||
public interface PackageIndexer extends AutoCloseable {
 | 
			
		||||
	void addToIndex(PackageInfo info) throws Exception;
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	default void close() throws Exception {}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,33 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
import java.time.LocalDateTime;
 | 
			
		||||
 | 
			
		||||
/**
 * A D package, described in the form that the indexer consumes.
 * @param name The package's name.
 * @param categories The categories that the package belongs to.
 * @param versions The versions of this package that are known.
 */
public record PackageInfo(
        String name,
        String[] categories,
        VersionInfo[] versions
) {
    /**
     * A single published version of a D package.
     * @param timestamp When the version was published, in UTC.
     * @param versionTag The version's tag string, e.g. "1.2.3".
     * @param description A description of the version, or null.
     * @param license The name of the version's license (like "MIT" or "LGPL"), or null.
     * @param authors The authors of this version.
     * @param readmeText The text of the README file for this version.
     */
    public record VersionInfo(
            LocalDateTime timestamp,
            String versionTag,
            String description,
            String license,
            String[] authors,
            String readmeText
    ) {}
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,6 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
/**
 * A single hit returned from a package search.
 * @param name The name of the matching package.
 * @param url The URL associated with the package.
 */
public record PackageSearchResult(
        String name,
        String url
) {}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,7 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
import java.util.SequencedCollection;
 | 
			
		||||
 | 
			
		||||
public interface PackageSearcher {
 | 
			
		||||
    SequencedCollection<PackageSearchResult> search(String query);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,6 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
/**
 * A supplier whose {@link #get()} is allowed to throw a checked exception.
 * @param <T> The type of value that is supplied.
 */
@FunctionalInterface
public interface ThrowableSupplier<T> {
    /**
     * Produces a value.
     * @return The supplied value.
     * @throws Exception If the value could not be produced.
     */
    T get() throws Exception;
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,88 @@
 | 
			
		|||
package com.andrewlalis.d_package_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.ObjectMapper;
 | 
			
		||||
import org.eclipse.jetty.http.HttpStatus;
 | 
			
		||||
import org.eclipse.jetty.http.HttpURI;
 | 
			
		||||
import org.eclipse.jetty.server.*;
 | 
			
		||||
import org.eclipse.jetty.util.Callback;
 | 
			
		||||
import org.eclipse.jetty.util.thread.QueuedThreadPool;
 | 
			
		||||
 | 
			
		||||
import java.net.URLDecoder;
 | 
			
		||||
import java.nio.ByteBuffer;
 | 
			
		||||
import java.nio.charset.StandardCharsets;
 | 
			
		||||
import java.util.SequencedCollection;
 | 
			
		||||
import java.util.concurrent.Executor;
 | 
			
		||||
import java.util.concurrent.Executors;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Component that runs a simple HTTP endpoint, defaulting to localhost:8080/search?query=...
 | 
			
		||||
 * that allows clients to search the index via an HTTP request.
 | 
			
		||||
 */
 | 
			
		||||
public final class WebApiRunner extends Handler.Abstract implements Runnable {
 | 
			
		||||
    private final PackageSearcher packageSearcher;
 | 
			
		||||
    private final ObjectMapper objectMapper;
 | 
			
		||||
    private final Executor threadPoolExecutor;
 | 
			
		||||
 | 
			
		||||
    public WebApiRunner(PackageSearcher packageSearcher) {
 | 
			
		||||
        this.packageSearcher = packageSearcher;
 | 
			
		||||
        this.objectMapper = new ObjectMapper();
 | 
			
		||||
        this.threadPoolExecutor = Executors.newVirtualThreadPerTaskExecutor();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Override
 | 
			
		||||
    public void run() {
 | 
			
		||||
        QueuedThreadPool threadPool = new QueuedThreadPool();
 | 
			
		||||
        threadPool.setVirtualThreadsExecutor(threadPoolExecutor);
 | 
			
		||||
        threadPool.setName("http-server");
 | 
			
		||||
        Server server = new Server(threadPool);
 | 
			
		||||
        ServerConnector connector = new ServerConnector(server);
 | 
			
		||||
        connector.setPort(8080);
 | 
			
		||||
        server.addConnector(connector);
 | 
			
		||||
        server.setHandler(this);
 | 
			
		||||
        try {
 | 
			
		||||
            server.start();
 | 
			
		||||
        } catch (Exception e) {
 | 
			
		||||
            throw new RuntimeException(e);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Override
 | 
			
		||||
    public boolean handle(Request request, Response response, Callback callback) throws Exception {
 | 
			
		||||
        if (request.getMethod().equalsIgnoreCase("GET")) {
 | 
			
		||||
            HttpURI uri = request.getHttpURI();
 | 
			
		||||
            if (uri.getPath().equalsIgnoreCase("/search")) {
 | 
			
		||||
                String query = uri.getQuery() == null ? null : parseQuery(uri);
 | 
			
		||||
                if (query == null || query.isBlank()) {
 | 
			
		||||
                    response.setStatus(HttpStatus.BAD_REQUEST_400);
 | 
			
		||||
                    response.write(true, ByteBuffer.wrap("Missing required \"query\" parameter.".getBytes(StandardCharsets.UTF_8)), callback);
 | 
			
		||||
                } else {
 | 
			
		||||
                    System.out.println("Searching with query \"" + query + "\".");
 | 
			
		||||
                    SequencedCollection<PackageSearchResult> results = packageSearcher.search(query);
 | 
			
		||||
                    response.setStatus(HttpStatus.OK_200);
 | 
			
		||||
                    response.getHeaders().add("Content-Type", "application/json; charset=utf-8");
 | 
			
		||||
                    byte[] responseBody = objectMapper.writeValueAsBytes(results);
 | 
			
		||||
                    response.write(true, ByteBuffer.wrap(responseBody), callback);
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                response.setStatus(HttpStatus.NOT_FOUND_404);
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            response.setStatus(HttpStatus.METHOD_NOT_ALLOWED_405);
 | 
			
		||||
        }
 | 
			
		||||
        callback.succeeded();
 | 
			
		||||
        return true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static String parseQuery(HttpURI uri) {
 | 
			
		||||
        for (String pair : URLDecoder.decode(uri.getQuery(), StandardCharsets.UTF_8).split("&")) {
 | 
			
		||||
            int idx = pair.indexOf('=');
 | 
			
		||||
            if (idx != -1) {
 | 
			
		||||
                String key = pair.substring(0, idx);
 | 
			
		||||
                if (key.trim().equalsIgnoreCase("query")) {
 | 
			
		||||
                    return pair.substring(idx + 1).trim().toUpperCase();
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return null;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,107 @@
 | 
			
		|||
package com.andrewlalis.d_package_search.impl;
 | 
			
		||||
 | 
			
		||||
import com.andrewlalis.d_package_search.PackageFetcher;
 | 
			
		||||
import com.andrewlalis.d_package_search.PackageInfo;
 | 
			
		||||
import com.fasterxml.jackson.databind.JsonNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.ObjectMapper;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.io.InputStream;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.net.http.HttpClient;
 | 
			
		||||
import java.net.http.HttpRequest;
 | 
			
		||||
import java.net.http.HttpResponse;
 | 
			
		||||
import java.time.*;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Collection;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.function.Function;
 | 
			
		||||
import java.util.zip.GZIPInputStream;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A package fetcher that pulls directly from the Dub registry's JSON dump.
 | 
			
		||||
 */
 | 
			
		||||
public class DubRegistryPackageFetcher implements PackageFetcher {
 | 
			
		||||
	private final HttpClient httpClient = HttpClient.newBuilder()
 | 
			
		||||
			.connectTimeout(Duration.ofSeconds(3))
 | 
			
		||||
			.followRedirects(HttpClient.Redirect.NORMAL)
 | 
			
		||||
			.build();
 | 
			
		||||
	private static final String API_URL = "https://code.dlang.org/api/packages/dump";
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public Collection<PackageInfo> fetch() throws IOException {
 | 
			
		||||
		HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
 | 
			
		||||
				.GET()
 | 
			
		||||
				.timeout(Duration.ofSeconds(60))
 | 
			
		||||
				.header("Accept", "application/json")
 | 
			
		||||
				.header("Accept-Encoding", "gzip")
 | 
			
		||||
				.build();
 | 
			
		||||
		try {
 | 
			
		||||
			HttpResponse<InputStream> response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
 | 
			
		||||
			if (response.statusCode() != 200) {
 | 
			
		||||
				throw new IOException("Response status code " + response.statusCode());
 | 
			
		||||
			}
 | 
			
		||||
			ObjectMapper mapper = new ObjectMapper();
 | 
			
		||||
			try (var in = new GZIPInputStream(response.body())) {
 | 
			
		||||
				ArrayNode array = mapper.readValue(in, ArrayNode.class);
 | 
			
		||||
				Collection<PackageInfo> packages = new ArrayList<>();
 | 
			
		||||
				for (JsonNode node : array) {
 | 
			
		||||
					if (node.isObject()) {
 | 
			
		||||
						try {
 | 
			
		||||
							packages.add(parsePackage((ObjectNode) node));
 | 
			
		||||
						} catch (Exception e) {
 | 
			
		||||
							e.printStackTrace();
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
				return packages;
 | 
			
		||||
			}
 | 
			
		||||
		} catch (InterruptedException e) {
 | 
			
		||||
			throw new RuntimeException(e);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	private PackageInfo parsePackage(ObjectNode obj) {
 | 
			
		||||
		return new PackageInfo(
 | 
			
		||||
				obj.get("name").asText(),
 | 
			
		||||
				mapJsonArray(obj.withArray("categories"), JsonNode::asText).toArray(new String[0]),
 | 
			
		||||
				mapJsonArray(obj.withArray("versions"), this::parseVersion).toArray(new PackageInfo.VersionInfo[0])
 | 
			
		||||
		);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	private PackageInfo.VersionInfo parseVersion(JsonNode node) {
 | 
			
		||||
		String description = null;
 | 
			
		||||
		String license = null;
 | 
			
		||||
		String[] authors = new String[0];
 | 
			
		||||
		if (node.hasNonNull("info")) {
 | 
			
		||||
			JsonNode infoNode = node.get("info");
 | 
			
		||||
			if (infoNode.hasNonNull("description")) {
 | 
			
		||||
				description = infoNode.get("description").asText();
 | 
			
		||||
			}
 | 
			
		||||
			if (infoNode.hasNonNull("license")) {
 | 
			
		||||
				license = infoNode.get("license").asText();
 | 
			
		||||
			}
 | 
			
		||||
			if (infoNode.hasNonNull("authors")) {
 | 
			
		||||
				authors = mapJsonArray(infoNode.withArray("authors"), JsonNode::asText).toArray(authors);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		return new PackageInfo.VersionInfo(
 | 
			
		||||
				OffsetDateTime.parse(node.get("date").asText()).atZoneSameInstant(ZoneOffset.UTC).toLocalDateTime(),
 | 
			
		||||
				node.get("version").asText(),
 | 
			
		||||
				description,
 | 
			
		||||
				license,
 | 
			
		||||
				authors,
 | 
			
		||||
				node.get("readme").asText()
 | 
			
		||||
		);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	private static <T> List<T> mapJsonArray(ArrayNode array, Function<JsonNode, T> mapper) {
 | 
			
		||||
		List<T> list = new ArrayList<>(array.size());
 | 
			
		||||
		for (JsonNode node : array) {
 | 
			
		||||
			list.add(mapper.apply(node));
 | 
			
		||||
		}
 | 
			
		||||
		return list;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
package com.andrewlalis.d_package_search.impl;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
import com.andrewlalis.d_package_search.PackageIndexer;
 | 
			
		||||
import com.andrewlalis.d_package_search.PackageInfo;
 | 
			
		||||
import org.apache.lucene.analysis.Analyzer;
 | 
			
		||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
 | 
			
		||||
import org.apache.lucene.document.Document;
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +16,7 @@ import org.apache.lucene.store.FSDirectory;
 | 
			
		|||
import java.io.IOException;
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
 | 
			
		||||
public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
 | 
			
		||||
public class LucenePackageIndexer implements PackageIndexer {
 | 
			
		||||
	private final IndexWriter indexWriter;
 | 
			
		||||
	private final Directory dir;
 | 
			
		||||
	private final Analyzer analyzer;
 | 
			
		||||
| 
						 | 
				
			
			@ -29,17 +30,14 @@ public class LucenePackageIndexer implements PackageIndexer, AutoCloseable {
 | 
			
		|||
		this.indexWriter = new IndexWriter(dir, config);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public void addToIndex(ObjectNode packageJson) throws IOException {
 | 
			
		||||
		String registryId = packageJson.get("_id").asText();
 | 
			
		||||
		String name = packageJson.get("name").asText();
 | 
			
		||||
		String dubUrl = "https://code.dlang.org/packages/" + name;
 | 
			
		||||
	public void addToIndex(PackageInfo info) throws IOException {
 | 
			
		||||
		String dubUrl = "https://code.dlang.org/packages/" + info.name();
 | 
			
		||||
 | 
			
		||||
		Document doc = new Document();
 | 
			
		||||
		doc.add(new StoredField("registryId", registryId));
 | 
			
		||||
		doc.add(new TextField("name", name, Field.Store.YES));
 | 
			
		||||
		doc.add(new StoredField("dubUrl", dubUrl));
 | 
			
		||||
		doc.add(new TextField("name", info.name(), Field.Store.YES));
 | 
			
		||||
		doc.add(new StoredField("url", dubUrl));
 | 
			
		||||
		indexWriter.addDocument(doc);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,69 @@
 | 
			
		|||
package com.andrewlalis.d_package_search.impl;
 | 
			
		||||
 | 
			
		||||
import com.andrewlalis.d_package_search.PackageSearchResult;
 | 
			
		||||
import com.andrewlalis.d_package_search.PackageSearcher;
 | 
			
		||||
import org.apache.lucene.document.Document;
 | 
			
		||||
import org.apache.lucene.index.DirectoryReader;
 | 
			
		||||
import org.apache.lucene.index.Term;
 | 
			
		||||
import org.apache.lucene.search.*;
 | 
			
		||||
import org.apache.lucene.store.FSDirectory;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.file.Files;
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Collections;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.SequencedCollection;
 | 
			
		||||
import java.util.concurrent.Executors;
 | 
			
		||||
 | 
			
		||||
public class LucenePackageSearcher implements PackageSearcher {
 | 
			
		||||
    private final Path indexPath;
 | 
			
		||||
 | 
			
		||||
    public LucenePackageSearcher(Path indexPath) {
 | 
			
		||||
        this.indexPath = indexPath;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    @Override
 | 
			
		||||
    public SequencedCollection<PackageSearchResult> search(String query) {
 | 
			
		||||
        if (query == null || query.isBlank() || Files.notExists(indexPath)) return Collections.emptyList();
 | 
			
		||||
        Query luceneQuery = buildQuery(query);
 | 
			
		||||
 | 
			
		||||
        try (DirectoryReader dirReader = DirectoryReader.open(FSDirectory.open(indexPath))) {
 | 
			
		||||
            IndexSearcher searcher = new IndexSearcher(dirReader, Executors.newVirtualThreadPerTaskExecutor());
 | 
			
		||||
            TopDocs topDocs = searcher.search(luceneQuery, 25, Sort.RELEVANCE, false);
 | 
			
		||||
            List<PackageSearchResult> results = new ArrayList<>(25);
 | 
			
		||||
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
 | 
			
		||||
                Document doc = searcher.storedFields().document(scoreDoc.doc);
 | 
			
		||||
                results.add(prepareResult(doc));
 | 
			
		||||
            }
 | 
			
		||||
            return results;
 | 
			
		||||
        } catch (IOException e) {
 | 
			
		||||
            System.err.println("An IOException occurred while reading index: " + e.getMessage());
 | 
			
		||||
            return Collections.emptyList();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Builds the Lucene search query for a given textual query string.
 | 
			
		||||
     * @param queryText The query text to use.
 | 
			
		||||
     * @return The query to use.
 | 
			
		||||
     */
 | 
			
		||||
    private Query buildQuery(String queryText) {
 | 
			
		||||
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
 | 
			
		||||
        String[] searchTerms = queryText.toLowerCase().split("\\s+");
 | 
			
		||||
        for (String searchTerm : searchTerms) {
 | 
			
		||||
            String wildcardTerm = searchTerm + "*";
 | 
			
		||||
            Query basicQuery = new WildcardQuery(new Term("name", wildcardTerm));
 | 
			
		||||
            queryBuilder.add(new BoostQuery(basicQuery, 1f), BooleanClause.Occur.SHOULD);
 | 
			
		||||
        }
 | 
			
		||||
        return queryBuilder.build();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private PackageSearchResult prepareResult(Document doc) {
 | 
			
		||||
        return new PackageSearchResult(
 | 
			
		||||
                doc.get("name"),
 | 
			
		||||
                doc.get("url")
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,43 +0,0 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.ObjectMapper;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.io.InputStream;
 | 
			
		||||
import java.net.URI;
 | 
			
		||||
import java.net.http.HttpClient;
 | 
			
		||||
import java.net.http.HttpRequest;
 | 
			
		||||
import java.net.http.HttpResponse;
 | 
			
		||||
import java.time.Duration;
 | 
			
		||||
import java.util.zip.GZIPInputStream;
 | 
			
		||||
 | 
			
		||||
public class DubPackageFetcher implements PackageFetcher {
 | 
			
		||||
	private final HttpClient httpClient = HttpClient.newBuilder()
 | 
			
		||||
			.connectTimeout(Duration.ofSeconds(3))
 | 
			
		||||
			.followRedirects(HttpClient.Redirect.NORMAL)
 | 
			
		||||
			.build();
 | 
			
		||||
	private static final String API_URL = "https://code.dlang.org/api/packages/dump";
 | 
			
		||||
 | 
			
		||||
	@Override
 | 
			
		||||
	public ArrayNode fetch() throws IOException {
 | 
			
		||||
		HttpRequest req = HttpRequest.newBuilder(URI.create(API_URL))
 | 
			
		||||
				.GET()
 | 
			
		||||
				.timeout(Duration.ofSeconds(60))
 | 
			
		||||
				.header("Accept", "application/json")
 | 
			
		||||
				.header("Accept-Encoding", "gzip")
 | 
			
		||||
				.build();
 | 
			
		||||
		try {
 | 
			
		||||
			HttpResponse<InputStream> response = httpClient.send(req, HttpResponse.BodyHandlers.ofInputStream());
 | 
			
		||||
			if (response.statusCode() != 200) {
 | 
			
		||||
				throw new IOException("Response status code " + response.statusCode());
 | 
			
		||||
			}
 | 
			
		||||
			ObjectMapper mapper = new ObjectMapper();
 | 
			
		||||
			try (var in = new GZIPInputStream(response.body())) {
 | 
			
		||||
				return mapper.readValue(in, ArrayNode.class);
 | 
			
		||||
			}
 | 
			
		||||
		} catch (InterruptedException e) {
 | 
			
		||||
			throw new RuntimeException(e);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,43 +0,0 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.JsonNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
import java.time.Duration;
 | 
			
		||||
import java.time.Instant;
 | 
			
		||||
 | 
			
		||||
public class DubRegistrySearch {
 | 
			
		||||
	public static void main(String[] args) throws Exception {
 | 
			
		||||
		if (args.length == 1 && args[0].strip().equalsIgnoreCase("index")) {
 | 
			
		||||
			buildIndex();
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	public static void buildIndex() throws Exception {
 | 
			
		||||
		System.out.println("Building package index.");
 | 
			
		||||
		PackageFetcher fetcher = new DubPackageFetcher();
 | 
			
		||||
		System.out.println("Fetching packages...");
 | 
			
		||||
		ArrayNode packagesArray = fetcher.fetch();
 | 
			
		||||
		int docCount = 0;
 | 
			
		||||
		Duration indexDuration;
 | 
			
		||||
		try (var indexer = new LucenePackageIndexer(Path.of("package-index"))) {
 | 
			
		||||
			Instant start = Instant.now();
 | 
			
		||||
			for (JsonNode node : packagesArray) {
 | 
			
		||||
				if (node.isObject()) {
 | 
			
		||||
					try {
 | 
			
		||||
						indexer.addToIndex((ObjectNode) node);
 | 
			
		||||
						docCount++;
 | 
			
		||||
					} catch (IOException e) {
 | 
			
		||||
						e.printStackTrace();
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			Instant end = Instant.now();
 | 
			
		||||
			indexDuration = Duration.between(start, end);
 | 
			
		||||
		}
 | 
			
		||||
		System.out.println("Done! Added " + docCount + " packages to the index in " + indexDuration.toMillis() + " ms.");
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,9 +0,0 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ArrayNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
 | 
			
		||||
public interface PackageFetcher {
 | 
			
		||||
	ArrayNode fetch() throws IOException;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,9 +0,0 @@
 | 
			
		|||
package io.github.andrewlalis.dub_registry_search;
 | 
			
		||||
 | 
			
		||||
import com.fasterxml.jackson.databind.node.ObjectNode;
 | 
			
		||||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
 | 
			
		||||
public interface PackageIndexer {
 | 
			
		||||
	void addToIndex(ObjectNode packageJson) throws IOException;
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue