Improved scoring, added more documentation to LucenePackageSearcher, added explanations to results.
This commit is contained in:
parent
79084dbb9e
commit
db4774938a
|
@ -2,5 +2,6 @@ package com.andrewlalis.d_package_search;
|
||||||
|
|
||||||
public record PackageSearchResult(
|
public record PackageSearchResult(
|
||||||
String name,
|
String name,
|
||||||
String url
|
String url,
|
||||||
|
String explanation
|
||||||
) {}
|
) {}
|
||||||
|
|
|
@ -20,6 +20,22 @@ import java.util.concurrent.Executors;
|
||||||
* search a Lucene index.
|
* search a Lucene index.
|
||||||
*/
|
*/
|
||||||
public class LucenePackageSearcher implements PackageSearcher {
|
public class LucenePackageSearcher implements PackageSearcher {
|
||||||
|
/**
|
||||||
|
* Factor by which we prefer results containing the entire search phrase
|
||||||
|
* instead of just a part of it.
|
||||||
|
*/
|
||||||
|
private static final float PHRASE_WEIGHT_MODIFIER = 2f;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A mapping of indexed fields, and the weight they contribute to a result's
|
||||||
|
* score, if the result contains a match for the field.
|
||||||
|
*/
|
||||||
|
private static final Map<String, Float> WEIGHTED_FIELDS = Map.of(
|
||||||
|
"name", 1f,
|
||||||
|
"description", 0.5f,
|
||||||
|
"readme", 0.25f
|
||||||
|
);
|
||||||
|
|
||||||
private final Path indexPath;
|
private final Path indexPath;
|
||||||
|
|
||||||
public LucenePackageSearcher(Path indexPath) {
|
public LucenePackageSearcher(Path indexPath) {
|
||||||
|
@ -37,7 +53,11 @@ public class LucenePackageSearcher implements PackageSearcher {
|
||||||
List<PackageSearchResult> results = new ArrayList<>(25);
|
List<PackageSearchResult> results = new ArrayList<>(25);
|
||||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||||
Document doc = searcher.storedFields().document(scoreDoc.doc);
|
Document doc = searcher.storedFields().document(scoreDoc.doc);
|
||||||
results.add(prepareResult(doc));
|
results.add(prepareResult(
|
||||||
|
doc,
|
||||||
|
"Search result scoring explanation:\n" +
|
||||||
|
searcher.explain(luceneQuery, scoreDoc.doc).toString()
|
||||||
|
));
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -55,35 +75,48 @@ public class LucenePackageSearcher implements PackageSearcher {
|
||||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||||
String[] searchTerms = queryText.toLowerCase().split("\\s+");
|
String[] searchTerms = queryText.toLowerCase().split("\\s+");
|
||||||
|
|
||||||
// We define a set of weighted fields that we will add prefix queries for.
|
|
||||||
Map<String, Float> weightedFields = Map.of(
|
|
||||||
"name", 1f,
|
|
||||||
"description", 0.5f,
|
|
||||||
"readme", 0.25f
|
|
||||||
);
|
|
||||||
|
|
||||||
// Only consider the first 5 search terms, adding a prefix query on each weighted field for every one of them.
|
// Only consider the first 5 search terms, adding a prefix query on each weighted field for every one of them.
|
||||||
for (int i = 0; i < Math.min(5, searchTerms.length); i++) {
|
for (int i = 0; i < Math.min(5, searchTerms.length); i++) {
|
||||||
for (var entry : weightedFields.entrySet()) {
|
for (var entry : WEIGHTED_FIELDS.entrySet()) {
|
||||||
String fieldName = entry.getKey();
|
String fieldName = entry.getKey();
|
||||||
float fieldWeight = entry.getValue();
|
float fieldWeight = entry.getValue();
|
||||||
Query termQuery = new BoostQuery(new PrefixQuery(new Term(fieldName, searchTerms[i])), fieldWeight);
|
Query termQuery = new BoostQuery(new PrefixQuery(new Term(fieldName, searchTerms[i])), fieldWeight);
|
||||||
queryBuilder.add(termQuery, BooleanClause.Occur.SHOULD);
|
queryBuilder.add(termQuery, BooleanClause.Occur.SHOULD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
If there's more than one word in the search query, put an extra emphasis
|
||||||
|
on finding a match with the entire query together. We use the PhraseQuery
|
||||||
|
builder to build an ordered phrase query for each of the weighted fields.
|
||||||
|
*/
|
||||||
|
if (searchTerms.length > 1) {
|
||||||
|
for (var entry : WEIGHTED_FIELDS.entrySet()) {
|
||||||
|
String fieldName = entry.getKey();
|
||||||
|
float fieldWeight = entry.getValue();
|
||||||
|
PhraseQuery.Builder phraseQueryBuilder = new PhraseQuery.Builder();
|
||||||
|
for (int i = 0; i < searchTerms.length; i++) {
|
||||||
|
phraseQueryBuilder.add(new Term(fieldName, searchTerms[i]), i);
|
||||||
|
}
|
||||||
|
queryBuilder.add(new BoostQuery(phraseQueryBuilder.build(), fieldWeight * PHRASE_WEIGHT_MODIFIER), BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Query baseQuery = queryBuilder.build();
|
Query baseQuery = queryBuilder.build();
|
||||||
|
System.out.println("Query: " + baseQuery.toString());
|
||||||
Query boostedQuery = new BooleanQuery.Builder()
|
Query boostedQuery = new BooleanQuery.Builder()
|
||||||
.add(baseQuery, BooleanClause.Occur.MUST)
|
.add(baseQuery, BooleanClause.Occur.MUST)
|
||||||
.add(FeatureField.newSaturationQuery("features", "recency"), BooleanClause.Occur.SHOULD)
|
.add(FeatureField.newSaturationQuery("features", "recency", 0.25f, 1f/30f), BooleanClause.Occur.SHOULD)
|
||||||
.add(FeatureField.newSaturationQuery("features", "downloads"), BooleanClause.Occur.SHOULD)
|
.add(FeatureField.newSaturationQuery("features", "downloads", 0.5f, 500f), BooleanClause.Occur.SHOULD)
|
||||||
.build();
|
.build();
|
||||||
return boostedQuery;
|
return boostedQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
private PackageSearchResult prepareResult(Document doc) {
|
private PackageSearchResult prepareResult(Document doc, String explanation) {
|
||||||
return new PackageSearchResult(
|
return new PackageSearchResult(
|
||||||
doc.get("name"),
|
doc.get("name"),
|
||||||
doc.get("url")
|
doc.get("url"),
|
||||||
|
explanation
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
<h1>D Package Search</h1>
|
<h1>D Package Search</h1>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
Use this site to search for D packages!
|
Use this site to search for D packages. It's currently a proof-of-concept for using Lucene to index and search for D packages, since Lucene offers many out-of-the-box features, such as full-text indexing, scoring based on numerical features, and explanations of exactly how results were found.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<input id="search-input" type="text" placeholder="Search for a package..."/>
|
<input id="search-input" type="text" placeholder="Search for a package..."/>
|
||||||
|
@ -27,26 +27,36 @@
|
||||||
container.innerHTML = "";
|
container.innerHTML = "";
|
||||||
for (let i = 0; i < results.length; i++) {
|
for (let i = 0; i < results.length; i++) {
|
||||||
const element = document.createElement("div");
|
const element = document.createElement("div");
|
||||||
|
element.style = "border: 1px solid black; margin: 5px; padding: 5px;";
|
||||||
const header = document.createElement("h3");
|
const header = document.createElement("h3");
|
||||||
header.innerText = results[i].name;
|
header.innerText = results[i].name;
|
||||||
element.appendChild(header);
|
element.appendChild(header);
|
||||||
const link = document.createElement("a");
|
const link = document.createElement("a");
|
||||||
link.href = results[i].url;
|
link.href = results[i].url;
|
||||||
link.innerText = "Link to package";
|
link.innerText = results[i].url;
|
||||||
element.appendChild(link);
|
element.appendChild(link);
|
||||||
|
const explanation = document.createElement("pre");
|
||||||
|
explanation.innerText = results[i].explanation;
|
||||||
|
element.appendChild(explanation);
|
||||||
container.appendChild(element);
|
container.appendChild(element);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchInput = document.getElementById("search-input");
|
const searchInput = document.getElementById("search-input");
|
||||||
|
let searchTimeoutId = null;
|
||||||
searchInput.addEventListener("keyup", async () => {
|
searchInput.addEventListener("keyup", async () => {
|
||||||
const query = searchInput.value;
|
const query = searchInput.value;
|
||||||
|
if (searchTimeoutId) {
|
||||||
|
window.clearTimeout(searchTimeoutId);
|
||||||
|
}
|
||||||
if (query.length < 1) {
|
if (query.length < 1) {
|
||||||
showResults([]);
|
showResults([]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const results = await fetchResults(query);
|
searchTimeoutId = window.setTimeout(async () => {
|
||||||
showResults(results);
|
const results = await fetchResults(query);
|
||||||
|
showResults(results);
|
||||||
|
}, 1000);
|
||||||
})
|
})
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|
Loading…
Reference in New Issue