Improved searcher with wildcards.

This commit is contained in:
Andrew Lalis 2023-01-24 17:41:20 +01:00
parent 470842172c
commit a0ffb9c166
3 changed files with 29 additions and 15 deletions

View File

@ -3,3 +3,7 @@
A simple search API for Gymboard, backed by Apache Lucene. This application provides both the indexing of gyms and other searchable entities and a public web interface for searching those indexes.
This application is configured with read-only access to the central Gymboard database, for its indexing operations.
## Developing
Currently, this application is designed to boot up and immediately read the latest data from the Gymboard API's database to rebuild its indexes.

View File

@ -50,8 +50,10 @@ public class GymIndexGenerator {
String streetAddress = resultSet.getString("street_address");
BigDecimal latitude = resultSet.getBigDecimal("latitude");
BigDecimal longitude = resultSet.getBigDecimal("longitude");
String gymCompoundId = String.format("%s/%s/%s", countryCode, cityShortName, shortName);
Document doc = new Document();
doc.add(new StoredField("compound_id", gymCompoundId));
doc.add(new TextField("short_name", shortName, Field.Store.YES));
doc.add(new TextField("display_name", displayName, Field.Store.YES));
doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));

View File

@ -10,34 +10,42 @@ import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
* Searcher that uses a Lucene {@link IndexSearcher} to search for gyms using
* a query built from wildcard search terms matched against a set of weighted
* fields. The query is built as follows:
* <ol>
* <li>If the query is null or blank, return an empty list.</li>
* <li>Split the query into words, append the wildcard '*' to each word.</li>
* <li>For each word, add a boosted wildcard query for each weighted field.</li>
* </ol>
*/
@Service
public class GymIndexSearcher {
public List<GymResponse> searchGyms(String rawQuery) {
if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList();
String[] terms = rawQuery.split("\\s+");
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
String[] searchableFields = {
"short_name",
"display_name",
"city_short_name",
"city_name",
"country_code",
"country_name",
"street_address"
};
Map<String, Float> fieldWeights = new HashMap<>();
fieldWeights.put("short_name", 3f);
fieldWeights.put("display_name", 3f);
fieldWeights.put("city_short_name", 1f);
fieldWeights.put("city_name", 1f);
fieldWeights.put("country_code", 0.25f);
fieldWeights.put("country_name", 0.5f);
fieldWeights.put("street_address", 0.1f);
for (String term : terms) {
for (String field : searchableFields) {
queryBuilder.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD);
String searchTerm = term.strip() + "*";
for (var entry : fieldWeights.entrySet()) {
Query baseQuery = new WildcardQuery(new Term(entry.getKey(), searchTerm));
queryBuilder.add(new BoostQuery(baseQuery, entry.getValue()), BooleanClause.Occur.SHOULD);
}
}
BooleanQuery query = queryBuilder.build();
Path gymIndexDir = Path.of("gym-index");
try (
var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir));
var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir))
) {
IndexSearcher searcher = new IndexSearcher(reader);
List<GymResponse> results = new ArrayList<>(10);