Improved searcher with wildcards.
This commit is contained in:
parent
df2dd1242f
commit
548d0a7397
|
@ -3,3 +3,7 @@
|
|||
A simple search API for Gymboard, backed by Apache Lucene. This application both indexes gyms and other searchable entities and provides a public web interface for searching those indexes.
|
||||
|
||||
This application is configured with read-only access to the central Gymboard database, for its indexing operations.
|
||||
|
||||
## Developing
|
||||
|
||||
Currently, this application is designed to boot up and immediately read the latest data from the Gymboard API's database to rebuild its indexes.
|
||||
|
|
|
@ -50,8 +50,10 @@ public class GymIndexGenerator {
|
|||
String streetAddress = resultSet.getString("street_address");
|
||||
BigDecimal latitude = resultSet.getBigDecimal("latitude");
|
||||
BigDecimal longitude = resultSet.getBigDecimal("longitude");
|
||||
String gymCompoundId = String.format("%s/%s/%s", countryCode, cityShortName, shortName);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("compound_id", gymCompoundId));
|
||||
doc.add(new TextField("short_name", shortName, Field.Store.YES));
|
||||
doc.add(new TextField("display_name", displayName, Field.Store.YES));
|
||||
doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));
|
||||
|
|
|
@ -10,34 +10,42 @@ import org.springframework.stereotype.Service;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Searcher that uses a Lucene {@link IndexSearcher} to search for gyms using
|
||||
* a query that's built from a weighted list of wildcard search terms.
|
||||
* <ol>
|
||||
* <li>If the query is null or blank, return an empty list.</li>
|
||||
* <li>Split the query into words on whitespace, then append the wildcard '*' to each word.</li>
|
||||
* <li>For each word, add a boosted wildcard query for each weighted field.</li>
|
||||
* </ol>
|
||||
*/
|
||||
@Service
|
||||
public class GymIndexSearcher {
|
||||
public List<GymResponse> searchGyms(String rawQuery) {
|
||||
if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList();
|
||||
String[] terms = rawQuery.split("\\s+");
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
String[] searchableFields = {
|
||||
"short_name",
|
||||
"display_name",
|
||||
"city_short_name",
|
||||
"city_name",
|
||||
"country_code",
|
||||
"country_name",
|
||||
"street_address"
|
||||
};
|
||||
Map<String, Float> fieldWeights = new HashMap<>();
|
||||
fieldWeights.put("short_name", 3f);
|
||||
fieldWeights.put("display_name", 3f);
|
||||
fieldWeights.put("city_short_name", 1f);
|
||||
fieldWeights.put("city_name", 1f);
|
||||
fieldWeights.put("country_code", 0.25f);
|
||||
fieldWeights.put("country_name", 0.5f);
|
||||
fieldWeights.put("street_address", 0.1f);
|
||||
for (String term : terms) {
|
||||
for (String field : searchableFields) {
|
||||
queryBuilder.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD);
|
||||
String searchTerm = term.strip() + "*";
|
||||
for (var entry : fieldWeights.entrySet()) {
|
||||
Query baseQuery = new WildcardQuery(new Term(entry.getKey(), searchTerm));
|
||||
queryBuilder.add(new BoostQuery(baseQuery, entry.getValue()), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
BooleanQuery query = queryBuilder.build();
|
||||
Path gymIndexDir = Path.of("gym-index");
|
||||
try (
|
||||
var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir));
|
||||
var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir))
|
||||
) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
List<GymResponse> results = new ArrayList<>(10);
|
||||
|
|
Loading…
Reference in New Issue