Improved searcher with wildcards.
This commit is contained in:
parent
470842172c
commit
a0ffb9c166
|
@ -3,3 +3,7 @@
|
||||||
A simple search API for Gymboard, backed by Apache Lucene. This application includes both indexing of Gyms and other searchable entities, and a public web interface for searching those indexes.
|
A simple search API for Gymboard, backed by Apache Lucene. This application includes both indexing of Gyms and other searchable entities, and a public web interface for searching those indexes.
|
||||||
|
|
||||||
This application is configured with read-only access to the central Gymboard database, for its indexing operations.
|
This application is configured with read-only access to the central Gymboard database, for its indexing operations.
|
||||||
|
|
||||||
|
## Developing
|
||||||
|
|
||||||
|
Currently, this application is designed to boot up and immediately read the latest data from the Gymboard API's database to rebuild its indexes.
|
||||||
|
|
|
@ -50,8 +50,10 @@ public class GymIndexGenerator {
|
||||||
String streetAddress = resultSet.getString("street_address");
|
String streetAddress = resultSet.getString("street_address");
|
||||||
BigDecimal latitude = resultSet.getBigDecimal("latitude");
|
BigDecimal latitude = resultSet.getBigDecimal("latitude");
|
||||||
BigDecimal longitude = resultSet.getBigDecimal("longitude");
|
BigDecimal longitude = resultSet.getBigDecimal("longitude");
|
||||||
|
String gymCompoundId = String.format("%s/%s/%s", countryCode, cityShortName, shortName);
|
||||||
|
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
|
doc.add(new StoredField("compound_id", gymCompoundId));
|
||||||
doc.add(new TextField("short_name", shortName, Field.Store.YES));
|
doc.add(new TextField("short_name", shortName, Field.Store.YES));
|
||||||
doc.add(new TextField("display_name", displayName, Field.Store.YES));
|
doc.add(new TextField("display_name", displayName, Field.Store.YES));
|
||||||
doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));
|
doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));
|
||||||
|
|
|
@ -10,34 +10,42 @@ import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Searcher that uses a Lucene {@link IndexSearcher} to search for gyms using
|
||||||
|
* a query that's built from wildcard search terms, each boosted by a per-field weight.
|
||||||
|
* <ol>
|
||||||
|
* <li>If the query is blank, return an empty list.</li>
|
||||||
|
* <li>Split the query into words and append the wildcard '*' to each word.</li>
|
||||||
|
* <li>For each word, add a boosted wildcard query for each weighted field.</li>
|
||||||
|
* </ol>
|
||||||
|
*/
|
||||||
@Service
|
@Service
|
||||||
public class GymIndexSearcher {
|
public class GymIndexSearcher {
|
||||||
public List<GymResponse> searchGyms(String rawQuery) {
|
public List<GymResponse> searchGyms(String rawQuery) {
|
||||||
if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList();
|
if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList();
|
||||||
String[] terms = rawQuery.split("\\s+");
|
String[] terms = rawQuery.split("\\s+");
|
||||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||||
String[] searchableFields = {
|
Map<String, Float> fieldWeights = new HashMap<>();
|
||||||
"short_name",
|
fieldWeights.put("short_name", 3f);
|
||||||
"display_name",
|
fieldWeights.put("display_name", 3f);
|
||||||
"city_short_name",
|
fieldWeights.put("city_short_name", 1f);
|
||||||
"city_name",
|
fieldWeights.put("city_name", 1f);
|
||||||
"country_code",
|
fieldWeights.put("country_code", 0.25f);
|
||||||
"country_name",
|
fieldWeights.put("country_name", 0.5f);
|
||||||
"street_address"
|
fieldWeights.put("street_address", 0.1f);
|
||||||
};
|
|
||||||
for (String term : terms) {
|
for (String term : terms) {
|
||||||
for (String field : searchableFields) {
|
String searchTerm = term.strip() + "*";
|
||||||
queryBuilder.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD);
|
for (var entry : fieldWeights.entrySet()) {
|
||||||
|
Query baseQuery = new WildcardQuery(new Term(entry.getKey(), searchTerm));
|
||||||
|
queryBuilder.add(new BoostQuery(baseQuery, entry.getValue()), BooleanClause.Occur.SHOULD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BooleanQuery query = queryBuilder.build();
|
BooleanQuery query = queryBuilder.build();
|
||||||
Path gymIndexDir = Path.of("gym-index");
|
Path gymIndexDir = Path.of("gym-index");
|
||||||
try (
|
try (
|
||||||
var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir));
|
var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir))
|
||||||
) {
|
) {
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
List<GymResponse> results = new ArrayList<>(10);
|
List<GymResponse> results = new ArrayList<>(10);
|
||||||
|
|
Loading…
Reference in New Issue