diff --git a/gymboard-search/README.md b/gymboard-search/README.md index 0b82fc8..7f7f5e7 100644 --- a/gymboard-search/README.md +++ b/gymboard-search/README.md @@ -3,3 +3,7 @@ A simple search API for Gymboard, backed by Apache Lucene. This application includes both indexing of Gyms and other searchable entities, and a public web interface for searching those indexes. This application is configured with read-only access to the central Gymboard database, for its indexing operations. + +## Developing + +Currently, this application is designed to boot up and immediately read the latest data from the Gymboard API's database to rebuild its indexes. diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java index 3193c87..d231c6b 100644 --- a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java @@ -50,8 +50,10 @@ public class GymIndexGenerator { String streetAddress = resultSet.getString("street_address"); BigDecimal latitude = resultSet.getBigDecimal("latitude"); BigDecimal longitude = resultSet.getBigDecimal("longitude"); + String gymCompoundId = String.format("%s/%s/%s", countryCode, cityShortName, shortName); Document doc = new Document(); + doc.add(new StoredField("compound_id", gymCompoundId)); doc.add(new TextField("short_name", shortName, Field.Store.YES)); doc.add(new TextField("display_name", displayName, Field.Store.YES)); doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES)); diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java index 05d9802..83b13cc 100644 --- 
a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java @@ -10,34 +10,42 @@ import org.springframework.stereotype.Service; import java.io.IOException; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.*; +/** + * Searcher that uses a Lucene {@link IndexSearcher} to search for gyms using + * a query that's built from a weighted list of wildcard search terms. + *
<ol>
+ *     <li>If the query is blank, return an empty list.</li>
+ *     <li>Split the query into words, append the wildcard '*' to each word.</li>
+ *     <li>For each word, add a boosted wildcard query for each weighted field.</li>
+ * </ol>
+ */ @Service public class GymIndexSearcher { public List searchGyms(String rawQuery) { if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList(); String[] terms = rawQuery.split("\\s+"); BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); - String[] searchableFields = { - "short_name", - "display_name", - "city_short_name", - "city_name", - "country_code", - "country_name", - "street_address" - }; + Map fieldWeights = new HashMap<>(); + fieldWeights.put("short_name", 3f); + fieldWeights.put("display_name", 3f); + fieldWeights.put("city_short_name", 1f); + fieldWeights.put("city_name", 1f); + fieldWeights.put("country_code", 0.25f); + fieldWeights.put("country_name", 0.5f); + fieldWeights.put("street_address", 0.1f); for (String term : terms) { - for (String field : searchableFields) { - queryBuilder.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD); + String searchTerm = term.strip() + "*"; + for (var entry : fieldWeights.entrySet()) { + Query baseQuery = new WildcardQuery(new Term(entry.getKey(), searchTerm)); + queryBuilder.add(new BoostQuery(baseQuery, entry.getValue()), BooleanClause.Occur.SHOULD); } } BooleanQuery query = queryBuilder.build(); Path gymIndexDir = Path.of("gym-index"); try ( - var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir)); + var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir)) ) { IndexSearcher searcher = new IndexSearcher(reader); List results = new ArrayList<>(10);