Improved searcher with wildcards.
This commit is contained in:
		
							parent
							
								
									df2dd1242f
								
							
						
					
					
						commit
						548d0a7397
					
				| 
						 | 
				
			
			@ -3,3 +3,7 @@
 | 
			
		|||
A simple search API for Gymboard, backed by Apache Lucene. This application both indexes Gyms and other searchable entities and provides a public web interface for searching those indexes.
 | 
			
		||||
 | 
			
		||||
This application is configured with read-only access to the central Gymboard database, for its indexing operations.
 | 
			
		||||
 | 
			
		||||
## Developing
 | 
			
		||||
 | 
			
		||||
Currently, this application is designed to boot up and immediately read the latest data from the Gymboard API's database to rebuild its indexes.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -50,8 +50,10 @@ public class GymIndexGenerator {
 | 
			
		|||
				String streetAddress = resultSet.getString("street_address");
 | 
			
		||||
				BigDecimal latitude = resultSet.getBigDecimal("latitude");
 | 
			
		||||
				BigDecimal longitude = resultSet.getBigDecimal("longitude");
 | 
			
		||||
				String gymCompoundId = String.format("%s/%s/%s", countryCode, cityShortName, shortName);
 | 
			
		||||
 | 
			
		||||
				Document doc = new Document();
 | 
			
		||||
				doc.add(new StoredField("compound_id", gymCompoundId));
 | 
			
		||||
				doc.add(new TextField("short_name", shortName, Field.Store.YES));
 | 
			
		||||
				doc.add(new TextField("display_name", displayName, Field.Store.YES));
 | 
			
		||||
				doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,34 +10,42 @@ import org.springframework.stereotype.Service;
 | 
			
		|||
 | 
			
		||||
import java.io.IOException;
 | 
			
		||||
import java.nio.file.Path;
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
import java.util.Collections;
 | 
			
		||||
import java.util.List;
 | 
			
		||||
import java.util.*;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Searcher that uses a Lucene {@link IndexSearcher} to search for gyms using
 | 
			
		||||
 * a query that's built from a weighted list of wildcard search terms.
 | 
			
		||||
 * <ol>
 | 
			
		||||
 *     <li>If the query is blank, return an empty list.</li>
 | 
			
		||||
 *     <li>Split the query into words and append the wildcard '*' to each word.</li>
 | 
			
		||||
 *     <li>For each word, add a boosted wildcard query for each weighted field.</li>
 | 
			
		||||
 * </ol>
 | 
			
		||||
 */
 | 
			
		||||
@Service
 | 
			
		||||
public class GymIndexSearcher {
 | 
			
		||||
	public List<GymResponse> searchGyms(String rawQuery) {
 | 
			
		||||
		if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList();
 | 
			
		||||
		String[] terms = rawQuery.split("\\s+");
 | 
			
		||||
		BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
 | 
			
		||||
		String[] searchableFields = {
 | 
			
		||||
			"short_name",
 | 
			
		||||
			"display_name",
 | 
			
		||||
			"city_short_name",
 | 
			
		||||
			"city_name",
 | 
			
		||||
			"country_code",
 | 
			
		||||
			"country_name",
 | 
			
		||||
			"street_address"
 | 
			
		||||
		};
 | 
			
		||||
		Map<String, Float> fieldWeights = new HashMap<>();
 | 
			
		||||
		fieldWeights.put("short_name", 3f);
 | 
			
		||||
		fieldWeights.put("display_name", 3f);
 | 
			
		||||
		fieldWeights.put("city_short_name", 1f);
 | 
			
		||||
		fieldWeights.put("city_name", 1f);
 | 
			
		||||
		fieldWeights.put("country_code", 0.25f);
 | 
			
		||||
		fieldWeights.put("country_name", 0.5f);
 | 
			
		||||
		fieldWeights.put("street_address", 0.1f);
 | 
			
		||||
		for (String term : terms) {
 | 
			
		||||
			for (String field : searchableFields) {
 | 
			
		||||
				queryBuilder.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD);
 | 
			
		||||
			String searchTerm = term.strip() + "*";
 | 
			
		||||
			for (var entry : fieldWeights.entrySet()) {
 | 
			
		||||
				Query baseQuery = new WildcardQuery(new Term(entry.getKey(), searchTerm));
 | 
			
		||||
				queryBuilder.add(new BoostQuery(baseQuery, entry.getValue()), BooleanClause.Occur.SHOULD);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		BooleanQuery query = queryBuilder.build();
 | 
			
		||||
		Path gymIndexDir = Path.of("gym-index");
 | 
			
		||||
		try (
 | 
			
		||||
				var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir));
 | 
			
		||||
				var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir))
 | 
			
		||||
		) {
 | 
			
		||||
			IndexSearcher searcher = new IndexSearcher(reader);
 | 
			
		||||
			List<GymResponse> results = new ArrayList<>(10);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue