Refactored indexing and searching to be modular.

This commit is contained in:
Andrew Lalis 2023-02-06 13:28:51 +01:00
parent a8715fa8d2
commit ee5ff41167
18 changed files with 413 additions and 154 deletions

View File

@ -28,6 +28,11 @@ public class UserController {
return new UserResponse(user);
}
/**
 * Endpoint for fetching a single user by id.
 * @param userId The id taken from the {@code userId} segment of the request path.
 * @return The response produced by the user service for that id.
 */
@GetMapping(path = "/auth/users/{userId}")
public UserResponse getUser(@PathVariable String userId) {
    final UserResponse foundUser = userService.getUser(userId);
    return foundUser;
}
/**
* Endpoint for updating one's own password.
* @param user The user that's updating their password.

View File

@ -7,18 +7,18 @@ import java.time.format.DateTimeFormatter;
public record UserPersonalDetailsResponse(
String userId,
String birthDate,
float currentWeight,
Float currentWeight,
String currentWeightUnit,
float currentMetricWeight,
Float currentMetricWeight,
String sex
) {
public UserPersonalDetailsResponse(UserPersonalDetails pd) {
this(
pd.getUserId(),
pd.getBirthDate().format(DateTimeFormatter.ISO_LOCAL_DATE),
pd.getCurrentWeight().floatValue(),
pd.getCurrentWeightUnit().name(),
pd.getCurrentMetricWeight().floatValue(),
pd.getBirthDate() == null ? null : pd.getBirthDate().format(DateTimeFormatter.ISO_LOCAL_DATE),
pd.getCurrentWeight() == null ? null : pd.getCurrentWeight().floatValue(),
pd.getCurrentWeightUnit() == null ? null : pd.getCurrentWeightUnit().name(),
pd.getCurrentMetricWeight() == null ? null : pd.getCurrentMetricWeight().floatValue(),
pd.getSex().name()
);
}

View File

@ -1,5 +1,5 @@
import { api } from 'src/api/main/index';
import { AuthStoreType } from 'stores/auth-store';
import {api} from 'src/api/main/index';
import {AuthStoreType} from 'stores/auth-store';
import Timeout = NodeJS.Timeout;
export interface User {
@ -9,6 +9,26 @@ export interface User {
name: string;
}
/**
 * Sex values used in a user's personal details. Each member's string value
 * is identical to its member name.
 */
export enum PersonSex {
MALE = 'MALE',
FEMALE = 'FEMALE',
UNKNOWN = 'UNKNOWN'
}
/**
 * Personal details of a user, as returned by `/auth/me/personal-details`.
 * Every field except `userId` and `sex` may be absent when the user has not
 * provided it.
 */
export interface UserPersonalDetails {
  userId: string;
  /** Birth date as an ISO local date string (e.g. `2000-01-31`). */
  birthDate?: string;
  /** Current weight, expressed in `currentWeightUnit`. */
  currentWeight?: number;
  /** Name of the unit in which `currentWeight` is expressed; the API sends it as a string. */
  currentWeightUnit?: string;
  /** Current weight as reported by the API in its metric form. */
  currentMetricWeight?: number;
  sex: PersonSex;
}
/**
 * A user's preferences, as resolved by `getMyPreferences` from
 * `/auth/me/preferences`.
 */
export interface UserPreferences {
userId: string;
// NOTE(review): presumably true when the account is hidden from other users; confirm against the API.
accountPrivate: boolean;
locale: string;
}
export interface TokenCredentials {
email: string;
password: string;
@ -66,6 +86,11 @@ class AuthModule {
return response.data;
}
/**
 * Fetches a single user by id.
 * @param userId The id of the user to fetch.
 * @param authStore The auth store whose axios config is sent with the request.
 * @returns The body of the API response.
 */
public async fetchUser(userId: string, authStore: AuthStoreType): Promise<User> {
  const { data } = await api.get(`/auth/users/${userId}`, authStore.axiosConfig);
  return data;
}
public async updatePassword(newPassword: string, authStore: AuthStoreType) {
await api.post(
'/auth/me/password',
@ -84,6 +109,16 @@ class AuthModule {
newPassword: newPassword,
});
}
/**
 * Fetches the personal details of the currently authenticated user.
 * @param authStore The auth store whose axios config is sent with the request.
 * @returns The body of the API response.
 */
public async getMyPersonalDetails(authStore: AuthStoreType): Promise<UserPersonalDetails> {
  const { data } = await api.get('/auth/me/personal-details', authStore.axiosConfig);
  return data;
}
/**
 * Fetches the preferences of the currently authenticated user.
 * @param authStore The auth store whose axios config is sent with the request.
 * @returns The body of the API response.
 */
public async getMyPreferences(authStore: AuthStoreType): Promise<UserPreferences> {
  const { data } = await api.get('/auth/me/preferences', authStore.axiosConfig);
  return data;
}
}
export default AuthModule;

View File

@ -33,3 +33,4 @@ build/
.vscode/
gym-index/
user-index/

View File

@ -1,6 +1,6 @@
package nl.andrewlalis.gymboardsearch;
import nl.andrewlalis.gymboardsearch.index.GymIndexGenerator;
import nl.andrewlalis.gymboardsearch.index.JdbcIndexGenerator;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@ -9,10 +9,10 @@ import java.util.TimeZone;
@SpringBootApplication
public class GymboardSearchApplication implements CommandLineRunner {
private final GymIndexGenerator gymIndexGenerator;
public GymboardSearchApplication(GymIndexGenerator gymIndexGenerator) {
public GymboardSearchApplication(JdbcIndexGenerator gymIndexGenerator, JdbcIndexGenerator userIndexGenerator) {
this.gymIndexGenerator = gymIndexGenerator;
this.userIndexGenerator = userIndexGenerator;
}
public static void main(String[] args) {
@ -20,8 +20,12 @@ public class GymboardSearchApplication implements CommandLineRunner {
SpringApplication.run(GymboardSearchApplication.class, args);
}
private final JdbcIndexGenerator gymIndexGenerator;
private final JdbcIndexGenerator userIndexGenerator;
@Override
public void run(String... args) throws Exception {
gymIndexGenerator.generateIndex();
public void run(String... args) {
gymIndexGenerator.generate();
userIndexGenerator.generate();
}
}

View File

@ -1,7 +1,8 @@
package nl.andrewlalis.gymboardsearch;
import nl.andrewlalis.gymboardsearch.dto.GymResponse;
import nl.andrewlalis.gymboardsearch.index.GymIndexSearcher;
import nl.andrewlalis.gymboardsearch.dto.UserResponse;
import nl.andrewlalis.gymboardsearch.index.QueryIndexSearcher;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
@ -10,14 +11,21 @@ import java.util.List;
@RestController
public class SearchController {
private final GymIndexSearcher gymIndexSearcher;
private final QueryIndexSearcher<GymResponse> gymIndexSearcher;
private final QueryIndexSearcher<UserResponse> userIndexSearcher;
public SearchController(GymIndexSearcher gymIndexSearcher) {
public SearchController(QueryIndexSearcher<GymResponse> gymIndexSearcher, QueryIndexSearcher<UserResponse> userIndexSearcher) {
this.gymIndexSearcher = gymIndexSearcher;
this.userIndexSearcher = userIndexSearcher;
}
@GetMapping(path = "/search/gyms")
public List<GymResponse> searchGyms(@RequestParam(name = "q", required = false) String query) {
return gymIndexSearcher.searchGyms(query);
return gymIndexSearcher.search(query);
}
/**
 * Endpoint for searching the user index.
 * @param query The raw search text from the optional {@code q} request parameter.
 * @return The users found by the user index searcher.
 */
@GetMapping(path = "/search/users")
public List<UserResponse> searchUsers(@RequestParam(name = "q", required = false) String query) {
    final List<UserResponse> matches = userIndexSearcher.search(query);
    return matches;
}
}

View File

@ -0,0 +1,15 @@
package nl.andrewlalis.gymboardsearch.dto;
import org.apache.lucene.document.Document;
/**
 * A user as returned from a search of the user index.
 * @param id The value of the document's "id" field.
 * @param name The value of the document's "name" field.
 */
public record UserResponse(String id, String name) {
    /**
     * Builds a response by reading the "id" and "name" fields of a Lucene
     * document.
     * @param doc The document to read from.
     */
    public UserResponse(Document doc) {
        this(doc.get("id"), doc.get("name"));
    }
}

View File

@ -1,74 +0,0 @@
package nl.andrewlalis.gymboardsearch.index;
import nl.andrewlalis.gymboardsearch.DbUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.util.FileSystemUtils;
import java.math.BigDecimal;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
/**
 * Service that rebuilds the Lucene index of gyms in the "gym-index"
 * directory from the rows returned by the select-gyms query.
 */
@Service
public class GymIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(GymIndexGenerator.class);

    /**
     * Deletes the existing index directory, recreates it, and writes one
     * document for each row of the query result.
     * @throws Exception If resetting the directory, querying the database,
     * or writing to the index fails.
     */
    public void generateIndex() throws Exception {
        log.info("Starting Gym index generation.");
        final Path indexPath = Path.of("gym-index");
        FileSystemUtils.deleteRecursively(indexPath);
        Files.createDirectory(indexPath);

        long indexed = 0;
        try (
            Connection connection = DriverManager.getConnection("jdbc:postgresql://localhost:5432/gymboard-api-dev", "gymboard-api-dev", "testpass");
            PreparedStatement statement = connection.prepareStatement(DbUtils.loadClasspathString("/sql/select-gyms.sql"));
            ResultSet rows = statement.executeQuery();
            Analyzer analyzer = new StandardAnalyzer();
            Directory directory = FSDirectory.open(indexPath);
            IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))
        ) {
            while (rows.next()) {
                writer.addDocument(toDocument(rows));
                indexed++;
            }
        }
        log.info("Gym index generation complete. {} gyms indexed.", indexed);
    }

    /** Converts the current row of the gym query result into a Lucene document. */
    private static Document toDocument(ResultSet row) throws Exception {
        final String shortName = row.getString("short_name");
        final String displayName = row.getString("display_name");
        final String cityShortName = row.getString("city_short_name");
        final String cityName = row.getString("city_name");
        final String countryCode = row.getString("country_code");
        final String countryName = row.getString("country_name");
        final String streetAddress = row.getString("street_address");
        final BigDecimal latitude = row.getBigDecimal("latitude");
        final BigDecimal longitude = row.getBigDecimal("longitude");
        final String compoundId = String.format("%s_%s_%s", countryCode, cityShortName, shortName);

        final Document gymDoc = new Document();
        gymDoc.add(new StoredField("compound_id", compoundId));
        gymDoc.add(new TextField("short_name", shortName, Field.Store.YES));
        gymDoc.add(new TextField("display_name", displayName, Field.Store.YES));
        gymDoc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));
        gymDoc.add(new TextField("city_name", cityName, Field.Store.YES));
        gymDoc.add(new TextField("country_code", countryCode, Field.Store.YES));
        gymDoc.add(new TextField("country_name", countryName, Field.Store.YES));
        gymDoc.add(new TextField("street_address", streetAddress, Field.Store.YES));
        // Each coordinate is added twice: as a point field and as a stored field.
        gymDoc.add(new DoublePoint("latitude_point", latitude.doubleValue()));
        gymDoc.add(new StoredField("latitude", latitude.doubleValue()));
        gymDoc.add(new DoublePoint("longitude_point", longitude.doubleValue()));
        gymDoc.add(new StoredField("longitude", longitude.doubleValue()));
        return gymDoc;
    }
}

View File

@ -1,63 +0,0 @@
package nl.andrewlalis.gymboardsearch.index;
import nl.andrewlalis.gymboardsearch.dto.GymResponse;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
/**
 * Service that searches the gym index with a Lucene {@link IndexSearcher},
 * using a query assembled from boosted wildcard terms.
 * <ol>
 * <li>A null or blank query yields an empty list.</li>
 * <li>The query is split on whitespace and '*' is appended to each word.</li>
 * <li>Every word contributes one boosted wildcard clause per weighted field.</li>
 * </ol>
 */
@Service
public class GymIndexSearcher {
    /**
     * Searches the "gym-index" directory for gyms matching the given text.
     * @param rawQuery The raw search text; may be null.
     * @return At most 10 matching gyms, or an empty list if the query is
     * blank or the index could not be read.
     */
    public List<GymResponse> searchGyms(String rawQuery) {
        if (rawQuery == null || rawQuery.isBlank()) {
            return Collections.emptyList();
        }

        Map<String, Float> boosts = new HashMap<>();
        boosts.put("short_name", 3f);
        boosts.put("display_name", 3f);
        boosts.put("city_short_name", 1f);
        boosts.put("city_name", 1f);
        boosts.put("country_code", 0.25f);
        boosts.put("country_name", 0.5f);
        boosts.put("street_address", 0.1f);

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (String word : rawQuery.split("\\s+")) {
            final String pattern = word.strip().toLowerCase() + "*";
            boosts.forEach((field, boost) -> {
                Query wildcard = new WildcardQuery(new Term(field, pattern));
                builder.add(new BoostQuery(wildcard, boost), BooleanClause.Occur.SHOULD);
            });
        }
        BooleanQuery query = builder.build();

        try (var indexReader = DirectoryReader.open(FSDirectory.open(Path.of("gym-index")))) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            TopDocs hits = indexSearcher.search(query, 10, Sort.RELEVANCE, false);
            List<GymResponse> gyms = new ArrayList<>(10);
            for (ScoreDoc hit : hits.scoreDocs) {
                gyms.add(new GymResponse(indexSearcher.doc(hit.doc)));
            }
            return gyms;
        } catch (IOException e) {
            e.printStackTrace();
            return Collections.emptyList();
        }
    }
}

View File

@ -0,0 +1,106 @@
package nl.andrewlalis.gymboardsearch.index;
import nl.andrewlalis.gymboardsearch.dto.GymResponse;
import nl.andrewlalis.gymboardsearch.dto.UserResponse;
import org.apache.lucene.document.*;
import org.springframework.context.annotation.Bean;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.file.Path;
import java.sql.DriverManager;
/**
 * Spring component declaring the beans for every index offered by this
 * service. Generators and searchers are assembled from the reusable "jdbc"
 * building blocks, so that each index is fully configured in this one place.
 */
@Component
public class IndexComponents {
    private static final Path USER_INDEX_DIR = Path.of("user-index");
    private static final Path GYM_INDEX_DIR = Path.of("gym-index");

    /** Supplies connections to the database that the indexes are built from. */
    @Bean
    public JdbcConnectionSupplier jdbcConnectionSupplier() {
        return () -> DriverManager.getConnection(
            "jdbc:postgresql://localhost:5432/gymboard-api-dev",
            "gymboard-api-dev",
            "testpass"
        );
    }

    /** Generator that indexes the id and name of each row of the select-users query. */
    @Bean
    public JdbcIndexGenerator userIndexGenerator(JdbcConnectionSupplier connectionSupplier) throws IOException {
        final var usersQuery = PlainQueryResultSetSupplier.fromResourceFile("/sql/select-users.sql");
        return new JdbcIndexGenerator(USER_INDEX_DIR, connectionSupplier, usersQuery, row -> {
            Document userDoc = new Document();
            userDoc.add(new StoredField("id", row.getString("id")));
            userDoc.add(new TextField("name", row.getString("name"), Field.Store.YES));
            return userDoc;
        });
    }

    /** Searcher over the user index that matches on the "name" field only. */
    @Bean
    public QueryIndexSearcher<UserResponse> userIndexSearcher() {
        return new QueryIndexSearcher<>(
            UserResponse::new,
            rawQuery -> new WeightedWildcardQueryBuilder().withField("name", 1f).build(rawQuery),
            10,
            USER_INDEX_DIR
        );
    }

    /** Generator that indexes each row of the select-gyms query. */
    @Bean
    public JdbcIndexGenerator gymIndexGenerator(JdbcConnectionSupplier connectionSupplier) throws IOException {
        final var gymsQuery = PlainQueryResultSetSupplier.fromResourceFile("/sql/select-gyms.sql");
        return new JdbcIndexGenerator(GYM_INDEX_DIR, connectionSupplier, gymsQuery, row -> {
            final String shortName = row.getString("short_name");
            final String displayName = row.getString("display_name");
            final String cityShortName = row.getString("city_short_name");
            final String cityName = row.getString("city_name");
            final String countryCode = row.getString("country_code");
            final String countryName = row.getString("country_name");
            final String streetAddress = row.getString("street_address");
            final BigDecimal latitude = row.getBigDecimal("latitude");
            final BigDecimal longitude = row.getBigDecimal("longitude");
            final String compoundId = String.format("%s_%s_%s", countryCode, cityShortName, shortName);

            final Document gymDoc = new Document();
            gymDoc.add(new StoredField("compound_id", compoundId));
            gymDoc.add(new TextField("short_name", shortName, Field.Store.YES));
            gymDoc.add(new TextField("display_name", displayName, Field.Store.YES));
            gymDoc.add(new TextField("city_short_name", cityShortName, Field.Store.YES));
            gymDoc.add(new TextField("city_name", cityName, Field.Store.YES));
            gymDoc.add(new TextField("country_code", countryCode, Field.Store.YES));
            gymDoc.add(new TextField("country_name", countryName, Field.Store.YES));
            gymDoc.add(new TextField("street_address", streetAddress, Field.Store.YES));
            // Each coordinate is added twice: as a point field and as a stored field.
            gymDoc.add(new DoublePoint("latitude_point", latitude.doubleValue()));
            gymDoc.add(new StoredField("latitude", latitude.doubleValue()));
            gymDoc.add(new DoublePoint("longitude_point", longitude.doubleValue()));
            gymDoc.add(new StoredField("longitude", longitude.doubleValue()));
            return gymDoc;
        });
    }

    /** Searcher over the gym index; name fields carry the highest weights. */
    @Bean
    public QueryIndexSearcher<GymResponse> gymIndexSearcher() {
        return new QueryIndexSearcher<>(
            GymResponse::new,
            rawQuery -> {
                final var gymQuery = new WeightedWildcardQueryBuilder();
                gymQuery.withField("short_name", 3f);
                gymQuery.withField("display_name", 3f);
                gymQuery.withField("city_short_name", 1f);
                gymQuery.withField("city_name", 1f);
                gymQuery.withField("country_code", 0.25f);
                gymQuery.withField("country_name", 0.5f);
                gymQuery.withField("street_address", 0.1f);
                return gymQuery.build(rawQuery);
            },
            10,
            GYM_INDEX_DIR
        );
    }
}

View File

@ -0,0 +1,8 @@
package nl.andrewlalis.gymboardsearch.index;
import java.sql.Connection;
import java.sql.SQLException;
/**
 * Supplier of JDBC connections. It declares a single method, so it can be
 * implemented with a lambda expression.
 */
@FunctionalInterface
public interface JdbcConnectionSupplier {
    /**
     * Obtains a connection to the database.
     * @return The connection.
     * @throws SQLException If a connection could not be obtained.
     */
    Connection getConnection() throws SQLException;
}

View File

@ -0,0 +1,67 @@
package nl.andrewlalis.gymboardsearch.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.FileSystemUtils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.ResultSet;
/**
 * Builds a Lucene index in a directory from the rows of a JDBC result set,
 * converting each row to a document with a supplied mapper.
 */
public class JdbcIndexGenerator {
    private static final Logger log = LoggerFactory.getLogger(JdbcIndexGenerator.class);

    private final Path indexDir;
    private final JdbcConnectionSupplier connectionSupplier;
    private final JdbcResultSetSupplier resultSetSupplier;
    private final JdbcResultDocumentMapper resultMapper;

    /**
     * @param indexDir The directory in which the index is written.
     * @param connectionSupplier Provides the database connection to read from.
     * @param resultSetSupplier Produces the result set to index from a connection.
     * @param resultMapper Converts the current row of the result set to a document.
     */
    public JdbcIndexGenerator(Path indexDir, JdbcConnectionSupplier connectionSupplier, JdbcResultSetSupplier resultSetSupplier, JdbcResultDocumentMapper resultMapper) {
        this.indexDir = indexDir;
        this.connectionSupplier = connectionSupplier;
        this.resultSetSupplier = resultSetSupplier;
        this.resultMapper = resultMapper;
    }

    /**
     * Regenerates the index. An existing index directory is deleted and
     * recreated first. Failures are logged instead of thrown: a row that
     * cannot be mapped or added is skipped, while any other failure aborts
     * the generation.
     */
    public void generate() {
        log.info("Generating index at {}.", indexDir);
        if (!resetIndexDirectory()) {
            return;
        }
        try (
            Connection connection = connectionSupplier.getConnection();
            ResultSet rows = resultSetSupplier.supply(connection);
            Analyzer analyzer = new StandardAnalyzer();
            Directory directory = FSDirectory.open(indexDir);
            IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))
        ) {
            long indexed = 0;
            while (rows.next()) {
                if (addCurrentRow(writer, rows)) {
                    indexed++;
                }
            }
            log.info("Indexed {} entities.", indexed);
        } catch (Exception e) {
            log.error("Failed to prepare indexing components.", e);
        }
    }

    /** Empties an existing index directory; returns false (after logging) if that fails. */
    private boolean resetIndexDirectory() {
        if (!Files.exists(indexDir)) {
            return true;
        }
        try {
            FileSystemUtils.deleteRecursively(indexDir);
            Files.createDirectory(indexDir);
            return true;
        } catch (IOException e) {
            log.error("Failed to reset index directory.", e);
            return false;
        }
    }

    /** Maps the current row and adds it to the index; returns false (after logging) on failure. */
    private boolean addCurrentRow(IndexWriter writer, ResultSet row) {
        try {
            writer.addDocument(resultMapper.map(row));
            return true;
        } catch (Exception e) {
            log.error("Failed to add document.", e);
            return false;
        }
    }
}

View File

@ -0,0 +1,10 @@
package nl.andrewlalis.gymboardsearch.index;
import org.apache.lucene.document.Document;
import java.sql.ResultSet;
/**
 * Maps the current row of a JDBC result set to a Lucene document.
 */
@FunctionalInterface
public interface JdbcResultDocumentMapper {
/**
 * Builds a document from the result set's current row.
 * {@code JdbcIndexGenerator} calls this once after each successful
 * {@code rs.next()}, and logs and skips the row if this method throws.
 * @param rs The result set to read column values from.
 * @return The document to add to the index.
 * @throws Exception If the row could not be converted to a document.
 */
Document map(ResultSet rs) throws Exception;
}

View File

@ -0,0 +1,10 @@
package nl.andrewlalis.gymboardsearch.index;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Produces the result set whose rows should be indexed. It declares a
 * single method, so it can be implemented with a lambda expression.
 */
@FunctionalInterface
public interface JdbcResultSetSupplier {
    /**
     * Obtains a result set using the given connection.
     * @param conn The connection to query.
     * @return The result set; {@code JdbcIndexGenerator} closes it once
     * indexing has finished.
     * @throws SQLException If the query fails.
     * @throws IOException If a resource needed for the query cannot be read.
     */
    ResultSet supply(Connection conn) throws SQLException, IOException;
}

View File

@ -0,0 +1,27 @@
package nl.andrewlalis.gymboardsearch.index;
import nl.andrewlalis.gymboardsearch.DbUtils;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Result set supplier that executes one fixed SQL query without parameters.
 */
public class PlainQueryResultSetSupplier implements JdbcResultSetSupplier {
    private final String query;

    /**
     * @param query The SQL text to execute each time a result set is requested.
     */
    public PlainQueryResultSetSupplier(String query) {
        this.query = query;
    }

    /**
     * Creates a supplier whose query is the content of a classpath resource.
     * @param resource The resource to load, e.g. {@code /sql/select-users.sql}.
     * @return The new supplier.
     * @throws IOException If the resource could not be loaded.
     */
    public static PlainQueryResultSetSupplier fromResourceFile(String resource) throws IOException {
        return new PlainQueryResultSetSupplier(DbUtils.loadClasspathString(resource));
    }

    /**
     * Executes the query on the given connection.
     * @param conn The connection to run the query on.
     * @return The result set. Closing it also closes the statement that
     * produced it.
     * @throws SQLException If the statement cannot be prepared or executed.
     */
    @Override
    public ResultSet supply(Connection conn) throws SQLException {
        PreparedStatement stmt = conn.prepareStatement(query);
        try {
            // The caller only ever receives the result set, so the statement
            // must close itself once that result set has been closed.
            stmt.closeOnCompletion();
            return stmt.executeQuery();
        } catch (SQLException | RuntimeException e) {
            // No result set was handed out, so nothing else would close the statement.
            try {
                stmt.close();
            } catch (SQLException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}

View File

@ -0,0 +1,53 @@
package nl.andrewlalis.gymboardsearch.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
/**
 * Searches a Lucene index stored in a directory, turning raw search text
 * into a query and each matching document into a result object.
 * @param <R> The type of result produced for each matching document.
 */
public class QueryIndexSearcher<R> {
    private static final Logger log = LoggerFactory.getLogger(QueryIndexSearcher.class);

    private final Function<Document, R> mapper;
    private final Function<String, Optional<Query>> querySupplier;
    private final int maxResults;
    private final Path indexDir;

    /**
     * @param mapper Converts a matching document to a result.
     * @param querySupplier Builds the query for a raw search string; an
     * empty optional means that no search should be done.
     * @param maxResults The maximum number of results returned by a search.
     * @param indexDir The directory containing the index to search.
     */
    public QueryIndexSearcher(Function<Document, R> mapper, Function<String, Optional<Query>> querySupplier, int maxResults, Path indexDir) {
        this.mapper = mapper;
        this.querySupplier = querySupplier;
        this.maxResults = maxResults;
        this.indexDir = indexDir;
    }

    /**
     * Searches the index, opening it anew for each call.
     * @param rawQuery The raw search text, passed as-is to the query supplier.
     * @return Up to {@code maxResults} results ordered by relevance, or an
     * empty list if no query was built or the index could not be read.
     */
    public List<R> search(String rawQuery) {
        Optional<Query> optionalQuery = querySupplier.apply(rawQuery);
        if (optionalQuery.isEmpty()) return Collections.emptyList();
        Query query = optionalQuery.get();
        try (
            // Closing the reader does not close the directory it was opened
            // on, so the directory is managed as a resource of its own.
            var directory = FSDirectory.open(indexDir);
            var reader = DirectoryReader.open(directory)
        ) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<R> results = new ArrayList<>(maxResults);
            TopDocs topDocs = searcher.search(query, maxResults, Sort.RELEVANCE, false);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                results.add(mapper.apply(doc));
            }
            return results;
        } catch (IOException e) {
            log.error("Could not search index.", e);
            return Collections.emptyList();
        }
    }
}

View File

@ -0,0 +1,40 @@
package nl.andrewlalis.gymboardsearch.index;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
/**
 * Builds a Lucene query from raw search text: for every word of the text,
 * one boosted wildcard (prefix) clause is added for every configured field.
 * <p>
 * Instances are meant to be used once. All clauses are collected in a
 * single internal builder, so building twice with the same instance would
 * add the clauses of the first call to the second query as well.
 * </p>
 */
public class WeightedWildcardQueryBuilder {
    private final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    private final Map<String, Float> fieldWeights = new HashMap<>();

    /**
     * Adds a field to search in.
     * @param fieldName The name of the indexed field.
     * @param weight The boost applied to matches in this field.
     * @return This builder.
     */
    public WeightedWildcardQueryBuilder withField(String fieldName, float weight) {
        fieldWeights.put(fieldName, weight);
        return this;
    }

    /**
     * Gives direct access to the underlying boolean query builder. The
     * customizer is invoked immediately.
     * @param customizer Receives the underlying builder.
     * @return This builder.
     */
    public WeightedWildcardQueryBuilder customize(Consumer<BooleanQuery.Builder> customizer) {
        customizer.accept(queryBuilder);
        return this;
    }

    /**
     * Builds the query for the given search text.
     * @param rawSearchQuery The raw search text; may be null.
     * @return The query, or an empty optional if the text is null or blank.
     */
    public Optional<Query> build(String rawSearchQuery) {
        if (rawSearchQuery == null || rawSearchQuery.isBlank()) return Optional.empty();
        // Strip first: with leading whitespace, split() would yield an empty
        // first term, which would turn into a bare "*" matching every document.
        // Locale.ROOT keeps the lower-casing independent of the JVM's locale.
        String[] terms = rawSearchQuery.strip().toLowerCase(java.util.Locale.ROOT).split("\\s+");
        for (String term : terms) {
            if (term.isEmpty()) continue;
            String searchTerm = term + "*";
            for (var entry : fieldWeights.entrySet()) {
                String fieldName = entry.getKey();
                float weight = entry.getValue();
                Query baseQuery = new WildcardQuery(new Term(fieldName, searchTerm));
                queryBuilder.add(new BoostQuery(baseQuery, weight), BooleanClause.Occur.SHOULD);
            }
        }
        return Optional.of(queryBuilder.build());
    }
}

View File

@ -0,0 +1,7 @@
-- Selects the id, email and name of every activated user, ordered by
-- creation time.
SELECT
    u.id AS id,
    u.email AS email,
    u.name AS name
FROM auth_user AS u
WHERE u.activated = TRUE
ORDER BY u.created_at ASC;