From ee5ff41167f79f5f1a3361d737f3f283ba44a09e Mon Sep 17 00:00:00 2001 From: Andrew Lalis Date: Mon, 6 Feb 2023 13:28:51 +0100 Subject: [PATCH] Refactored indexing and searching to be modular. --- .../auth/controller/UserController.java | 5 + .../auth/dto/UserPersonalDetailsResponse.java | 12 +- gymboard-app/src/api/main/auth.ts | 39 ++++++- gymboard-search/.gitignore | 1 + .../GymboardSearchApplication.java | 14 ++- .../gymboardsearch/SearchController.java | 16 ++- .../gymboardsearch/dto/UserResponse.java | 15 +++ .../index/GymIndexGenerator.java | 74 ------------ .../index/GymIndexSearcher.java | 63 ----------- .../gymboardsearch/index/IndexComponents.java | 106 ++++++++++++++++++ .../index/JdbcConnectionSupplier.java | 8 ++ .../index/JdbcIndexGenerator.java | 67 +++++++++++ .../index/JdbcResultDocumentMapper.java | 10 ++ .../index/JdbcResultSetSupplier.java | 10 ++ .../index/PlainQueryResultSetSupplier.java | 27 +++++ .../index/QueryIndexSearcher.java | 53 +++++++++ .../index/WeightedWildcardQueryBuilder.java | 40 +++++++ .../src/main/resources/sql/select-users.sql | 7 ++ 18 files changed, 413 insertions(+), 154 deletions(-) create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/dto/UserResponse.java delete mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java delete mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/IndexComponents.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcConnectionSupplier.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcIndexGenerator.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultDocumentMapper.java create mode 100644 
gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultSetSupplier.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/PlainQueryResultSetSupplier.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/QueryIndexSearcher.java create mode 100644 gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/WeightedWildcardQueryBuilder.java create mode 100644 gymboard-search/src/main/resources/sql/select-users.sql diff --git a/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/controller/UserController.java b/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/controller/UserController.java index d17c5ab..2603188 100644 --- a/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/controller/UserController.java +++ b/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/controller/UserController.java @@ -28,6 +28,11 @@ public class UserController { return new UserResponse(user); } + @GetMapping(path = "/auth/users/{userId}") + public UserResponse getUser(@PathVariable String userId) { + return userService.getUser(userId); + } + /** * Endpoint for updating one's own password. * @param user The user that's updating their password. 
diff --git a/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/dto/UserPersonalDetailsResponse.java b/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/dto/UserPersonalDetailsResponse.java index 7ba7613..03b972e 100644 --- a/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/dto/UserPersonalDetailsResponse.java +++ b/gymboard-api/src/main/java/nl/andrewlalis/gymboard_api/domains/auth/dto/UserPersonalDetailsResponse.java @@ -7,18 +7,18 @@ import java.time.format.DateTimeFormatter; public record UserPersonalDetailsResponse( String userId, String birthDate, - float currentWeight, + Float currentWeight, String currentWeightUnit, - float currentMetricWeight, + Float currentMetricWeight, String sex ) { public UserPersonalDetailsResponse(UserPersonalDetails pd) { this( pd.getUserId(), - pd.getBirthDate().format(DateTimeFormatter.ISO_LOCAL_DATE), - pd.getCurrentWeight().floatValue(), - pd.getCurrentWeightUnit().name(), - pd.getCurrentMetricWeight().floatValue(), + pd.getBirthDate() == null ? null : pd.getBirthDate().format(DateTimeFormatter.ISO_LOCAL_DATE), + pd.getCurrentWeight() == null ? null : pd.getCurrentWeight().floatValue(), + pd.getCurrentWeightUnit() == null ? null : pd.getCurrentWeightUnit().name(), + pd.getCurrentMetricWeight() == null ? 
null : pd.getCurrentMetricWeight().floatValue(), pd.getSex().name() ); } diff --git a/gymboard-app/src/api/main/auth.ts b/gymboard-app/src/api/main/auth.ts index f036b42..acb1beb 100644 --- a/gymboard-app/src/api/main/auth.ts +++ b/gymboard-app/src/api/main/auth.ts @@ -1,5 +1,5 @@ -import { api } from 'src/api/main/index'; -import { AuthStoreType } from 'stores/auth-store'; +import {api} from 'src/api/main/index'; +import {AuthStoreType} from 'stores/auth-store'; import Timeout = NodeJS.Timeout; export interface User { @@ -9,6 +9,26 @@ export interface User { name: string; } +export enum PersonSex { + MALE = 'MALE', + FEMALE = 'FEMALE', + UNKNOWN = 'UNKNOWN' +} + +export interface UserPersonalDetails { + userId: string; + birthDate?: string; + currentWeight?: number; + currentWeightUnit?: number; + sex: PersonSex; +} + +export interface UserPreferences { + userId: string; + accountPrivate: boolean; + locale: string; +} + export interface TokenCredentials { email: string; password: string; @@ -66,6 +86,11 @@ class AuthModule { return response.data; } + public async fetchUser(userId: string, authStore: AuthStoreType): Promise { + const response = await api.get(`/auth/users/${userId}`, authStore.axiosConfig); + return response.data; + } + public async updatePassword(newPassword: string, authStore: AuthStoreType) { await api.post( '/auth/me/password', @@ -84,6 +109,16 @@ class AuthModule { newPassword: newPassword, }); } + + public async getMyPersonalDetails(authStore: AuthStoreType): Promise { + const response = await api.get('/auth/me/personal-details', authStore.axiosConfig); + return response.data; + } + + public async getMyPreferences(authStore: AuthStoreType): Promise { + const response = await api.get('/auth/me/preferences', authStore.axiosConfig); + return response.data; + } } export default AuthModule; diff --git a/gymboard-search/.gitignore b/gymboard-search/.gitignore index d97d9f0..e91b450 100644 --- a/gymboard-search/.gitignore +++ 
b/gymboard-search/.gitignore @@ -33,3 +33,4 @@ build/ .vscode/ gym-index/ +user-index/ diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/GymboardSearchApplication.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/GymboardSearchApplication.java index 4033346..ece26c4 100644 --- a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/GymboardSearchApplication.java +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/GymboardSearchApplication.java @@ -1,6 +1,6 @@ package nl.andrewlalis.gymboardsearch; -import nl.andrewlalis.gymboardsearch.index.GymIndexGenerator; +import nl.andrewlalis.gymboardsearch.index.JdbcIndexGenerator; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; @@ -9,10 +9,10 @@ import java.util.TimeZone; @SpringBootApplication public class GymboardSearchApplication implements CommandLineRunner { - private final GymIndexGenerator gymIndexGenerator; - public GymboardSearchApplication(GymIndexGenerator gymIndexGenerator) { + public GymboardSearchApplication(JdbcIndexGenerator gymIndexGenerator, JdbcIndexGenerator userIndexGenerator) { this.gymIndexGenerator = gymIndexGenerator; + this.userIndexGenerator = userIndexGenerator; } public static void main(String[] args) { @@ -20,8 +20,12 @@ public class GymboardSearchApplication implements CommandLineRunner { SpringApplication.run(GymboardSearchApplication.class, args); } + private final JdbcIndexGenerator gymIndexGenerator; + private final JdbcIndexGenerator userIndexGenerator; + @Override - public void run(String... args) throws Exception { - gymIndexGenerator.generateIndex(); + public void run(String... 
args) { + gymIndexGenerator.generate(); + userIndexGenerator.generate(); } } diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/SearchController.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/SearchController.java index ed90431..23afe6c 100644 --- a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/SearchController.java +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/SearchController.java @@ -1,7 +1,8 @@ package nl.andrewlalis.gymboardsearch; import nl.andrewlalis.gymboardsearch.dto.GymResponse; -import nl.andrewlalis.gymboardsearch.index.GymIndexSearcher; +import nl.andrewlalis.gymboardsearch.dto.UserResponse; +import nl.andrewlalis.gymboardsearch.index.QueryIndexSearcher; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; @@ -10,14 +11,21 @@ import java.util.List; @RestController public class SearchController { - private final GymIndexSearcher gymIndexSearcher; + private final QueryIndexSearcher gymIndexSearcher; + private final QueryIndexSearcher userIndexSearcher; - public SearchController(GymIndexSearcher gymIndexSearcher) { + public SearchController(QueryIndexSearcher gymIndexSearcher, QueryIndexSearcher userIndexSearcher) { this.gymIndexSearcher = gymIndexSearcher; + this.userIndexSearcher = userIndexSearcher; } @GetMapping(path = "/search/gyms") public List searchGyms(@RequestParam(name = "q", required = false) String query) { - return gymIndexSearcher.searchGyms(query); + return gymIndexSearcher.search(query); + } + + @GetMapping(path = "/search/users") + public List searchUsers(@RequestParam(name = "q", required = false) String query) { + return userIndexSearcher.search(query); } } diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/dto/UserResponse.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/dto/UserResponse.java 
new file mode 100644 index 0000000..dc1cae7 --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/dto/UserResponse.java @@ -0,0 +1,15 @@ +package nl.andrewlalis.gymboardsearch.dto; + +import org.apache.lucene.document.Document; + +public record UserResponse( + String id, + String name +) { + public UserResponse(Document doc) { + this( + doc.get("id"), + doc.get("name") + ); + } +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java deleted file mode 100644 index fe85338..0000000 --- a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexGenerator.java +++ /dev/null @@ -1,74 +0,0 @@ -package nl.andrewlalis.gymboardsearch.index; - -import nl.andrewlalis.gymboardsearch.DbUtils; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.*; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Service; -import org.springframework.util.FileSystemUtils; - -import java.math.BigDecimal; -import java.nio.file.Files; -import java.nio.file.Path; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; - -@Service -public class GymIndexGenerator { - private static final Logger log = LoggerFactory.getLogger(GymIndexGenerator.class); - - public void generateIndex() throws Exception { - log.info("Starting Gym index generation."); - Path gymIndexDir = Path.of("gym-index"); - FileSystemUtils.deleteRecursively(gymIndexDir); - Files.createDirectory(gymIndexDir); - long count = 0; - try ( - Connection conn = 
DriverManager.getConnection("jdbc:postgresql://localhost:5432/gymboard-api-dev", "gymboard-api-dev", "testpass"); - PreparedStatement stmt = conn.prepareStatement(DbUtils.loadClasspathString("/sql/select-gyms.sql")); - ResultSet resultSet = stmt.executeQuery(); - - Analyzer analyzer = new StandardAnalyzer(); - Directory indexDir = FSDirectory.open(gymIndexDir); - IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(analyzer)) - ) { - while (resultSet.next()) { - String shortName = resultSet.getString("short_name"); - String displayName = resultSet.getString("display_name"); - String cityShortName = resultSet.getString("city_short_name"); - String cityName = resultSet.getString("city_name"); - String countryCode = resultSet.getString("country_code"); - String countryName = resultSet.getString("country_name"); - String streetAddress = resultSet.getString("street_address"); - BigDecimal latitude = resultSet.getBigDecimal("latitude"); - BigDecimal longitude = resultSet.getBigDecimal("longitude"); - String gymCompoundId = String.format("%s_%s_%s", countryCode, cityShortName, shortName); - - Document doc = new Document(); - doc.add(new StoredField("compound_id", gymCompoundId)); - doc.add(new TextField("short_name", shortName, Field.Store.YES)); - doc.add(new TextField("display_name", displayName, Field.Store.YES)); - doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES)); - doc.add(new TextField("city_name", cityName, Field.Store.YES)); - doc.add(new TextField("country_code", countryCode, Field.Store.YES)); - doc.add(new TextField("country_name", countryName, Field.Store.YES)); - doc.add(new TextField("street_address", streetAddress, Field.Store.YES)); - doc.add(new DoublePoint("latitude_point", latitude.doubleValue())); - doc.add(new StoredField("latitude", latitude.doubleValue())); - doc.add(new DoublePoint("longitude_point", longitude.doubleValue())); - doc.add(new StoredField("longitude", longitude.doubleValue())); - 
indexWriter.addDocument(doc); - count++; - } - } - log.info("Gym index generation complete. {} gyms indexed.", count); - } -} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java deleted file mode 100644 index a0c2cb0..0000000 --- a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/GymIndexSearcher.java +++ /dev/null @@ -1,63 +0,0 @@ -package nl.andrewlalis.gymboardsearch.index; - -import nl.andrewlalis.gymboardsearch.dto.GymResponse; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.*; -import org.apache.lucene.store.FSDirectory; -import org.springframework.stereotype.Service; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.*; - -/** - * Searcher that uses a Lucene {@link IndexSearcher} to search for gyms using - * a query that's built from a weighted list of wildcard search terms. - *
<ol>
- *     <li>If the query is blank, return an empty list.</li>
- *     <li>Split the query into words, append the wildcard '*' to each word.</li>
- *     <li>For each word, add a boosted wildcard query for each weighted field.</li>
- * </ol>
- */ -@Service -public class GymIndexSearcher { - public List searchGyms(String rawQuery) { - if (rawQuery == null || rawQuery.isBlank()) return Collections.emptyList(); - String[] terms = rawQuery.split("\\s+"); - BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); - Map fieldWeights = new HashMap<>(); - fieldWeights.put("short_name", 3f); - fieldWeights.put("display_name", 3f); - fieldWeights.put("city_short_name", 1f); - fieldWeights.put("city_name", 1f); - fieldWeights.put("country_code", 0.25f); - fieldWeights.put("country_name", 0.5f); - fieldWeights.put("street_address", 0.1f); - for (String term : terms) { - String searchTerm = term.strip().toLowerCase() + "*"; - for (var entry : fieldWeights.entrySet()) { - Query baseQuery = new WildcardQuery(new Term(entry.getKey(), searchTerm)); - queryBuilder.add(new BoostQuery(baseQuery, entry.getValue()), BooleanClause.Occur.SHOULD); - } - } - BooleanQuery query = queryBuilder.build(); - Path gymIndexDir = Path.of("gym-index"); - try ( - var reader = DirectoryReader.open(FSDirectory.open(gymIndexDir)) - ) { - IndexSearcher searcher = new IndexSearcher(reader); - List results = new ArrayList<>(10); - TopDocs topDocs = searcher.search(query, 10, Sort.RELEVANCE, false); - for (ScoreDoc scoreDoc : topDocs.scoreDocs) { - Document doc = searcher.doc(scoreDoc.doc); - results.add(new GymResponse(doc)); - } - return results; - } catch (IOException e) { - e.printStackTrace(); - return Collections.emptyList(); - } - } -} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/IndexComponents.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/IndexComponents.java new file mode 100644 index 0000000..19badcb --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/IndexComponents.java @@ -0,0 +1,106 @@ +package nl.andrewlalis.gymboardsearch.index; + +import nl.andrewlalis.gymboardsearch.dto.GymResponse; +import 
nl.andrewlalis.gymboardsearch.dto.UserResponse; +import org.apache.lucene.document.*; +import org.springframework.context.annotation.Bean; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.file.Path; +import java.sql.DriverManager; + +/** + * Component that defines beans for the various indexes that this service + * supports. Beans are primarily constructed using the reusable "jdbc" + * components so that all index and search configuration is defined here. + */ +@Component +public class IndexComponents { + @Bean + public JdbcConnectionSupplier jdbcConnectionSupplier() { + return () -> DriverManager.getConnection("jdbc:postgresql://localhost:5432/gymboard-api-dev", "gymboard-api-dev", "testpass"); + } + + @Bean + public JdbcIndexGenerator userIndexGenerator(JdbcConnectionSupplier connectionSupplier) throws IOException { + return new JdbcIndexGenerator( + Path.of("user-index"), + connectionSupplier, + PlainQueryResultSetSupplier.fromResourceFile("/sql/select-users.sql"), + rs -> { + var doc = new Document(); + doc.add(new StoredField("id", rs.getString("id"))); + doc.add(new TextField("name", rs.getString("name"), Field.Store.YES)); + return doc; + } + ); + } + + @Bean + public QueryIndexSearcher userIndexSearcher() { + return new QueryIndexSearcher<>( + UserResponse::new, + s -> new WeightedWildcardQueryBuilder() + .withField("name", 1f) + .build(s), + 10, + Path.of("user-index") + ); + } + + @Bean + public JdbcIndexGenerator gymIndexGenerator(JdbcConnectionSupplier connectionSupplier) throws IOException { + return new JdbcIndexGenerator( + Path.of("gym-index"), + connectionSupplier, + PlainQueryResultSetSupplier.fromResourceFile("/sql/select-gyms.sql"), + rs -> { + String shortName = rs.getString("short_name"); + String displayName = rs.getString("display_name"); + String cityShortName = rs.getString("city_short_name"); + String cityName = rs.getString("city_name"); + String countryCode = 
rs.getString("country_code"); + String countryName = rs.getString("country_name"); + String streetAddress = rs.getString("street_address"); + BigDecimal latitude = rs.getBigDecimal("latitude"); + BigDecimal longitude = rs.getBigDecimal("longitude"); + String gymCompoundId = String.format("%s_%s_%s", countryCode, cityShortName, shortName); + + Document doc = new Document(); + doc.add(new StoredField("compound_id", gymCompoundId)); + doc.add(new TextField("short_name", shortName, Field.Store.YES)); + doc.add(new TextField("display_name", displayName, Field.Store.YES)); + doc.add(new TextField("city_short_name", cityShortName, Field.Store.YES)); + doc.add(new TextField("city_name", cityName, Field.Store.YES)); + doc.add(new TextField("country_code", countryCode, Field.Store.YES)); + doc.add(new TextField("country_name", countryName, Field.Store.YES)); + doc.add(new TextField("street_address", streetAddress, Field.Store.YES)); + doc.add(new DoublePoint("latitude_point", latitude.doubleValue())); + doc.add(new StoredField("latitude", latitude.doubleValue())); + doc.add(new DoublePoint("longitude_point", longitude.doubleValue())); + doc.add(new StoredField("longitude", longitude.doubleValue())); + return doc; + } + ); + } + + @Bean + public QueryIndexSearcher gymIndexSearcher() { + return new QueryIndexSearcher<>( + GymResponse::new, + s -> new WeightedWildcardQueryBuilder() + .withField("short_name", 3f) + .withField("display_name", 3f) + .withField("city_short_name", 1f) + .withField("city_name", 1f) + .withField("country_code", 0.25f) + .withField("country_name", 0.5f) + .withField("street_address", 0.1f) + .build(s), + 10, + Path.of("gym-index") + ); + } +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcConnectionSupplier.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcConnectionSupplier.java new file mode 100644 index 0000000..1bd260d --- /dev/null +++ 
b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcConnectionSupplier.java @@ -0,0 +1,8 @@ +package nl.andrewlalis.gymboardsearch.index; + +import java.sql.Connection; +import java.sql.SQLException; + +public interface JdbcConnectionSupplier { + Connection getConnection() throws SQLException; +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcIndexGenerator.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcIndexGenerator.java new file mode 100644 index 0000000..48e3610 --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcIndexGenerator.java @@ -0,0 +1,67 @@ +package nl.andrewlalis.gymboardsearch.index; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.util.FileSystemUtils; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.Connection; +import java.sql.ResultSet; + +public class JdbcIndexGenerator { + private static final Logger log = LoggerFactory.getLogger(JdbcIndexGenerator.class); + + private final Path indexDir; + private final JdbcConnectionSupplier connectionSupplier; + private final JdbcResultSetSupplier resultSetSupplier; + private final JdbcResultDocumentMapper resultMapper; + + public JdbcIndexGenerator(Path indexDir, JdbcConnectionSupplier connectionSupplier, JdbcResultSetSupplier resultSetSupplier, JdbcResultDocumentMapper resultMapper) { + this.indexDir = indexDir; + this.connectionSupplier = connectionSupplier; + this.resultSetSupplier = resultSetSupplier; + this.resultMapper = resultMapper; + } + + public void generate() { + log.info("Generating index at 
{}.", indexDir); + if (Files.exists(indexDir)) { + try { + FileSystemUtils.deleteRecursively(indexDir); + Files.createDirectory(indexDir); + } catch (IOException e) { + log.error("Failed to reset index directory.", e); + return; + } + } + try ( + Connection conn = connectionSupplier.getConnection(); + ResultSet rs = resultSetSupplier.supply(conn); + + Analyzer analyzer = new StandardAnalyzer(); + Directory luceneDir = FSDirectory.open(indexDir); + IndexWriter indexWriter = new IndexWriter(luceneDir, new IndexWriterConfig(analyzer)) + ) { + long count = 0; + while (rs.next()) { + try { + indexWriter.addDocument(resultMapper.map(rs)); + count++; + } catch (Exception e) { + log.error("Failed to add document.", e); + } + } + log.info("Indexed {} entities.", count); + } catch (Exception e) { + log.error("Failed to prepare indexing components.", e); + } + } +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultDocumentMapper.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultDocumentMapper.java new file mode 100644 index 0000000..4b8c4f0 --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultDocumentMapper.java @@ -0,0 +1,10 @@ +package nl.andrewlalis.gymboardsearch.index; + +import org.apache.lucene.document.Document; + +import java.sql.ResultSet; + +@FunctionalInterface +public interface JdbcResultDocumentMapper { + Document map(ResultSet rs) throws Exception; +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultSetSupplier.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultSetSupplier.java new file mode 100644 index 0000000..8bd0d04 --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/JdbcResultSetSupplier.java @@ -0,0 +1,10 @@ +package nl.andrewlalis.gymboardsearch.index; + +import java.io.IOException; +import java.sql.Connection; +import 
java.sql.ResultSet; +import java.sql.SQLException; + +public interface JdbcResultSetSupplier { + ResultSet supply(Connection conn) throws SQLException, IOException; +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/PlainQueryResultSetSupplier.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/PlainQueryResultSetSupplier.java new file mode 100644 index 0000000..affb7b4 --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/PlainQueryResultSetSupplier.java @@ -0,0 +1,27 @@ +package nl.andrewlalis.gymboardsearch.index; + +import nl.andrewlalis.gymboardsearch.DbUtils; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; + +public class PlainQueryResultSetSupplier implements JdbcResultSetSupplier { + private final String query; + + public PlainQueryResultSetSupplier(String query) { + this.query = query; + } + + public static PlainQueryResultSetSupplier fromResourceFile(String resource) throws IOException { + return new PlainQueryResultSetSupplier(DbUtils.loadClasspathString(resource)); + } + + @Override + public ResultSet supply(Connection conn) throws SQLException { + PreparedStatement stmt = conn.prepareStatement(query); + return stmt.executeQuery(); + } +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/QueryIndexSearcher.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/QueryIndexSearcher.java new file mode 100644 index 0000000..a0da6eb --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/QueryIndexSearcher.java @@ -0,0 +1,53 @@ +package nl.andrewlalis.gymboardsearch.index; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.*; +import org.apache.lucene.store.FSDirectory; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.function.Function; + +public class QueryIndexSearcher { + private static final Logger log = LoggerFactory.getLogger(QueryIndexSearcher.class); + + private final Function mapper; + private final Function> querySupplier; + private final int maxResults; + private final Path indexDir; + + public QueryIndexSearcher(Function mapper, Function> querySupplier, int maxResults, Path indexDir) { + this.mapper = mapper; + this.querySupplier = querySupplier; + this.maxResults = maxResults; + this.indexDir = indexDir; + } + + public List search(String rawQuery) { + Optional optionalQuery = querySupplier.apply(rawQuery); + if (optionalQuery.isEmpty()) return Collections.emptyList(); + Query query = optionalQuery.get(); + try ( + var reader = DirectoryReader.open(FSDirectory.open(indexDir)) + ) { + IndexSearcher searcher = new IndexSearcher(reader); + List results = new ArrayList<>(maxResults); + TopDocs topDocs = searcher.search(query, maxResults, Sort.RELEVANCE, false); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + Document doc = searcher.doc(scoreDoc.doc); + results.add(mapper.apply(doc)); + } + return results; + } catch (IOException e) { + log.error("Could not search index.", e); + return Collections.emptyList(); + } + } +} diff --git a/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/WeightedWildcardQueryBuilder.java b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/WeightedWildcardQueryBuilder.java new file mode 100644 index 0000000..6e795c9 --- /dev/null +++ b/gymboard-search/src/main/java/nl/andrewlalis/gymboardsearch/index/WeightedWildcardQueryBuilder.java @@ -0,0 +1,40 @@ +package nl.andrewlalis.gymboardsearch.index; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; + +import java.util.HashMap; 
+import java.util.Map; +import java.util.Optional; +import java.util.function.Consumer; + +public class WeightedWildcardQueryBuilder { + private final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + private final Map fieldWeights = new HashMap<>(); + + public WeightedWildcardQueryBuilder withField(String fieldName, float weight) { + fieldWeights.put(fieldName, weight); + return this; + } + + public WeightedWildcardQueryBuilder customize(Consumer customizer) { + customizer.accept(queryBuilder); + return this; + } + + public Optional build(String rawSearchQuery) { + if (rawSearchQuery == null || rawSearchQuery.isBlank()) return Optional.empty(); + String[] terms = rawSearchQuery.toLowerCase().split("\\s+"); + for (String term : terms) { + String searchTerm = term + "*"; + for (var entry : fieldWeights.entrySet()) { + String fieldName = entry.getKey(); + float weight = entry.getValue(); + + Query baseQuery = new WildcardQuery(new Term(fieldName, searchTerm)); + queryBuilder.add(new BoostQuery(baseQuery, weight), BooleanClause.Occur.SHOULD); + } + } + return Optional.of(queryBuilder.build()); + } +} diff --git a/gymboard-search/src/main/resources/sql/select-users.sql b/gymboard-search/src/main/resources/sql/select-users.sql new file mode 100644 index 0000000..b933eba --- /dev/null +++ b/gymboard-search/src/main/resources/sql/select-users.sql @@ -0,0 +1,7 @@ +SELECT + u.id as id, + u.email as email, + u.name as name +FROM auth_user u +WHERE u.activated = TRUE +ORDER BY u.created_at; \ No newline at end of file