Merge branch 'charlesmchen/filterIndexText'

pull/1/head
Matthew Chen 7 years ago
commit 8d078e0a11

@ -64,6 +64,12 @@ class FakeContactsManager: NSObject, ContactsManagerProtocol {
func isSystemContact(withSignalAccount recipientId: String) -> Bool {
return true
}
func compare(signalAccount left: SignalAccount, with right: SignalAccount) -> ComparisonResult {
owsFail("if this method ends up being used by the tests, we should provide a better implementation.")
return .orderedAscending
}
}
let bobRecipientId = "+49030183000"
@ -131,6 +137,12 @@ class ConversationSearcherTest: XCTestCase {
let goodbyeBookClub = TSOutgoingMessage(in: bookClubGroupThread, messageBody: "Goodbye Book Club", attachmentId: nil)
goodbyeBookClub.save(with: transaction)
let bobsPhoneNumber = TSOutgoingMessage(in: bookClubGroupThread, messageBody: "My phone number is: 321-321-4321", attachmentId: nil)
bobsPhoneNumber.save(with: transaction)
let bobsFaxNumber = TSOutgoingMessage(in: bookClubGroupThread, messageBody: "My fax is: 222-333-4444", attachmentId: nil)
bobsFaxNumber.save(with: transaction)
}
}
@ -181,17 +193,17 @@ class ConversationSearcherTest: XCTestCase {
// Exact match
threads = searchConversations(searchText: aliceRecipientId)
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
// Partial match
threads = searchConversations(searchText: "+123456")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
// Prefixes
threads = searchConversations(searchText: "12345678900")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
threads = searchConversations(searchText: "49")
XCTAssertEqual(1, threads.count)
@ -199,15 +211,19 @@ class ConversationSearcherTest: XCTestCase {
threads = searchConversations(searchText: "1-234-56")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
threads = searchConversations(searchText: "123456")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
threads = searchConversations(searchText: "1.234.56")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
threads = searchConversations(searchText: "1 234 56")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
}
func testSearchContactByNumberWithoutCountryCode() {
@ -215,11 +231,11 @@ class ConversationSearcherTest: XCTestCase {
// Phone Number formatting should be forgiving
threads = searchConversations(searchText: "234.56")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
threads = searchConversations(searchText: "234 56")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
}
func testSearchConversationByContactByName() {
@ -227,7 +243,7 @@ class ConversationSearcherTest: XCTestCase {
threads = searchConversations(searchText: "Alice")
XCTAssertEqual(3, threads.count)
XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads)
XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads)
threads = searchConversations(searchText: "Bob")
XCTAssertEqual(1, threads.count)
@ -255,6 +271,86 @@ class ConversationSearcherTest: XCTestCase {
XCTAssert(resultSet.messages.map { $0.thread }.contains(bookClubThread))
}
func testSearchEdgeCases() {
var resultSet: SearchResultSet = .empty
resultSet = getResultSet(searchText: "Hello Alice")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "hello alice")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "Hel")
XCTAssertEqual(2, resultSet.messages.count)
XCTAssertEqual(["Hello Alice", "Hello Book Club"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "Hel Ali")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "Hel Ali Alic")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "Ali Hel")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "CLU")
XCTAssertEqual(2, resultSet.messages.count)
XCTAssertEqual(["Goodbye Book Club", "Hello Book Club"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "hello !@##!@#!$^@!@#! alice")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "3213 phone")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["My phone number is: 321-321-4321"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "PHO 3213")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["My phone number is: 321-321-4321"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "fax")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["My fax is: 222-333-4444"], bodies(forMessageResults: resultSet.messages))
resultSet = getResultSet(searchText: "fax 2223")
XCTAssertEqual(1, resultSet.messages.count)
XCTAssertEqual(["My fax is: 222-333-4444"], bodies(forMessageResults: resultSet.messages))
}
func bodies(forMessageResults messageResults: [ConversationSearchResult]) -> [String] {
var result = [String]()
self.dbConnection.read { transaction in
for messageResult in messageResults {
guard let messageId = messageResult.messageId else {
owsFail("message result missing message id")
continue
}
guard let interaction = TSInteraction.fetch(uniqueId: messageId, transaction: transaction) else {
owsFail("couldn't load interaction for message result")
continue
}
guard let message = interaction as? TSMessage else {
owsFail("invalid message for message result")
continue
}
guard let messageBody = message.body else {
owsFail("message result missing message body")
continue
}
result.append(messageBody)
}
}
return result.sorted()
}
// Mark: Helpers
private func searchConversations(searchText: String) -> [ThreadViewModel] {
@ -265,7 +361,7 @@ class ConversationSearcherTest: XCTestCase {
private func getResultSet(searchText: String) -> SearchResultSet {
var results: SearchResultSet!
self.dbConnection.read { transaction in
results = self.searcher.results(searchText: searchText, transaction: transaction)
results = self.searcher.results(searchText: searchText, transaction: transaction, contactsManager: TextSecureKitEnv.shared().contactsManager)
}
return results
}
@ -341,4 +437,24 @@ class SearcherTest: XCTestCase {
XCTAssert(searcher.matches(item: stinkingLizaveta, query: "Liza 323"))
XCTAssertFalse(searcher.matches(item: regularLizaveta, query: "Liza 323"))
}
func testSearchQuery() {
XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza"), "\"Liza\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza +1-323"), "\"1323\"* \"Liza\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "\"\\ `~!@#$%^&*()_+-={}|[]:;'<>?,./Liza +1-323"), "\"1323\"* \"Liza\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "renaldo RENALDO reñaldo REÑALDO"), "\"RENALDO\"* \"REÑALDO\"* \"renaldo\"* \"reñaldo\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "😏"), "\"😏\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "alice 123 bob 456"), "\"123456\"* \"alice\"* \"bob\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "Li!za"), "\"Liza\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza Liza"), "\"Liza\"*")
XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza liza"), "\"Liza\"* \"liza\"*")
}
func testTextNormalization() {
XCTAssertEqual(FullTextSearchFinder.normalize(text: "Liza"), "Liza")
XCTAssertEqual(FullTextSearchFinder.normalize(text: "Liza +1-323"), "Liza 1323")
XCTAssertEqual(FullTextSearchFinder.normalize(text: "\"\\ `~!@#$%^&*()_+-={}|[]:;'<>?,./Liza +1-323"), "Liza 1323")
XCTAssertEqual(FullTextSearchFinder.normalize(text: "renaldo RENALDO reñaldo REÑALDO"), "renaldo RENALDO reñaldo REÑALDO")
XCTAssertEqual(FullTextSearchFinder.normalize(text: "😏"), "😏")
}
}

@ -71,7 +71,6 @@ extern NSString *const OWSContactsManagerSignalAccountsDidChangeNotification;
* Used for sorting, respects system contacts name sort order preference.
*/
- (NSString *)comparableNameForSignalAccount:(SignalAccount *)signalAccount;
- (NSComparisonResult)compareSignalAccount:(SignalAccount *)left withSignalAccount:(SignalAccount *)right;
// Generally we prefer the formattedProfileName over the raw profileName so as to
// distinguish a profile name apart from a name pulled from the system's contacts.

@ -39,13 +39,13 @@ public class ConversationSearchResult: Comparable {
public class ContactSearchResult: Comparable {
public let signalAccount: SignalAccount
public let contactsManager: OWSContactsManager
public let contactsManager: ContactsManagerProtocol
public var recipientId: String {
return signalAccount.recipientId
}
init(signalAccount: SignalAccount, contactsManager: OWSContactsManager) {
init(signalAccount: SignalAccount, contactsManager: ContactsManagerProtocol) {
self.signalAccount = signalAccount
self.contactsManager = contactsManager
}
@ -53,7 +53,7 @@ public class ContactSearchResult: Comparable {
// Mark: Comparable
public static func < (lhs: ContactSearchResult, rhs: ContactSearchResult) -> Bool {
return lhs.contactsManager.compareSignalAccount(lhs.signalAccount, with: rhs.signalAccount) == .orderedAscending
return lhs.contactsManager.compare(signalAccount: lhs.signalAccount, with: rhs.signalAccount) == .orderedAscending
}
// MARK: Equatable
@ -99,7 +99,7 @@ public class ConversationSearcher: NSObject {
public func results(searchText: String,
transaction: YapDatabaseReadTransaction,
contactsManager: OWSContactsManager) -> SearchResultSet {
contactsManager: ContactsManagerProtocol) -> SearchResultSet {
var conversations: [ConversationSearchResult] = []
var contacts: [ContactSearchResult] = []

@ -17,6 +17,9 @@ NS_ASSUME_NONNULL_BEGIN
- (BOOL)isSystemContact:(NSString *)recipientId;
- (BOOL)isSystemContactWithSignalAccount:(NSString *)recipientId;
- (NSComparisonResult)compareSignalAccount:(SignalAccount *)left
withSignalAccount:(SignalAccount *)right NS_SWIFT_NAME(compare(signalAccount:with:));
@end
NS_ASSUME_NONNULL_END

@ -18,23 +18,66 @@ public class SearchIndexer<T> {
}
private func normalize(indexingText: String) -> String {
var normalized: String = indexingText.trimmingCharacters(in: .whitespacesAndNewlines)
// Remove any punctuation from the search index
let nonformattingScalars = normalized.unicodeScalars.lazy.filter {
!CharacterSet.punctuationCharacters.contains($0)
}
normalized = String(String.UnicodeScalarView(nonformattingScalars))
return normalized
return FullTextSearchFinder.normalize(text: indexingText)
}
}
@objc
public class FullTextSearchFinder: NSObject {
// Mark: Querying
// MARK: - Querying
// We want to match by prefix for "search as you type" functionality.
// SQLite does not support suffix or contains matches.
public class func query(searchText: String) -> String {
// 1. Normalize the search text.
//
// TODO: We could arguably convert to lowercase since the search
// is case-insensitive.
let normalizedSearchText = FullTextSearchFinder.normalize(text: searchText)
// 2. Split the non-numeric text into query terms (or tokens).
let nonNumericText = String(String.UnicodeScalarView(normalizedSearchText.unicodeScalars.lazy.map {
if CharacterSet.decimalDigits.contains($0) {
return " "
} else {
return $0
}
}))
var queryTerms = nonNumericText.split(separator: " ")
// 3. Add an additional numeric-only query term.
let digitsOnlyScalars = normalizedSearchText.unicodeScalars.lazy.filter {
CharacterSet.decimalDigits.contains($0)
}
let digitsOnly: Substring = Substring(String(String.UnicodeScalarView(digitsOnlyScalars)))
queryTerms.append(digitsOnly)
// 4. De-duplicate and sort query terms.
// Duplicate terms are redundant.
// Sorting terms makes the output of this method deterministic and easier to test,
// and the order won't affect the search results.
queryTerms = Array(Set(queryTerms)).sorted()
// 5. Filter the query terms.
let filteredQueryTerms = queryTerms.filter {
// Ignore empty terms.
$0.count > 0
}.map {
// Allow partial match of each term.
//
// Note that we use double-quotes to enclose each search term.
// Quoted search terms can include a few more characters than
// "bareword" (non-quoted) search terms. This shouldn't matter,
// since we're filtering all of the affected characters, but
// quoting protects us from any bugs in that logic.
"\"\($0)\"*"
}
// 6. Join terms into query string.
let query = filteredQueryTerms.joined(separator: " ")
return query
}
public func enumerateObjects(searchText: String, transaction: YapDatabaseReadTransaction, block: @escaping (Any, String) -> Void) {
guard let ext: YapDatabaseFullTextSearchTransaction = ext(transaction: transaction) else {
@ -42,50 +85,73 @@ public class FullTextSearchFinder: NSObject {
return
}
let normalized = normalize(queryText: searchText)
let query = FullTextSearchFinder.query(searchText: searchText)
// We want to match by prefix for "search as you type" functionality.
// SQLite does not support suffix or contains matches.
let prefixQuery = "\(normalized)*"
Logger.verbose("\(logTag) query: \(query)")
let maxSearchResults = 500
var searchResultCount = 0
let snippetOptions = YapDatabaseFullTextSearchSnippetOptions()
snippetOptions.startMatchText = ""
snippetOptions.endMatchText = ""
ext.enumerateKeysAndObjects(matching: prefixQuery, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer<ObjCBool>) in
ext.enumerateKeysAndObjects(matching: query, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer<ObjCBool>) in
guard searchResultCount < maxSearchResults else {
stop.pointee = true
return
}
searchResultCount = searchResultCount + 1
searchResultCount += 1
block(object, snippet)
}
}
private func normalize(queryText: String) -> String {
var normalized: String = queryText.trimmingCharacters(in: .whitespacesAndNewlines)
// Remove any punctuation from the search terms
let nonformattingScalars = normalized.unicodeScalars.lazy.filter {
!CharacterSet.punctuationCharacters.contains($0)
}
let normalizedChars = String(String.UnicodeScalarView(nonformattingScalars))
let digitsOnlyScalars = normalized.unicodeScalars.lazy.filter {
CharacterSet.decimalDigits.contains($0)
// MARK: - Normalization
fileprivate static var charactersToRemove: CharacterSet = {
// * We want to strip punctuation - and our definition of "punctuation"
// is broader than `CharacterSet.punctuationCharacters`.
// * FTS should be robust to (i.e. ignore) illegal and control characters,
// but it's safer if we filter them ourselves as well.
var charactersToFilter = CharacterSet.punctuationCharacters
charactersToFilter.formUnion(CharacterSet.illegalCharacters)
charactersToFilter.formUnion(CharacterSet.controlCharacters)
// We want to strip all ASCII characters except:
// * Letters a-z, A-Z
// * Numerals 0-9
// * Whitespace
var asciiToFilter = CharacterSet(charactersIn: UnicodeScalar(0x0)!..<UnicodeScalar(0x80)!)
assert(!asciiToFilter.contains(UnicodeScalar(0x80)!))
asciiToFilter.subtract(CharacterSet.alphanumerics)
asciiToFilter.subtract(CharacterSet.whitespacesAndNewlines)
charactersToFilter.formUnion(asciiToFilter)
return charactersToFilter
}()
public class func normalize(text: String) -> String {
// 1. Filter out invalid characters.
let filtered = text.unicodeScalars.lazy.filter({
!charactersToRemove.contains($0)
})
// 2. Simplify whitespace.
let simplifyingFunction: (UnicodeScalar) -> UnicodeScalar = {
if CharacterSet.whitespacesAndNewlines.contains($0) {
return UnicodeScalar(" ")
} else {
return $0
}
}
let normalizedDigits = String(String.UnicodeScalarView(digitsOnlyScalars))
let simplified = filtered.map(simplifyingFunction)
if normalizedDigits.count > 0 {
return "\(normalizedChars) OR \(normalizedDigits)"
} else {
return "\(normalizedChars)"
}
// 3. Strip leading & trailing whitespace last, since we may replace
// filtered characters with whitespace.
let result = String(String.UnicodeScalarView(simplified))
return result.trimmingCharacters(in: .whitespacesAndNewlines)
}
// Mark: Index Building
// MARK: - Index Building
private class var contactsManager: ContactsManagerProtocol {
return TextSecureKitEnv.shared().contactsManager
@ -153,7 +219,7 @@ public class FullTextSearchFinder: NSObject {
// MARK: - Extension Registration
private static let dbExtensionName: String = "FullTextSearchFinderExtension)"
private static let dbExtensionName: String = "FullTextSearchFinderExtension"
private func ext(transaction: YapDatabaseReadTransaction) -> YapDatabaseFullTextSearchTransaction? {
return transaction.ext(FullTextSearchFinder.dbExtensionName) as? YapDatabaseFullTextSearchTransaction
@ -179,8 +245,11 @@ public class FullTextSearchFinder: NSObject {
}
// update search index on contact name changes?
// update search index on message insertion?
return YapDatabaseFullTextSearch(columnNames: ["content"], handler: handler)
return YapDatabaseFullTextSearch(columnNames: ["content"],
options: nil,
handler: handler,
ftsVersion: YapDatabaseFullTextSearchFTS5Version,
versionTag: "1")
}
}

@ -542,10 +542,13 @@ NSString *const kNSUserDefaults_DatabaseExtensionVersionMap = @"kNSUserDefaults_
YapDatabaseFullTextSearch *fullTextSearch = (YapDatabaseFullTextSearch *)extension;
NSString *versionTag = [self appendSuffixToDatabaseExtensionVersionIfNecessary:fullTextSearch.versionTag extensionName:extensionName];
YapDatabaseFullTextSearch *fullTextSearchCopy = [[YapDatabaseFullTextSearch alloc] initWithColumnNames:fullTextSearch->columnNames.array
handler:fullTextSearch->handler
versionTag:versionTag];
YapDatabaseFullTextSearch *fullTextSearchCopy =
[[YapDatabaseFullTextSearch alloc] initWithColumnNames:fullTextSearch->columnNames.array
options:fullTextSearch->options
handler:fullTextSearch->handler
ftsVersion:fullTextSearch->ftsVersion
versionTag:versionTag];
return fullTextSearchCopy;
} else if ([extension isKindOfClass:[YapDatabaseCrossProcessNotification class]]) {
// versionTag doesn't matter for YapDatabaseCrossProcessNotification.

Loading…
Cancel
Save