Filter search index text.

pull/1/head
Matthew Chen 7 years ago
parent b5e0265758
commit f5a5d84edc

@ -18,17 +18,7 @@ public class SearchIndexer<T> {
} }
private func normalize(indexingText: String) -> String { private func normalize(indexingText: String) -> String {
var normalized: String = indexingText.trimmingCharacters(in: .whitespacesAndNewlines) return FullTextSearchFinder.sanitize(text: indexingText)
// Remove any punctuation from the search index
let nonformattingScalars = normalized.unicodeScalars.lazy.filter {
!CharacterSet.punctuationCharacters.contains($0)
}
normalized = String(String.UnicodeScalarView(nonformattingScalars))
return normalized
// return FullTextSearchFinder.filterIndexOrQueryText(text: indexingText)
} }
} }
@ -37,24 +27,42 @@ public class FullTextSearchFinder: NSObject {
// Mark: Querying // Mark: Querying
// Builds the full-text-search query string for the given search text.
//
// Every token becomes a prefix match to support "search as you type";
// SQLite does not support suffix or contains matches.
public class func query(searchText: String) -> String {
// Normalize the raw text, then break it apart on single spaces.
let tokens = normalize(queryText: searchText).split(separator: " ")

// Skip empty tokens; a trailing "*" lets each remaining token match by prefix.
var prefixTerms = [String]()
for token in tokens where token.count > 0 {
prefixTerms.append(String(token) + "*")
}

// Recombine the terms, separated by single spaces.
return prefixTerms.joined(separator: " ")
}
public func enumerateObjects(searchText: String, transaction: YapDatabaseReadTransaction, block: @escaping (Any, String) -> Void) { public func enumerateObjects(searchText: String, transaction: YapDatabaseReadTransaction, block: @escaping (Any, String) -> Void) {
guard let ext: YapDatabaseFullTextSearchTransaction = ext(transaction: transaction) else { guard let ext: YapDatabaseFullTextSearchTransaction = ext(transaction: transaction) else {
owsFail("\(logTag) ext was unexpectedly nil") owsFail("\(logTag) ext was unexpectedly nil")
return return
} }
let normalized = normalize(queryText: searchText) let query = FullTextSearchFinder.query(searchText: searchText)
// We want to match by prefix for "search as you type" functionality. Logger.verbose("\(logTag) query: \(query)")
// SQLite does not support suffix or contains matches.
let prefixQuery = "\(normalized)*"
let maxSearchResults = 500 let maxSearchResults = 500
var searchResultCount = 0 var searchResultCount = 0
let snippetOptions = YapDatabaseFullTextSearchSnippetOptions() let snippetOptions = YapDatabaseFullTextSearchSnippetOptions()
snippetOptions.startMatchText = "" snippetOptions.startMatchText = ""
snippetOptions.endMatchText = "" snippetOptions.endMatchText = ""
ext.enumerateKeysAndObjects(matching: prefixQuery, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer<ObjCBool>) in ext.enumerateKeysAndObjects(matching: query, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer<ObjCBool>) in
guard searchResultCount < maxSearchResults else { guard searchResultCount < maxSearchResults else {
stop.pointee = true stop.pointee = true
return return
@ -66,64 +74,49 @@ public class FullTextSearchFinder: NSObject {
} }
// Mark: Filtering // Mark: Filtering
// private class func characterSet(fromCharacter: UInt32, toCharacter: UInt32) -> CharacterSet { fileprivate class func charactersToRemove() -> CharacterSet {
// var string = "" var charactersToFilter = CharacterSet.punctuationCharacters
// // Add to include last character. charactersToFilter.formUnion(CharacterSet.illegalCharacters)
// for character in fromCharacter ..< toCharacter + 1 { charactersToFilter.formUnion(CharacterSet.controlCharacters)
// guard let chr = Unicode.Scalar(character) else { charactersToFilter.formUnion(CharacterSet.symbols)
// assertionFailure("\(self.logTag) could not parse character.") return charactersToFilter
// continue }
// }
// string += String(chr) fileprivate class func separatorCharacters() -> CharacterSet {
// } let separatorCharacters = CharacterSet.whitespacesAndNewlines
// return CharacterSet(charactersIn: string) return separatorCharacters
// } }
//
// private static var kFilterCharacters: CharacterSet = { fileprivate class func sanitize(text: String) -> String {
// var set = CharacterSet() // 1. Filter out invalid characters.
// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 0, toCharacter: 31)) let filtered = text.unicodeScalars.lazy.filter({
// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 33, toCharacter: 47)) !charactersToRemove().contains($0)
// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 58, toCharacter: 64)) })
// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 91, toCharacter: 96))
// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 123, toCharacter: 126)) // 2. Simplify whitespace.
// return set let simplifyingFunction: (UnicodeScalar) -> UnicodeScalar = {
// }() if separatorCharacters().contains($0) {
// return UnicodeScalar(" ")
// public class func filterIndexOrQueryText(text: String) -> String { } else {
// let filteredScalars = String(text.unicodeScalars.lazy.map { return $0
// if kFilterCharacters.contains($0) { }
// return " "
// } else {
// return Character($0)
// }
// })
//
// // Remove any phone number formatting from the search terms
// let nonformattingScalars = filteredScalars.unicodeScalars.lazy.filter {
// !CharacterSet.punctuationCharacters.contains($0)
// }
//
// var normalized = String(String.UnicodeScalarView(nonformattingScalars))
//
// // Simplify the normalized text by combining adjacent whitespace.
// while normalized.contains("  ") {
// normalized = normalized.replacingOccurrences(of: "  ", with: " ")
// }
//
// // We strip leading & trailing whitespace last, since we may replace
// // filtered characters with whitespace.
// return normalized.trimmingCharacters(in: .whitespacesAndNewlines)
// }
private func normalize(queryText: String) -> String {
var normalized: String = queryText.trimmingCharacters(in: .whitespacesAndNewlines)
// Remove any punctuation from the search terms
let nonformattingScalars = normalized.unicodeScalars.lazy.filter {
!CharacterSet.punctuationCharacters.contains($0)
} }
let normalizedChars = String(String.UnicodeScalarView(nonformattingScalars)) let simplified = filtered.map(simplifyingFunction)
// 3. Combine adjacent whitespace.
var result = String(String.UnicodeScalarView(simplified))
while result.contains(" ") {
result = result.replacingOccurrences(of: " ", with: " ")
}
// 4. Strip leading & trailing whitespace last, since we may replace
// filtered characters with whitespace.
return result.trimmingCharacters(in: .whitespacesAndNewlines)
}
private class func normalize(queryText: String) -> String {
var normalized: String = FullTextSearchFinder.sanitize(text: queryText)
let digitsOnlyScalars = normalized.unicodeScalars.lazy.filter { let digitsOnlyScalars = normalized.unicodeScalars.lazy.filter {
CharacterSet.decimalDigits.contains($0) CharacterSet.decimalDigits.contains($0)
@ -131,9 +124,9 @@ public class FullTextSearchFinder: NSObject {
let normalizedDigits = String(String.UnicodeScalarView(digitsOnlyScalars)) let normalizedDigits = String(String.UnicodeScalarView(digitsOnlyScalars))
if normalizedDigits.count > 0 { if normalizedDigits.count > 0 {
return "\(normalizedChars) OR \(normalizedDigits)" return "\(normalized) OR \(normalizedDigits)"
} else { } else {
return "\(normalizedChars)" return normalized
} }
} }
@ -231,8 +224,11 @@ public class FullTextSearchFinder: NSObject {
} }
// update search index on contact name changes? // update search index on contact name changes?
// update search index on message insertion?
return YapDatabaseFullTextSearch(columnNames: ["content"], handler: handler) return YapDatabaseFullTextSearch(columnNames: ["content"],
options: nil,
handler: handler,
ftsVersion: YapDatabaseFullTextSearchFTS5Version,
versionTag: "1")
} }
} }

@ -542,10 +542,13 @@ NSString *const kNSUserDefaults_DatabaseExtensionVersionMap = @"kNSUserDefaults_
YapDatabaseFullTextSearch *fullTextSearch = (YapDatabaseFullTextSearch *)extension; YapDatabaseFullTextSearch *fullTextSearch = (YapDatabaseFullTextSearch *)extension;
NSString *versionTag = [self appendSuffixToDatabaseExtensionVersionIfNecessary:fullTextSearch.versionTag extensionName:extensionName]; NSString *versionTag = [self appendSuffixToDatabaseExtensionVersionIfNecessary:fullTextSearch.versionTag extensionName:extensionName];
YapDatabaseFullTextSearch *fullTextSearchCopy = [[YapDatabaseFullTextSearch alloc] initWithColumnNames:fullTextSearch->columnNames.array YapDatabaseFullTextSearch *fullTextSearchCopy =
handler:fullTextSearch->handler [[YapDatabaseFullTextSearch alloc] initWithColumnNames:fullTextSearch->columnNames.array
versionTag:versionTag]; options:fullTextSearch->options
handler:fullTextSearch->handler
ftsVersion:fullTextSearch->ftsVersion
versionTag:versionTag];
return fullTextSearchCopy; return fullTextSearchCopy;
} else if ([extension isKindOfClass:[YapDatabaseCrossProcessNotification class]]) { } else if ([extension isKindOfClass:[YapDatabaseCrossProcessNotification class]]) {
// versionTag doesn't matter for YapDatabaseCrossProcessNotification. // versionTag doesn't matter for YapDatabaseCrossProcessNotification.

Loading…
Cancel
Save