From 6036e20079f35e01d0c919276f1630cc24f10869 Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Sun, 19 Mar 2017 09:44:53 -0400 Subject: [PATCH 1/2] Filter high diacritical text, regardless of length // FREEBIE --- .../TSMessageAdapaters/TSMessageAdapter.m | 7 +-- Signal/src/util/DisplayableTextFilter.swift | 43 ++++++++----------- .../src/view controllers/InboxTableViewCell.m | 8 +--- .../test/util/DisplayableTextFilterTest.swift | 23 +++++++--- 4 files changed, 40 insertions(+), 41 deletions(-) diff --git a/Signal/src/Models/TSMessageAdapaters/TSMessageAdapter.m b/Signal/src/Models/TSMessageAdapaters/TSMessageAdapter.m index 0d4faab01..935fa772d 100644 --- a/Signal/src/Models/TSMessageAdapaters/TSMessageAdapter.m +++ b/Signal/src/Models/TSMessageAdapaters/TSMessageAdapter.m @@ -121,7 +121,7 @@ if ([interaction isKindOfClass:[TSIncomingMessage class]] || [interaction isKindOfClass:[TSOutgoingMessage class]]) { TSMessage *message = (TSMessage *)interaction; - adapter.messageBody = message.body; + adapter.messageBody = [[DisplayableTextFilter new] displayableText:message.body]; if ([message hasAttachments]) { for (NSString *attachmentID in message.attachmentIds) { @@ -167,11 +167,6 @@ NSStringFromClass([attachment class])); } } - } else { // no attachment, plain text message - if ([[DisplayableTextFilter new] shouldPreventDisplayOfText:adapter.messageBody]) { - adapter.messageType = TSInfoMessageAdapter; - adapter.messageBody = NSLocalizedString(@"INFO_MESSAGE_UNABLE_TO_DISPLAY_MESSAGE", @"Generic error text when message contents are undisplayable"); - } } } else if ([interaction isKindOfClass:[TSCall class]]) { TSCall *callRecord = (TSCall *)interaction; diff --git a/Signal/src/util/DisplayableTextFilter.swift b/Signal/src/util/DisplayableTextFilter.swift index d759a2e26..db907ec8c 100644 --- a/Signal/src/util/DisplayableTextFilter.swift +++ b/Signal/src/util/DisplayableTextFilter.swift @@ -6,36 +6,31 @@ import Foundation @objc class DisplayableTextFilter: NSObject 
{ - // don't bother filtering on small text, lest we inadvertently catch legitimate usage of rare code point stacking - let allowAnyTextLessThanByteSize: Int + let TAG = "[DisplayableTextFilter]" - convenience override init() { - self.init(allowAnyTextLessThanByteSize: 10000) - } - - required init(allowAnyTextLessThanByteSize: Int) { - self.allowAnyTextLessThanByteSize = allowAnyTextLessThanByteSize - } - - @objc(shouldPreventDisplayOfText:) - func shouldPreventDisplay(text: String?) -> Bool { + @objc + func displayableText(_ text: String?) -> String? { guard let text = text else { - return false + return nil } - let byteCount = text.lengthOfBytes(using: .utf8) - - guard byteCount >= allowAnyTextLessThanByteSize else { - return false + if (self.hasExcessiveDiacriticals(text: text)) { + return text.folding(options: .diacriticInsensitive, locale: .current) } - let characterCount = text.characters.count - // discard any zalgo style text, which we detect by enforcing avg bytes per character ratio. 
- if byteCount / characterCount > 10 { - Logger.warn("filtering undisplayable text bytes: \(byteCount), characterCount: \(characterCount)") - return true - } else { - return false + return text + } + + private func hasExcessiveDiacriticals(text: String) -> Bool { + // discard any zalgo style text, by detecting maximum number of glyphs per character + for char in text.characters.enumerated() { + let scalarCount = String(char.element).unicodeScalars.count + if scalarCount > 4 { + Logger.warn("\(TAG) filtering undisplayable text \(char.element) scalarCount: \(scalarCount)") + return true + } } + + return false } } diff --git a/Signal/src/view controllers/InboxTableViewCell.m b/Signal/src/view controllers/InboxTableViewCell.m index 4843b2493..0087c947d 100644 --- a/Signal/src/view controllers/InboxTableViewCell.m +++ b/Signal/src/view controllers/InboxTableViewCell.m @@ -63,12 +63,8 @@ NS_ASSUME_NONNULL_BEGIN } UIImage *avatar = [OWSAvatarBuilder buildImageForThread:thread contactsManager:contactsManager]; self.threadId = thread.uniqueId; - NSString *snippetLabel; - if ([[DisplayableTextFilter new] shouldPreventDisplayOfText:thread.lastMessageLabel]) { - snippetLabel = NSLocalizedString(@"INFO_MESSAGE_UNABLE_TO_DISPLAY_MESSAGE", @"Generic error text when message contents are undisplayable"); - } else { - snippetLabel = thread.lastMessageLabel; - } + NSString *snippetLabel = [[DisplayableTextFilter new] displayableText:thread.lastMessageLabel]; + NSAttributedString *attributedDate = [self dateAttributedString:thread.lastMessageDate]; NSUInteger unreadCount = [[TSMessagesManager sharedManager] unreadMessagesInThread:thread]; diff --git a/Signal/test/util/DisplayableTextFilterTest.swift b/Signal/test/util/DisplayableTextFilterTest.swift index dc504a3cb..5a94660bb 100644 --- a/Signal/test/util/DisplayableTextFilterTest.swift +++ b/Signal/test/util/DisplayableTextFilterTest.swift @@ -16,12 +16,25 @@ class DisplayableTextFilterTest: XCTestCase { super.tearDown() } - func 
testFiltering() { + func testDisplayableText() { // Ignore default byte size limitations to test other filtering behaviors - let filter = DisplayableTextFilter(allowAnyTextLessThanByteSize: 0) + let filter = DisplayableTextFilter() - XCTAssertFalse( filter.shouldPreventDisplay(text: "normal text") ) - XCTAssertFalse( filter.shouldPreventDisplay(text: "🇹🇹🌼🇹🇹🌼🇹🇹") ) - XCTAssertTrue( filter.shouldPreventDisplay(text: "L̷̳͔̲͝Ģ̵̮̯̤̩̙͍̬̟͉̹̘̹͍͈̮̦̰̣͟͝O̶̴̮̻̮̗͘͡!̴̷̟͓͓") ) + // show plain text + let boringText = "boring text" + XCTAssertEqual(boringText, filter.displayableText(boringText)) + + // show high byte emojis + let emojiText = "🇹🇹🌼🇹🇹🌼🇹🇹" + XCTAssertEqual(emojiText, filter.displayableText(emojiText)) + + // show normal diacritic usage + let diacriticalText = "Příliš žluťoučký kůň úpěl ďábelské ódy." + XCTAssertEqual(diacriticalText, filter.displayableText(diacriticalText)) + + // filter excessive diacritics + XCTAssertEqual("HAVING TROUBLE READING TEXT?", filter.displayableText("H҉̸̧͘͠A͢͞V̛̛I̴̸N͏̕͏G҉̵͜͏͢ ̧̧́T̶̛͘͡R̸̵̨̢̀O̷̡U͡҉B̶̛͢͞L̸̸͘͢͟É̸ ̸̛͘͏R͟È͠͞A̸͝Ḑ̕͘͜I̵͘҉͜͞N̷̡̢͠G̴͘͠ ͟͞T͏̢́͡È̀X̕҉̢̀T̢͠?̕͏̢͘͢") ) + + XCTAssertEqual("LGO!", filter.displayableText("L̷̳͔̲͝Ģ̵̮̯̤̩̙͍̬̟͉̹̘̹͍͈̮̦̰̣͟͝O̶̴̮̻̮̗͘͡!̴̷̟͓͓")) } } From 0b815235976bfbe89755bbf6f55aa9ee09f4b1c7 Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Wed, 22 Mar 2017 15:03:04 -0400 Subject: [PATCH 2/2] Clearer logging and added an assert per CR // FREEBIE --- Signal/src/util/DisplayableTextFilter.swift | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Signal/src/util/DisplayableTextFilter.swift b/Signal/src/util/DisplayableTextFilter.swift index db907ec8c..90b97bc46 100644 --- a/Signal/src/util/DisplayableTextFilter.swift +++ b/Signal/src/util/DisplayableTextFilter.swift @@ -15,7 +15,10 @@ import Foundation } if (self.hasExcessiveDiacriticals(text: text)) { - return text.folding(options: .diacriticInsensitive, locale: .current) + Logger.warn("\(TAG) filtering text for excessive diacriticals.") + let 
filteredText = text.folding(options: .diacriticInsensitive, locale: .current) + assert(!self.hasExcessiveDiacriticals(text: filteredText)) + return filteredText } return text @@ -26,7 +29,7 @@ import Foundation for char in text.characters.enumerated() { let scalarCount = String(char.element).unicodeScalars.count if scalarCount > 4 { - Logger.warn("\(TAG) filtering undisplayable text \(char.element) scalarCount: \(scalarCount)") + Logger.warn("\(TAG) detected excessive diacriticals at \(char.element) scalarCount: \(scalarCount)") return true } }