Filter high diacritical text, regardless of length

// FREEBIE
pull/1/head
Michael Kirk 8 years ago
parent 78b98a5a91
commit 6036e20079

@ -121,7 +121,7 @@
if ([interaction isKindOfClass:[TSIncomingMessage class]] ||
[interaction isKindOfClass:[TSOutgoingMessage class]]) {
TSMessage *message = (TSMessage *)interaction;
adapter.messageBody = message.body;
adapter.messageBody = [[DisplayableTextFilter new] displayableText:message.body];
if ([message hasAttachments]) {
for (NSString *attachmentID in message.attachmentIds) {
@ -167,11 +167,6 @@
NSStringFromClass([attachment class]));
}
}
} else { // no attachment, plain text message
if ([[DisplayableTextFilter new] shouldPreventDisplayOfText:adapter.messageBody]) {
adapter.messageType = TSInfoMessageAdapter;
adapter.messageBody = NSLocalizedString(@"INFO_MESSAGE_UNABLE_TO_DISPLAY_MESSAGE", @"Generic error text when message contents are undisplayable");
}
}
} else if ([interaction isKindOfClass:[TSCall class]]) {
TSCall *callRecord = (TSCall *)interaction;

@ -6,36 +6,31 @@ import Foundation
@objc class DisplayableTextFilter: NSObject {
// don't bother filtering on small text, lest we inadvertently catch legitimate usage of rare code point stacking
let allowAnyTextLessThanByteSize: Int
let TAG = "[DisplayableTextFilter]"
convenience override init() {
self.init(allowAnyTextLessThanByteSize: 10000)
}
required init(allowAnyTextLessThanByteSize: Int) {
self.allowAnyTextLessThanByteSize = allowAnyTextLessThanByteSize
}
@objc(shouldPreventDisplayOfText:)
func shouldPreventDisplay(text: String?) -> Bool {
@objc
func displayableText(_ text: String?) -> String? {
guard let text = text else {
return false
return nil
}
let byteCount = text.lengthOfBytes(using: .utf8)
guard byteCount >= allowAnyTextLessThanByteSize else {
return false
if (self.hasExcessiveDiacriticals(text: text)) {
return text.folding(options: .diacriticInsensitive, locale: .current)
}
let characterCount = text.characters.count
// discard any zalgo style text, which we detect by enforcing avg bytes per character ratio.
if byteCount / characterCount > 10 {
Logger.warn("filtering undisplayable text bytes: \(byteCount), characterCount: \(characterCount)")
return true
} else {
return false
return text
}
/// Reports whether any single character of `text` is composed of more than four unicode scalars.
///
/// This is the detector for zalgo-style text: the scalars making up each character are
/// counted, and the first character that exceeds the limit ends the scan.
///
/// - Parameter text: the string to inspect.
/// - Returns: `true` as soon as one character has more than four scalars (a warning naming that
///   character is logged first); `false` when no character does, including for an empty string.
///
/// NOTE(review): every scalar in a character is counted, not only combining marks, so other
/// long multi-scalar sequences could presumably trip this as well — confirm this is acceptable.
private func hasExcessiveDiacriticals(text: String) -> Bool {
    for glyph in text.characters {
        let scalarsInGlyph = String(glyph).unicodeScalars.count
        // Characters within the limit are fine; keep scanning.
        guard scalarsInGlyph <= 4 else {
            Logger.warn("\(TAG) filtering undisplayable text \(glyph) scalarCount: \(scalarsInGlyph)")
            return true
        }
    }
    return false
}
}

@ -63,12 +63,8 @@ NS_ASSUME_NONNULL_BEGIN
}
UIImage *avatar = [OWSAvatarBuilder buildImageForThread:thread contactsManager:contactsManager];
self.threadId = thread.uniqueId;
NSString *snippetLabel;
if ([[DisplayableTextFilter new] shouldPreventDisplayOfText:thread.lastMessageLabel]) {
snippetLabel = NSLocalizedString(@"INFO_MESSAGE_UNABLE_TO_DISPLAY_MESSAGE", @"Generic error text when message contents are undisplayable");
} else {
snippetLabel = thread.lastMessageLabel;
}
NSString *snippetLabel = [[DisplayableTextFilter new] displayableText:thread.lastMessageLabel];
NSAttributedString *attributedDate = [self dateAttributedString:thread.lastMessageDate];
NSUInteger unreadCount = [[TSMessagesManager sharedManager] unreadMessagesInThread:thread];

@ -16,12 +16,25 @@ class DisplayableTextFilterTest: XCTestCase {
super.tearDown()
}
func testFiltering() {
func testDisplayableText() {
// Ignore default byte size limitations to test other filtering behaviors
let filter = DisplayableTextFilter(allowAnyTextLessThanByteSize: 0)
let filter = DisplayableTextFilter()
XCTAssertFalse( filter.shouldPreventDisplay(text: "normal text") )
XCTAssertFalse( filter.shouldPreventDisplay(text: "🇹🇹🌼🇹🇹🌼🇹🇹") )
XCTAssertTrue( filter.shouldPreventDisplay(text: "L̷̳͔̲͝Ģ̵̮̯̤̩̙͍̬̟͉̹̘̹͍͈̮̦̰̣͟͝O̶̴̮̻̮̗͘͡!̴̷̟͓͓") )
// show plain text
let boringText = "boring text"
XCTAssertEqual(boringText, filter.displayableText(boringText))
// show high byte emojis
let emojiText = "🇹🇹🌼🇹🇹🌼🇹🇹"
XCTAssertEqual(emojiText, filter.displayableText(emojiText))
// show normal diacritic usage
let diacriticalText = "Příliš žluťoučký kůň úpěl ďábelské ódy."
XCTAssertEqual(diacriticalText, filter.displayableText(diacriticalText))
// filter excessive diacritics
XCTAssertEqual("HAVING TROUBLE READING TEXT?", filter.displayableText("H҉̸̧͘͠A͢͞V̛̛I̴̸N͏̕͏G҉̵͜͏͢ ̧̧́T̶̛͘͡R̸̵̨̢̀O̷̡U͡҉B̶̛͢͞L̸̸͘͢͟É̸ ̸̛͘͏R͟È͠͞A̸͝Ḑ̕͘͜I̵͘҉͜͞N̷̡̢͠G̴͘͠ ͟͞T͏̢́͡È̀X̕҉̢̀T̢͠?̕͏̢͘͢") )
XCTAssertEqual("LGO!", filter.displayableText("L̷̳͔̲͝Ģ̵̮̯̤̩̙͍̬̟͉̹̘̹͍͈̮̦̰̣͟͝O̶̴̮̻̮̗͘͡!̴̷̟͓͓"))
}
}

Loading…
Cancel
Save