From 55376975f5806bf6529c49fd62c49db86238070c Mon Sep 17 00:00:00 2001
From: Matthew Chen
Date: Mon, 14 Jan 2019 12:08:50 -0500
Subject: [PATCH] Add link preview parsing and validation logic.

---
 .../Interactions/OWSLinkPreview.swift         | 88 +++++++++++++++++++
 .../tests/Messages/OWSLinkPreviewTest.swift   | 86 +++++++++++++++++++
 2 files changed, 174 insertions(+)
 create mode 100644 SignalServiceKit/tests/Messages/OWSLinkPreviewTest.swift

diff --git a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift
index f5f0634be..bda7d11cd 100644
--- a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift
+++ b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift
@@ -97,4 +97,92 @@ public class OWSLinkPreview: MTLModel {
         }
         attachment.remove(with: transaction)
     }
+
+    // MARK: - Domain Whitelist
+
+    // Domains whose pages may be the target of a link preview.
+    private static let linkDomainWhitelist = [
+        "youtube.com",
+        "reddit.com",
+        "imgur.com",
+        "instagram.com"
+    ]
+
+    // Extra domains that isValidMediaUrl accepts on top of the link domains.
+    private static let mediaDomainWhitelist = [
+        "ytimg.com",
+        "cdninstagram.com"
+    ]
+
+    // URL schemes accepted by isUrlInDomainWhitelist.
+    private static let protocolWhitelist = [
+        "https"
+    ]
+
+    /// Returns true if `urlString` parses as a URL with a whitelisted scheme
+    /// whose host is, or is a subdomain of, an entry in the link domain whitelist.
+    @objc
+    public class func isValidLinkUrl(_ urlString: String) -> Bool {
+        guard let url = URL(string: urlString) else {
+            return false
+        }
+        return isUrlInDomainWhitelist(url: url,
+                                      domainWhitelist: OWSLinkPreview.linkDomainWhitelist)
+    }
+
+    /// Returns true if `urlString` parses as a URL with a whitelisted scheme whose
+    /// host is, or is a subdomain of, an entry in the link or media domain whitelist.
+    @objc
+    public class func isValidMediaUrl(_ urlString: String) -> Bool {
+        guard let url = URL(string: urlString) else {
+            return false
+        }
+        return isUrlInDomainWhitelist(url: url,
+                                      domainWhitelist: OWSLinkPreview.linkDomainWhitelist + OWSLinkPreview.mediaDomainWhitelist)
+    }
+
+    /// Returns true if the scheme of `url` is in `protocolWhitelist` and its host equals,
+    /// or is a subdomain of, an entry in `domainWhitelist`. Comparisons are lowercased.
+    private class func isUrlInDomainWhitelist(url: URL, domainWhitelist: [String]) -> Bool {
+        guard let urlProtocol = url.scheme?.lowercased() else {
+            return false
+        }
+        guard protocolWhitelist.contains(urlProtocol) else {
+            return false
+        }
+        guard let domain = url.host?.lowercased() else {
+            return false
+        }
+        // TODO: We need to verify:
+        //
+        // * The final domain whitelist.
+        // * The relationship between the "link" whitelist and the "media" whitelist.
+        // * Exact match or suffix-based?
+        // * Case-insensitive?
+        // * Protocol?
+        // Match the domain itself or any subdomain of it. The "." prefix keeps
+        // look-alike hosts such as "xyoutube.com" from matching "youtube.com".
+        return domainWhitelist.contains { entry in
+            let whitelistedDomain = entry.lowercased()
+            return domain == whitelistedDomain || domain.hasSuffix("." + whitelistedDomain)
+        }
+    }
+
+    // MARK: - Text Parsing
+
+    /// Returns the first whitespace- or newline-delimited component of `body` that
+    /// passes `isValidLinkUrl`, or nil if `body` is nil or no component qualifies.
+    @objc
+    public class func previewUrl(forMessageBodyText body: String?) -> String? {
+        guard let body = body else {
+            return nil
+        }
+        let components = body.components(separatedBy: .whitespacesAndNewlines)
+        for component in components {
+            if isValidLinkUrl(component) {
+                return component
+            }
+        }
+        return nil
+    }
 }
diff --git a/SignalServiceKit/tests/Messages/OWSLinkPreviewTest.swift b/SignalServiceKit/tests/Messages/OWSLinkPreviewTest.swift
new file mode 100644
index 000000000..f05213fb4
--- /dev/null
+++ b/SignalServiceKit/tests/Messages/OWSLinkPreviewTest.swift
@@ -0,0 +1,86 @@
+//
+//  Copyright (c) 2019 Open Whisper Systems. All rights reserved.
+//
+
+import Foundation
+import SignalServiceKit
+import XCTest
+
+class OWSLinkPreviewTest: SSKBaseTestSwift {
+
+    override func setUp() {
+        super.setUp()
+        // Put setup code here. This method is called before the invocation of each test method in the class.
+    }
+
+    override func tearDown() {
+        // Put teardown code here. This method is called after the invocation of each test method in the class.
+        super.tearDown()
+    }
+
+    func testIsValidLinkUrl() {
+        XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.youtube.com"))
+
+        // Allow arbitrary subdomains.
+        XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://some.random.subdomain.youtube.com/watch?v=tP-Ipsat90c"))
+
+        // Don't allow HTTP, only HTTPS
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("http://youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("mailto://youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("ftp://youtube.com/watch?v=tP-Ipsat90c"))
+
+        // Don't allow similar domains.
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://xyoutube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://youtubex.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://youtube.comx/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://www.xyoutube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://www.youtubex.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://www.youtube.comx/watch?v=tP-Ipsat90c"))
+
+        // Don't allow media domains.
+        XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg"))
+    }
+
+    func testIsValidMediaUrl() {
+        XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://www.youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://www.youtube.com"))
+
+        // Allow arbitrary subdomains.
+        XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://some.random.subdomain.youtube.com/watch?v=tP-Ipsat90c"))
+
+        // Don't allow HTTP, only HTTPS
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("http://youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("mailto://youtube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("ftp://youtube.com/watch?v=tP-Ipsat90c"))
+
+        // Don't allow similar domains.
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("https://xyoutube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("https://youtubex.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("https://youtube.comx/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("https://www.xyoutube.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("https://www.youtubex.com/watch?v=tP-Ipsat90c"))
+        XCTAssertFalse(OWSLinkPreview.isValidMediaUrl("https://www.youtube.comx/watch?v=tP-Ipsat90c"))
+
+        // Allow media domains.
+        XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg"))
+    }
+
+    func testPreviewUrlForMessageBodyText() {
+        XCTAssertNil(OWSLinkPreview.previewUrl(forMessageBodyText: ""))
+        XCTAssertNil(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob jim"))
+        XCTAssertNil(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob jim http://"))
+        XCTAssertNil(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob jim http://a.com"))
+
+        XCTAssertEqual(OWSLinkPreview.previewUrl(forMessageBodyText: "https://www.youtube.com/watch?v=tP-Ipsat90c"),
+                       "https://www.youtube.com/watch?v=tP-Ipsat90c")
+        XCTAssertEqual(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob https://www.youtube.com/watch?v=tP-Ipsat90c jim"),
+                       "https://www.youtube.com/watch?v=tP-Ipsat90c")
+
+        // If there are more than one, take the first.
+        XCTAssertEqual(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob https://www.youtube.com/watch?v=tP-Ipsat90c jim https://www.youtube.com/watch?v=other-url carol"),
+                       "https://www.youtube.com/watch?v=tP-Ipsat90c")
+    }
+}