Elaborate the link preview tests.

pull/1/head
Matthew Chen 6 years ago
parent 4e1098475b
commit 2e9f2e6154

@ -17,6 +17,21 @@ public enum LinkPreviewError: Int, Error {
// MARK: - OWSLinkPreviewDraft // MARK: - OWSLinkPreviewDraft
/// Plain value holder for the pieces of a link preview that can be
/// extracted from a page: an optional title and an optional image URL string.
public class OWSLinkPreviewContents: NSObject {
    /// The preview title, or nil when none is available.
    @objc public var title: String?

    /// The preview image URL (as a string), or nil when none is available.
    @objc public var imageUrl: String?

    /// Creates a contents object.
    ///
    /// - Parameters:
    ///   - title: The preview title, if any.
    ///   - imageUrl: The preview image URL string, if any. Defaults to nil.
    public init(title: String?, imageUrl: String? = nil) {
        self.title = title
        self.imageUrl = imageUrl
        super.init()
    }
}
// This contains the info for a link preview "draft". // This contains the info for a link preview "draft".
public class OWSLinkPreviewDraft: NSObject { public class OWSLinkPreviewDraft: NSObject {
@objc @objc
@ -310,10 +325,6 @@ public class OWSLinkPreview: MTLModel {
owsFailDebug("Invalid url.") owsFailDebug("Invalid url.")
return nil return nil
} }
guard url.path.count > 0 else {
owsFailDebug("Invalid url (empty path).")
return nil
}
guard let result = whitelistedDomain(forUrl: url, guard let result = whitelistedDomain(forUrl: url,
domainWhitelist: OWSLinkPreview.linkDomainWhitelist) else { domainWhitelist: OWSLinkPreview.linkDomainWhitelist) else {
owsFailDebug("Missing domain.") owsFailDebug("Missing domain.")
@ -360,13 +371,11 @@ public class OWSLinkPreview: MTLModel {
guard let domain = url.host?.lowercased() else { guard let domain = url.host?.lowercased() else {
return nil return nil
} }
// TODO: We need to verify: guard url.path.count > 1 else {
// // URL must have non-empty path.
// * The final domain whitelist. return nil
// * The relationship between the "link" whitelist and the "media" whitelist. }
// * Exact match or suffix-based?
// * Case-insensitive?
// * Protocol?
for whitelistedDomain in domainWhitelist { for whitelistedDomain in domainWhitelist {
if domain == whitelistedDomain.lowercased() || if domain == whitelistedDomain.lowercased() ||
domain.hasSuffix("." + whitelistedDomain.lowercased()) { domain.hasSuffix("." + whitelistedDomain.lowercased()) {
@ -491,7 +500,7 @@ public class OWSLinkPreview: MTLModel {
} }
return downloadLink(url: previewUrl) return downloadLink(url: previewUrl)
.then(on: DispatchQueue.global()) { (data) -> Promise<OWSLinkPreviewDraft> in .then(on: DispatchQueue.global()) { (data) -> Promise<OWSLinkPreviewDraft> in
return parse(linkData: data, linkUrlString: previewUrl) return parseLinkDataAndBuildDraft(linkData: data, linkUrlString: previewUrl)
.then(on: DispatchQueue.global()) { (linkPreviewDraft) -> Promise<OWSLinkPreviewDraft> in .then(on: DispatchQueue.global()) { (linkPreviewDraft) -> Promise<OWSLinkPreviewDraft> in
guard linkPreviewDraft.isValid() else { guard linkPreviewDraft.isValid() else {
return Promise(error: LinkPreviewError.noPreview) return Promise(error: LinkPreviewError.noPreview)
@ -629,49 +638,30 @@ public class OWSLinkPreview: MTLModel {
return false return false
} }
// Example: class func parseLinkDataAndBuildDraft(linkData: Data,
//
// <meta property="og:title" content="Randomness is Random - Numberphile">
// <meta property="og:image" content="https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg">
private class func parse(linkData: Data,
linkUrlString: String) -> Promise<OWSLinkPreviewDraft> { linkUrlString: String) -> Promise<OWSLinkPreviewDraft> {
guard let linkText = String(bytes: linkData, encoding: .utf8) else { do {
owsFailDebug("Could not parse link text.") let contents = try parse(linkData: linkData)
return Promise(error: LinkPreviewError.invalidInput)
}
var title: String?
if let rawTitle = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:title\"\\s+content\\s*=\\s*\"(.*?)\"\\s*/?>", text: linkText) {
if let decodedTitle = decodeHTMLEntities(inString: rawTitle) {
let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle)
if normalizedTitle.count > 0 {
title = normalizedTitle
}
}
}
Logger.verbose("title: \(String(describing: title))")
guard let rawImageUrlString = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:image\"\\s+content\\s*=\\s*\"(.*?)\"\\s*/?>", text: linkText) else { let title = contents.title
guard let imageUrl = contents.imageUrl else {
return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title))
} }
guard let imageUrlString = decodeHTMLEntities(inString: rawImageUrlString)?.ows_stripped() else {
return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) guard isValidMediaUrl(imageUrl) else {
}
guard isValidMediaUrl(imageUrlString) else {
Logger.error("Invalid image URL.") Logger.error("Invalid image URL.")
return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title))
} }
guard let imageFileExtension = fileExtension(forImageUrl: imageUrlString) else { guard let imageFileExtension = fileExtension(forImageUrl: imageUrl) else {
Logger.error("Image URL has unknown or invalid file extension: \(imageUrlString).") Logger.error("Image URL has unknown or invalid file extension: \(imageUrl).")
return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title))
} }
guard let imageMimeType = mimetype(forImageFileExtension: imageFileExtension) else { guard let imageMimeType = mimetype(forImageFileExtension: imageFileExtension) else {
Logger.error("Image URL has unknown or invalid content type: \(imageUrlString).") Logger.error("Image URL has unknown or invalid content type: \(imageUrl).")
return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title))
} }
return downloadImage(url: imageUrlString, imageMimeType: imageMimeType) return downloadImage(url: imageUrl, imageMimeType: imageMimeType)
.then(on: DispatchQueue.global()) { (imageData: Data) -> Promise<OWSLinkPreviewDraft> in .then(on: DispatchQueue.global()) { (imageData: Data) -> Promise<OWSLinkPreviewDraft> in
let imageFilePath = OWSFileSystem.temporaryFilePath(withFileExtension: imageFileExtension) let imageFilePath = OWSFileSystem.temporaryFilePath(withFileExtension: imageFileExtension)
do { do {
@ -697,19 +687,63 @@ public class OWSLinkPreview: MTLModel {
.recover(on: DispatchQueue.global()) { (_) -> Promise<OWSLinkPreviewDraft> in .recover(on: DispatchQueue.global()) { (_) -> Promise<OWSLinkPreviewDraft> in
return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title))
} }
} catch {
owsFailDebug("Could not parse link data: \(error).")
return Promise(error: error)
}
}
// Example:
//
// <meta property="og:title" content="Randomness is Random - Numberphile">
// <meta property="og:image" content="https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg">
class func parse(linkData: Data) throws -> OWSLinkPreviewContents {
guard let linkText = String(bytes: linkData, encoding: .utf8) else {
owsFailDebug("Could not parse link text.")
throw LinkPreviewError.invalidInput
} }
private class func fileExtension(forImageUrl urlString: String) -> String? { Logger.verbose("linkText: \(linkText)")
var title: String?
if let rawTitle = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:title\"\\s+content\\s*=\\s*\"(.*?)\"\\s*/?>", text: linkText) {
if let decodedTitle = decodeHTMLEntities(inString: rawTitle) {
let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle)
if normalizedTitle.count > 0 {
title = normalizedTitle
}
}
}
Logger.verbose("title: \(String(describing: title))")
guard let rawImageUrlString = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:image\"\\s+content\\s*=\\s*\"(.*?)\"\\s*/?>", text: linkText) else {
return OWSLinkPreviewContents(title: title)
}
guard let imageUrlString = decodeHTMLEntities(inString: rawImageUrlString)?.ows_stripped() else {
return OWSLinkPreviewContents(title: title)
}
return OWSLinkPreviewContents(title: title, imageUrl: imageUrlString)
}
class func fileExtension(forImageUrl urlString: String) -> String? {
guard let imageUrl = URL(string: urlString) else { guard let imageUrl = URL(string: urlString) else {
Logger.error("Could not parse image URL.") Logger.error("Could not parse image URL.")
return nil return nil
} }
let imageFilename = imageUrl.lastPathComponent let imageFilename = imageUrl.lastPathComponent
let imageFileExtension = (imageFilename as NSString).pathExtension.lowercased() let imageFileExtension = (imageFilename as NSString).pathExtension.lowercased()
guard imageFileExtension.count > 0 else {
return nil
}
return imageFileExtension return imageFileExtension
} }
private class func mimetype(forImageFileExtension imageFileExtension: String) -> String? { class func mimetype(forImageFileExtension imageFileExtension: String) -> String? {
guard imageFileExtension.count > 0 else {
return nil
}
guard let imageMimeType = MIMETypeUtil.mimeType(forFileExtension: imageFileExtension) else { guard let imageMimeType = MIMETypeUtil.mimeType(forFileExtension: imageFileExtension) else {
Logger.error("Image URL has unknown content type: \(imageFileExtension).") Logger.error("Image URL has unknown content type: \(imageFileExtension).")
return nil return nil

@ -3,7 +3,7 @@
// //
import Foundation import Foundation
import SignalServiceKit @testable import SignalServiceKit
import XCTest import XCTest
class OWSLinkPreviewTest: SSKBaseTestSwift { class OWSLinkPreviewTest: SSKBaseTestSwift {
@ -92,7 +92,9 @@ class OWSLinkPreviewTest: SSKBaseTestSwift {
func testIsValidLinkUrl() { func testIsValidLinkUrl() {
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.youtube.com/watch?v=tP-Ipsat90c")) XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.youtube.com/watch?v=tP-Ipsat90c"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://youtube.com/watch?v=tP-Ipsat90c")) XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://youtube.com/watch?v=tP-Ipsat90c"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.youtube.com"))
// Case shouldn't matter.
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://WWW.YOUTUBE.COM/watch?v=tP-Ipsat90c"))
// Allow arbitrary subdomains. // Allow arbitrary subdomains.
XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://some.random.subdomain.youtube.com/watch?v=tP-Ipsat90c")) XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://some.random.subdomain.youtube.com/watch?v=tP-Ipsat90c"))
@ -112,12 +114,30 @@ class OWSLinkPreviewTest: SSKBaseTestSwift {
// Don't allow media domains. // Don't allow media domains.
XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg")) XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg"))
// Allow all whitelisted domains.
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.youtube.com/watch?v=tP-Ipsat90c"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://youtu.be/tP-Ipsat90c"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.reddit.com/r/androiddev/comments/a7gctz/androidx_release_notes_this_is_the_first_release/"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.reddit.com/r/WhitePeopleTwitter/comments/a7j3mm/why/"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://imgur.com/gallery/KFCL8fm"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://imgur.com/gallery/FMdwTiV"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.instagram.com/p/BrgpsUjF9Jo/?utm_source=ig_web_button_share_sheet"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://www.instagram.com/p/BrgpsUjF9Jo/?utm_source=ig_share_sheet&igshid=94c7ihqjfmbm"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://imgur.com/gallery/igHOwDM"))
// Strip trailing commas.
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://imgur.com/gallery/igHOwDM,"))
// Ignore URLs with an empty path.
XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://imgur.com"))
XCTAssertFalse(OWSLinkPreview.isValidLinkUrl("https://imgur.com/"))
XCTAssertTrue(OWSLinkPreview.isValidLinkUrl("https://imgur.com/X"))
} }
func testIsValidMediaUrl() { func testIsValidMediaUrl() {
XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://www.youtube.com/watch?v=tP-Ipsat90c")) XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://www.youtube.com/watch?v=tP-Ipsat90c"))
XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://youtube.com/watch?v=tP-Ipsat90c")) XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://youtube.com/watch?v=tP-Ipsat90c"))
XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://www.youtube.com"))
// Allow arbitrary subdomains. // Allow arbitrary subdomains.
XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://some.random.subdomain.youtube.com/watch?v=tP-Ipsat90c")) XCTAssertTrue(OWSLinkPreview.isValidMediaUrl("https://some.random.subdomain.youtube.com/watch?v=tP-Ipsat90c"))
@ -154,4 +174,66 @@ class OWSLinkPreviewTest: SSKBaseTestSwift {
XCTAssertEqual(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob https://www.youtube.com/watch?v=tP-Ipsat90c jim https://www.youtube.com/watch?v=other-url carol"), XCTAssertEqual(OWSLinkPreview.previewUrl(forMessageBodyText: "alice bob https://www.youtube.com/watch?v=tP-Ipsat90c jim https://www.youtube.com/watch?v=other-url carol"),
"https://www.youtube.com/watch?v=tP-Ipsat90c") "https://www.youtube.com/watch?v=tP-Ipsat90c")
} }
/// Exercises the file-extension and MIME-type helpers on OWSLinkPreview.
func testUtils() {
    // URL strings for which no file extension should be reported.
    let urlsWithoutExtension = [
        "",
        "https://www.some.host/path/imagename",
        "https://www.some.host/path/imagename."
    ]
    for urlString in urlsWithoutExtension {
        XCTAssertNil(OWSLinkPreview.fileExtension(forImageUrl: urlString))
    }

    // URL strings paired with the extension expected to be extracted.
    let urlsWithExtension: [(String, String)] = [
        ("https://www.some.host/path/imagename.jpg", "jpg"),
        ("https://www.some.host/path/imagename.gif", "gif"),
        ("https://www.some.host/path/imagename.png", "png"),
        ("https://www.some.host/path/imagename.boink", "boink")
    ]
    for (urlString, expectedExtension) in urlsWithExtension {
        XCTAssertEqual(OWSLinkPreview.fileExtension(forImageUrl: urlString), expectedExtension)
    }

    // File extensions for which no MIME type should be returned.
    for rejectedExtension in ["", "boink", "tiff", "gif"] {
        XCTAssertNil(OWSLinkPreview.mimetype(forImageFileExtension: rejectedExtension))
    }

    // File extensions that should map to a known image MIME type.
    XCTAssertEqual(OWSLinkPreview.mimetype(forImageFileExtension: "jpg"), OWSMimeTypeImageJpeg)
    XCTAssertEqual(OWSLinkPreview.mimetype(forImageFileExtension: "png"), OWSMimeTypeImagePng)
}
// End-to-end check of OWSLinkPreview.tryToBuildPreviewInfo(previewUrl:) for a
// YouTube URL: expects a draft with the known title and a non-nil image file path.
// NOTE(review): presumably this performs a live network download, so it may be
// slow or fail when offline -- confirm before relying on it in CI.
func testLinkDownloadAndParsing() {
let expectation = self.expectation(description: "link download and parsing")
OWSLinkPreview.tryToBuildPreviewInfo(previewUrl: "https://www.youtube.com/watch?v=tP-Ipsat90c")
.done { (draft) in
XCTAssertNotNil(draft)
XCTAssertEqual(draft.title, "Randomness is Random - Numberphile")
XCTAssertNotNil(draft.imageFilePath)
expectation.fulfill()
}.catch { (error) in
Logger.error("error: \(error)")
XCTFail("Unexpected error: \(error)")
// Fulfill on failure too, so the test ends immediately instead of
// waiting for the timeout below.
expectation.fulfill()
}.retainUntilComplete()
// Block until one of the handlers above fulfills the expectation (5 second limit).
self.waitForExpectations(timeout: 5.0, handler: nil)
}
/// Parsing empty link data should succeed and yield neither a title nor an image URL.
///
/// The test is declared `throws` so that a parsing error is reported by XCTest as
/// a failure of this test instead of crashing the whole test process via `try!`.
func testLinkDataParsing_Empty() throws {
    // Data(_:) over the UTF-8 view cannot fail, so no forced unwrap is needed.
    let linkData = Data("".utf8)
    // parse(linkData:) returns a non-optional value, so there is nothing to
    // nil-check on the result itself; only its optional fields are asserted.
    let content = try OWSLinkPreview.parse(linkData: linkData)
    XCTAssertNil(content.title)
    XCTAssertNil(content.imageUrl)
}
/// Parsing HTML that contains `og:title` and `og:image` meta tags should yield
/// the tag contents as the title and image URL.
///
/// The test is declared `throws` so that a parsing error is reported by XCTest as
/// a failure of this test instead of crashing the whole test process via `try!`.
func testLinkDataParsing() throws {
    let linkText = ("<meta property=\"og:title\" content=\"Randomness is Random - Numberphile\">" +
        "<meta property=\"og:image\" content=\"https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg\">")
    // Data(_:) over the UTF-8 view cannot fail, so no forced unwrap is needed.
    let linkData = Data(linkText.utf8)
    // parse(linkData:) returns a non-optional value, so there is nothing to
    // nil-check on the result itself; only its fields are asserted.
    let content = try OWSLinkPreview.parse(linkData: linkData)
    XCTAssertEqual(content.title, "Randomness is Random - Numberphile")
    XCTAssertEqual(content.imageUrl, "https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg")
}
} }

Loading…
Cancel
Save