From 13e02b5bf168969239847b3a915f4f2e11040b8e Mon Sep 17 00:00:00 2001 From: Audric Ackermann Date: Wed, 28 Oct 2020 11:06:47 +1100 Subject: [PATCH] add staged link previews for everything from signal --- js/modules/link_previews.d.ts | 2 + js/modules/link_previews.js | 374 ++--- js/modules/signal.js | 4 - js/views/conversation_view.js | 120 -- package.json | 1 + stylesheets/_modules.scss | 11 +- stylesheets/themes.scss | 2 +- test/modules/link_previews_test.js | 64 - .../conversation/StagedLinkPreview.tsx | 107 +- .../conversation/SessionCompositionBox.tsx | 52 +- .../SessionConversationMessagesList.tsx | 2 +- .../conversation/SessionStagedLinkPreview.tsx | 174 +++ ts/session/utils/Attachments.ts | 4 +- .../isLinkPreviewDateValid_test.ts | 41 + .../linkPreviews/linkPreviewFetch_test.ts | 1296 +++++++++++++++++ ts/types/Attachment.ts | 18 +- ts/types/MIME.ts | 16 +- ts/util/attachmentsUtil.ts | 2 +- ts/util/index.ts | 2 + ts/util/isLinkPreviewDateValid.ts | 11 + ts/util/linkPreviewFetch.ts | 569 ++++++++ yarn.lock | 12 + 22 files changed, 2341 insertions(+), 543 deletions(-) create mode 100644 ts/components/session/conversation/SessionStagedLinkPreview.tsx create mode 100644 ts/test/test-electron/isLinkPreviewDateValid_test.ts create mode 100644 ts/test/test-electron/linkPreviews/linkPreviewFetch_test.ts create mode 100644 ts/util/isLinkPreviewDateValid.ts create mode 100644 ts/util/linkPreviewFetch.ts diff --git a/js/modules/link_previews.d.ts b/js/modules/link_previews.d.ts index ea3e3e3d1..183bdb1cf 100644 --- a/js/modules/link_previews.d.ts +++ b/js/modules/link_previews.d.ts @@ -1 +1,3 @@ +export function isLinkSafeToPreview(link: string): boolean; + export function isLinkSneaky(link: string): boolean; diff --git a/js/modules/link_previews.js b/js/modules/link_previews.js index ba9b8020e..554161e0b 100644 --- a/js/modules/link_previews.js +++ b/js/modules/link_previews.js @@ -1,108 +1,29 @@ /* global URL */ -const { isNumber, compact } = require('lodash'); 
-const he = require('he'); +const { isNumber, compact, isEmpty, range } = require('lodash'); const nodeUrl = require('url'); const LinkifyIt = require('linkify-it'); const linkify = LinkifyIt(); -const { concatenateBytes, getViewOfArrayBuffer } = require('./crypto'); module.exports = { - assembleChunks, findLinks, - getChunkPattern, getDomain, - getTitleMetaTag, - getImageMetaTag, - isLinkInWhitelist, - isMediaLinkInWhitelist, + isLinkSafeToPreview, isLinkSneaky, }; -const SUPPORTED_DOMAINS = [ - 'youtube.com', - 'www.youtube.com', - 'm.youtube.com', - 'youtu.be', - 'reddit.com', - 'www.reddit.com', - 'm.reddit.com', - 'imgur.com', - 'www.imgur.com', - 'm.imgur.com', - 'i.imgur.com', - 'instagram.com', - 'www.instagram.com', - 'm.instagram.com', - 'tenor.com', - 'gph.is', - 'giphy.com', - 'media.giphy.com', -]; -function isLinkInWhitelist(link) { +function maybeParseHref(href) { try { - const url = new URL(link); - - if (url.protocol !== 'https:') { - return false; - } - - if (!url.pathname || url.pathname.length < 2) { - return false; - } - - const lowercase = url.host.toLowerCase(); - if (!SUPPORTED_DOMAINS.includes(lowercase)) { - return false; - } - - return true; - } catch (error) { - return false; - } -} - -const SUPPORTED_MEDIA_DOMAINS = /^([^.]+\.)*(ytimg.com|cdninstagram.com|redd.it|imgur.com|fbcdn.net|giphy.com|tenor.com)$/i; -function isMediaLinkInWhitelist(link) { - try { - const url = new URL(link); - - if (url.protocol !== 'https:') { - return false; - } - - if (!url.pathname || url.pathname.length < 2) { - return false; - } - - if (!SUPPORTED_MEDIA_DOMAINS.test(url.host)) { - return false; - } - - return true; - } catch (error) { - return false; + return new URL(href); + } catch (err) { + return null; } } -const META_TITLE = //im; -const META_IMAGE = //im; -function _getMetaTag(html, regularExpression) { - const match = regularExpression.exec(html); - if (match && match[1]) { - return he.decode(match[1]).trim(); - } - - return null; -} - -function 
getTitleMetaTag(html) { - return _getMetaTag(html, META_TITLE); -} -function getImageMetaTag(html) { - const tag = _getMetaTag(html, META_IMAGE); - return typeof tag === 'string' ? tag.replace('http://', 'https://') : tag; +function isLinkSafeToPreview(href) { + const url = maybeParseHref(href); + return Boolean(url && url.protocol === 'https:' && !isLinkSneaky(href)); } function findLinks(text, caretLocation) { @@ -129,225 +50,110 @@ function findLinks(text, caretLocation) { ); } -function getDomain(url) { - try { - const urlObject = new URL(url); - return urlObject.hostname; - } catch (error) { - return null; - } +function getDomain(href) { + const url = maybeParseHref(href); + return url ? url.hostname : null; } -const MB = 1024 * 1024; -const KB = 1024; - -function getChunkPattern(size) { - if (size > MB) { - return _getRequestPattern(size, MB); - } else if (size > 500 * KB) { - return _getRequestPattern(size, 500 * KB); - } else if (size > 100 * KB) { - return _getRequestPattern(size, 100 * KB); - } else if (size > 50 * KB) { - return _getRequestPattern(size, 50 * KB); - } else if (size > 10 * KB) { - return _getRequestPattern(size, 10 * KB); - } else if (size > KB) { - return _getRequestPattern(size, KB); +// See <https://tools.ietf.org/html/rfc3986>. +const VALID_URI_CHARACTERS = new Set([ + '%', + // "gen-delims" + ':', + '/', + '?', + '#', + '[', + ']', + '@', + // "sub-delims" + '!', + '$', + '&', + "'", + '(', + ')', + '*', + '+', + ',', + ';', + '=', + // unreserved + ...String.fromCharCode(...range(65, 91), ...range(97, 123)), + ...range(10).map(String), + '-', + '.', + '_', + '~', +]); +const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g'); +const MAX_HREF_LENGTH = 2 ** 12; + +function isLinkSneaky(href) { + // This helps users avoid extremely long links (which could be hiding something + // sketchy) and also sidesteps the performance implications of extremely long hrefs.
+ if (href.length > MAX_HREF_LENGTH) { + return true; } - throw new Error(`getChunkPattern: Unsupported size: ${size}`); -} - -function _getRequestPattern(size, increment) { - const results = []; + const url = maybeParseHref(href); - let offset = 0; - while (size - offset > increment) { - results.push({ - start: offset, - end: offset + increment - 1, - overlap: 0, - }); - offset += increment; + // If we can't parse it, it's sneaky. + if (!url) { + return true; } - if (size - offset > 0) { - results.push({ - start: size - increment, - end: size - 1, - overlap: increment - (size - offset), - }); + // Any links which contain auth are considered sneaky + if (url.username) { + return true; } - return results; -} - -function assembleChunks(chunkDescriptors) { - const chunks = chunkDescriptors.map((chunk, index) => { - if (index !== chunkDescriptors.length - 1) { - return chunk.data; - } - - if (!chunk.overlap) { - return chunk.data; - } - - return getViewOfArrayBuffer( - chunk.data, - chunk.overlap, - chunk.data.byteLength - ); - }); - - return concatenateBytes(...chunks); -} - -const LATIN_PATTERN = new RegExp( - '[' + - '\\u0041-\\u005A' + - '\\u0061-\\u007A' + - '\\u00AA' + - '\\u00BA' + - '\\u00C0-\\u00DC' + - '\\u00D8-\\u00F6' + - '\\u00F8-\\u01BA' + - ']' -); - -const CYRILLIC_PATTERN = new RegExp( - '[' + - '\\u0400-\\u0481' + - '\\u0482' + - '\\u0483-\\u0484' + - '\\u0487' + - '\\u0488-\\u0489' + - '\\u048A-\\u052F' + - '\\u1C80-\\u1C88' + - '\\u1D2B' + - '\\u1D78' + - '\\u2DE0-\\u2DFF' + - '\\uA640-\\uA66D' + - '\\uA66E' + - '\\uA66F' + - '\\uA670-\\uA672' + - '\\uA673' + - '\\uA674-\\uA67D' + - '\\uA67E' + - '\\uA67F' + - '\\uA680-\\uA69B' + - '\\uA69C-\\uA69D' + - '\\uA69E-\\uA69F' + - '\\uFE2E-\\uFE2F' + - ']' -); - -const GREEK_PATTERN = new RegExp( - '[' + - '\\u0370-\\u0373' + - '\\u0375' + - '\\u0376-\\u0377' + - '\\u037A' + - '\\u037B-\\u037D' + - '\\u037F' + - '\\u0384' + - '\\u0386' + - '\\u0388-\\u038A' + - '\\u038C' + - '\\u038E-\\u03A1' + - 
'\\u03A3-\\u03E1' + - '\\u03F0-\\u03F5' + - '\\u03F6' + - '\\u03F7-\\u03FF' + - '\\u1D26-\\u1D2A' + - '\\u1D5D-\\u1D61' + - '\\u1D66-\\u1D6A' + - '\\u1DBF' + - '\\u1F00-\\u1F15' + - '\\u1F18-\\u1F1D' + - '\\u1F20-\\u1F45' + - '\\u1F48-\\u1F4D' + - '\\u1F50-\\u1F57' + - '\\u1F59' + - '\\u1F5B' + - '\\u1F5D' + - '\\u1F5F-\\u1F7D' + - '\\u1F80-\\u1FB4' + - '\\u1FB6-\\u1FBC' + - '\\u1FBD' + - '\\u1FBE' + - '\\u1FBF-\\u1FC1' + - '\\u1FC2-\\u1FC4' + - '\\u1FC6-\\u1FCC' + - '\\u1FCD-\\u1FCF' + - '\\u1FD0-\\u1FD3' + - '\\u1FD6-\\u1FDB' + - '\\u1FDD-\\u1FDF' + - '\\u1FE0-\\u1FEC' + - '\\u1FED-\\u1FEF' + - '\\u1FF2-\\u1FF4' + - '\\u1FF6-\\u1FFC' + - '\\u1FFD-\\u1FFE' + - '\\u2126' + - '\\uAB65' + - ']' -); - -const HIGH_GREEK_PATTERN = new RegExp( - '[' + - `${String.fromCodePoint(0x10140)}-${String.fromCodePoint(0x10174)}` + - `${String.fromCodePoint(0x10175)}-${String.fromCodePoint(0x10178)}` + - `${String.fromCodePoint(0x10179)}-${String.fromCodePoint(0x10189)}` + - `${String.fromCodePoint(0x1018a)}-${String.fromCodePoint(0x1018b)}` + - `${String.fromCodePoint(0x1018c)}-${String.fromCodePoint(0x1018e)}` + - `${String.fromCodePoint(0x101a0)}` + - `${String.fromCodePoint(0x1d200)}-${String.fromCodePoint(0x1d241)}` + - `${String.fromCodePoint(0x1d242)}-${String.fromCodePoint(0x1d244)}` + - `${String.fromCodePoint(0x1d245)}` + - ']', - 'u' -); - -function isChunkSneaky(chunk) { - const hasLatin = LATIN_PATTERN.test(chunk); - if (!hasLatin) { - return false; + // If the domain is falsy, something fishy is going on + if (!url.hostname) { + return true; } - const hasCyrillic = CYRILLIC_PATTERN.test(chunk); - if (hasCyrillic) { + // To quote [RFC 1034][0]: "the total number of octets that represent a + // domain name [...] is limited to 255." To be extra careful, we set a + // maximum of 2048. (This also uses the string's `.length` property, + // which isn't exactly the same thing as the number of octets.) 
+ // [0]: https://tools.ietf.org/html/rfc1034 + if (url.hostname.length > 2048) { return true; } - const hasGreek = GREEK_PATTERN.test(chunk); - if (hasGreek) { + // Domains cannot contain encoded characters + if (url.hostname.includes('%')) { return true; } - const hasHighGreek = HIGH_GREEK_PATTERN.test(chunk); - if (hasHighGreek) { + // There must be at least 2 domain labels, and none of them can be empty. + const labels = url.hostname.split('.'); + if (labels.length < 2 || labels.some(isEmpty)) { return true; } - return false; -} + // This is necessary because getDomain returns domains in punycode form. + const unicodeDomain = nodeUrl.domainToUnicode + ? nodeUrl.domainToUnicode(url.hostname) + : url.hostname; -function isLinkSneaky(link) { - const domain = getDomain(link); + const withoutPeriods = unicodeDomain.replace(/\./g, ''); - // This is necesary because getDomain returns domains in punycode form. We check whether - // it's available for the StyleGuide. - const unicodeDomain = nodeUrl.domainToUnicode - ? nodeUrl.domainToUnicode(domain) - : domain; + const hasASCII = ASCII_PATTERN.test(withoutPeriods); + const withoutASCII = withoutPeriods.replace(ASCII_PATTERN, ''); - const chunks = unicodeDomain.split('.'); - for (let i = 0, max = chunks.length; i < max; i += 1) { - const chunk = chunks[i]; - if (isChunkSneaky(chunk)) { - return true; - } + const isMixed = hasASCII && withoutASCII.length > 0; + if (isMixed) { return true; } - return false; + // We can't use `url.pathname` (and so on) because it automatically encodes strings. + // For example, it turns `/aquí` into `/aqu%C3%AD`. + const startOfPathAndHash = href.indexOf('/', url.protocol.length + 4); + const pathAndHash = + startOfPathAndHash === -1 ?
'' : href.substr(startOfPathAndHash); + return [...pathAndHash].some( + character => !VALID_URI_CHARACTERS.has(character) + ); } diff --git a/js/modules/signal.js b/js/modules/signal.js index ffce318c6..c6ce379e0 100644 --- a/js/modules/signal.js +++ b/js/modules/signal.js @@ -117,9 +117,6 @@ const { Quote } = require('../../ts/components/conversation/Quote'); const { ResetSessionNotification, } = require('../../ts/components/conversation/ResetSessionNotification'); -const { - StagedLinkPreview, -} = require('../../ts/components/conversation/StagedLinkPreview'); const { TimerNotification, } = require('../../ts/components/conversation/TimerNotification'); @@ -311,7 +308,6 @@ exports.setup = (options = {}) => { MessageDetail, Quote, ResetSessionNotification, - StagedLinkPreview, TimerNotification, Types: { Message: MediaGalleryMessage, diff --git a/js/views/conversation_view.js b/js/views/conversation_view.js index fb9f6df44..dfa0fea7b 100644 --- a/js/views/conversation_view.js +++ b/js/views/conversation_view.js @@ -1740,126 +1740,6 @@ this.renderLinkPreview(); }, - async makeChunkedRequest(url) { - const PARALLELISM = 3; - const size = await textsecure.messaging.getProxiedSize(url); - const chunks = await Signal.LinkPreviews.getChunkPattern(size); - - let results = []; - const jobs = chunks.map(chunk => async () => { - const { start, end } = chunk; - - const result = await textsecure.messaging.makeProxiedRequest(url, { - start, - end, - returnArrayBuffer: true, - }); - - return { - ...chunk, - ...result, - }; - }); - - while (jobs.length > 0) { - const activeJobs = []; - for (let i = 0, max = PARALLELISM; i < max; i += 1) { - if (!jobs.length) { - break; - } - - const job = jobs.shift(); - activeJobs.push(job()); - } - - // eslint-disable-next-line no-await-in-loop - results = results.concat(await Promise.all(activeJobs)); - } - - if (!results.length) { - throw new Error('No responses received'); - } - - const { contentType } = results[0]; - const data = 
Signal.LinkPreviews.assembleChunks(results); - - return { - contentType, - data, - }; - }, - - async getPreview(url) { - let html; - try { - html = await textsecure.messaging.makeProxiedRequest(url); - } catch (error) { - if (error.code >= 300) { - return null; - } - } - - const title = window.Signal.LinkPreviews.getTitleMetaTag(html); - const imageUrl = window.Signal.LinkPreviews.getImageMetaTag(html); - - let image; - let objectUrl; - try { - if (imageUrl) { - if (!Signal.LinkPreviews.isMediaLinkInWhitelist(imageUrl)) { - const primaryDomain = Signal.LinkPreviews.getDomain(url); - const imageDomain = Signal.LinkPreviews.getDomain(imageUrl); - throw new Error( - `imageUrl for domain ${primaryDomain} did not match media whitelist. Domain: ${imageDomain}` - ); - } - - const data = await this.makeChunkedRequest(imageUrl); - - // Ensure that this file is either small enough or is resized to meet our - // requirements for attachments - const withBlob = await this.fileInput.autoScale({ - contentType: data.contentType, - file: new Blob([data.data], { - type: data.contentType, - }), - }); - - const attachment = await this.fileInput.readFile(withBlob); - objectUrl = URL.createObjectURL(withBlob.file); - - const dimensions = await Signal.Types.VisualAttachment.getImageDimensions( - { - objectUrl, - logger: window.log, - } - ); - - image = { - ...attachment, - ...dimensions, - contentType: withBlob.file.type, - }; - } - } catch (error) { - // We still want to show the preview if we failed to get an image - window.log.error( - 'getPreview failed to get image for link preview:', - error.message - ); - } finally { - if (objectUrl) { - URL.revokeObjectURL(objectUrl); - } - } - - return { - title, - url, - image, - }; - }, - async addLinkPreview(url) { (this.preview || []).forEach(item => { if (item.url) { diff --git a/package.json b/package.json index 651bad162..a483676d5 100644 --- a/package.json +++ b/package.json @@ -63,6 +63,7 @@ "@types/rc-slider": "^8.6.5", 
"@types/react-mic": "^12.4.1", "@types/styled-components": "^5.1.4", + "abort-controller": "3.0.0", "backbone": "1.3.3", "blob-util": "1.3.0", "blueimp-canvas-to-blob": "3.14.0", diff --git a/stylesheets/_modules.scss b/stylesheets/_modules.scss index 314366259..3cf2336df 100644 --- a/stylesheets/_modules.scss +++ b/stylesheets/_modules.scss @@ -2301,13 +2301,20 @@ align-items: flex-start; min-height: 65px; + margin: $session-margin-xs; + @include themify($themes) { + background-color: themed('sentMessageBackground'); + border-radius: $session-margin-xs; + } } .module-staged-link-preview--is-loading { align-items: center; } .module-staged-link-preview__loading { - color: $color-gray-60; + @include themify($themes) { + color: themed('sentMessageText'); + } font-size: 14px; text-align: center; flex-grow: 1; @@ -2319,6 +2326,8 @@ } .module-staged-link-preview__content { margin-inline-end: 20px; + padding-inline-start: $session-margin-sm; + padding-inline-end: $session-margin-sm; } .module-staged-link-preview__title { color: $color-gray-90; diff --git a/stylesheets/themes.scss b/stylesheets/themes.scss index 372bfbce6..ee6d3b93b 100644 --- a/stylesheets/themes.scss +++ b/stylesheets/themes.scss @@ -15,8 +15,8 @@ $themes: ( light: ( accent: $accentLightTheme, accentButton: $black, - destructive: $destructive, cellBackground: #fcfcfc, + destructive: $destructive, modalBackground: #fcfcfc, fakeChatBubbleBackground: #f5f5f5, // input diff --git a/test/modules/link_previews_test.js b/test/modules/link_previews_test.js index f88c371e6..0a5800982 100644 --- a/test/modules/link_previews_test.js +++ b/test/modules/link_previews_test.js @@ -6,7 +6,6 @@ const { getImageMetaTag, isLinkInWhitelist, isLinkSneaky, - isMediaLinkInWhitelist, } = require('../../js/modules/link_previews'); describe('Link previews', () => { @@ -89,69 +88,6 @@ describe('Link previews', () => { }); }); - describe('#isMediaLinkInWhitelist', () => { - it('returns true for valid links', () => { - 
assert.strictEqual( - isMediaLinkInWhitelist( - 'https://i.ytimg.com/vi/bZHShcCEH3I/hqdefault.jpg' - ), - true - ); - assert.strictEqual( - isMediaLinkInWhitelist('https://random.cdninstagram.com/blah'), - true - ); - assert.strictEqual( - isMediaLinkInWhitelist('https://preview.redd.it/something'), - true - ); - assert.strictEqual( - isMediaLinkInWhitelist('https://i.imgur.com/something'), - true - ); - }); - - it('returns false for insecure protocol', () => { - assert.strictEqual( - isMediaLinkInWhitelist( - 'http://i.ytimg.com/vi/bZHShcCEH3I/hqdefault.jpg' - ), - false - ); - assert.strictEqual( - isMediaLinkInWhitelist('http://random.cdninstagram.com/blah'), - false - ); - assert.strictEqual( - isMediaLinkInWhitelist('http://preview.redd.it/something'), - false - ); - assert.strictEqual( - isMediaLinkInWhitelist('http://i.imgur.com/something'), - false - ); - }); - - it('returns false for other domains', () => { - assert.strictEqual( - isMediaLinkInWhitelist('https://www.youtube.com/something'), - false - ); - assert.strictEqual( - isMediaLinkInWhitelist('https://youtu.be/something'), - false - ); - assert.strictEqual( - isMediaLinkInWhitelist('https://www.instagram.com/something'), - false - ); - assert.strictEqual( - isMediaLinkInWhitelist('https://cnn.com/something'), - false - ); - }); - }); - describe('#_getMetaTag', () => { it('returns html-decoded tag contents from Youtube', () => { const youtube = ` diff --git a/ts/components/conversation/StagedLinkPreview.tsx b/ts/components/conversation/StagedLinkPreview.tsx index d1bc3ae5d..06c0fe225 100644 --- a/ts/components/conversation/StagedLinkPreview.tsx +++ b/ts/components/conversation/StagedLinkPreview.tsx @@ -4,61 +4,70 @@ import classNames from 'classnames'; import { Image } from './Image'; import { AttachmentType, isImageAttachment } from '../../types/Attachment'; -import { LocalizerType } from '../../types/Util'; -interface Props { +type Props = { isLoaded: boolean; - title: string; - domain: string; + 
title: null | string; + description: null | string; + domain: null | string; image?: AttachmentType; - i18n: LocalizerType; - onClose?: () => void; -} + onClose: () => void; +}; -export class StagedLinkPreview extends React.Component { - public render() { - const { isLoaded, onClose, i18n, title, image, domain } = this.props; +export const StagedLinkPreview = (props: Props) => { + const { isLoaded, onClose, title, image, domain, description } = props; - const isImage = image && isImageAttachment(image); + const isImage = image && isImageAttachment(image); + const i18n = window.i18n; + if (isLoaded && !(title && domain)) { + return <>; + } - return ( -
- {!isLoaded ? ( -
- {i18n('loading')} -
- ) : null} - {isLoaded && image && isImage ? ( -
- {i18n('stagedPreviewThumbnail', -
- ) : null} - {isLoaded ? ( -
-
{title}
+ return ( +
+ {!isLoaded ? ( +
+ {i18n('loading')} +
+ ) : null} + {isLoaded && image && isImage ? ( +
+ {i18n('stagedPreviewThumbnail', +
+ ) : null} + {isLoaded ? ( +
+
{title}
+ {description && ( +
+ {description} +
+ )} +
{domain}
- ) : null} -
-
- ); - } -} +
+ ) : null} +
+ ); +}; diff --git a/ts/components/session/conversation/SessionCompositionBox.tsx b/ts/components/session/conversation/SessionCompositionBox.tsx index aa516b655..ee5157c47 100644 --- a/ts/components/session/conversation/SessionCompositionBox.tsx +++ b/ts/components/session/conversation/SessionCompositionBox.tsx @@ -20,6 +20,7 @@ import { Flex } from '../Flex'; import { AttachmentList } from '../../conversation/AttachmentList'; import { ToastUtils } from '../../../session/utils'; import { AttachmentUtil } from '../../../util'; +import { SessionStagedLinkPreview } from './SessionStagedLinkPreview'; export interface ReplyingToMessageProps { convoId: string; @@ -62,6 +63,7 @@ interface State { mediaSetting: boolean | null; showEmojiPanel: boolean; voiceRecording?: Blob; + ignoredLink?: string; } export class SessionCompositionBox extends React.Component { @@ -92,6 +94,8 @@ export class SessionCompositionBox extends React.Component { this.renderRecordingView = this.renderRecordingView.bind(this); this.renderCompositionView = this.renderCompositionView.bind(this); this.renderQuotedMessage = this.renderQuotedMessage.bind(this); + this.renderStagedLinkPreview = this.renderStagedLinkPreview.bind(this); + this.renderAttachmentsStaged = this.renderAttachmentsStaged.bind(this); // Recording view functions @@ -127,6 +131,7 @@ export class SessionCompositionBox extends React.Component { return ( {this.renderQuotedMessage()} + {this.renderStagedLinkPreview()} {this.renderAttachmentsStaged()}
{showRecordingView @@ -254,6 +259,46 @@ export class SessionCompositionBox extends React.Component { ); } + private renderStagedLinkPreview(): JSX.Element { + // Don't generate link previews if user has turned them off + if (!(window.getSettingValue('link-preview-setting') || false)) { + return <>; + } + + // Do nothing if we're offline + if (!window.textsecure.messaging) { + return <>; + } + + const { stagedAttachments, quotedMessageProps } = this.props; + const { ignoredLink } = this.state; + + // Don't render link previews if quoted message or attachments + if (stagedAttachments.length === 0 && !quotedMessageProps?.id) { + // we try to match the first link found in the current message + const links = window.Signal.LinkPreviews.findLinks( + this.state.message, + undefined + ); + if (!links || links.length === 0 || ignoredLink === links[0]) { + return <>; + } + const firstLink = links[0]; + if (ignoredLink && ignoredLink !== firstLink) { + this.setState({ ignoredLink: undefined }); + } + return ( + { + this.setState({ ignoredLink: firstLink }); + }} + /> + ); + } + return <>; + } + private renderQuotedMessage() { const { quotedMessageProps, removeQuotedMessage } = this.props; if (quotedMessageProps && quotedMessageProps.id) { @@ -434,13 +479,6 @@ export class SessionCompositionBox extends React.Component { this.props.onExitVoiceNoteView(); } - private onDrop() { - // On drop attachments! - // this.textarea.current?.ondrop; - // Look into react-dropzone - // DROP AREA COMES FROM SessionConversation NOT HERE - } - private onChange(event: any) { const message = event.target.value ?? 
''; diff --git a/ts/components/session/conversation/SessionConversationMessagesList.tsx b/ts/components/session/conversation/SessionConversationMessagesList.tsx index 9606e5dc0..4e640a359 100644 --- a/ts/components/session/conversation/SessionConversationMessagesList.tsx +++ b/ts/components/session/conversation/SessionConversationMessagesList.tsx @@ -81,7 +81,7 @@ export class SessionConversationMessagesList extends React.Component< // Keep scrolled to bottom unless user scrolls up if (this.state.isScrolledToBottom) { this.scrollToBottom(); - this.updateReadMessages(); + // this.updateReadMessages(); } // New messages get from message collection. diff --git a/ts/components/session/conversation/SessionStagedLinkPreview.tsx b/ts/components/session/conversation/SessionStagedLinkPreview.tsx new file mode 100644 index 000000000..5a8311436 --- /dev/null +++ b/ts/components/session/conversation/SessionStagedLinkPreview.tsx @@ -0,0 +1,174 @@ +import React, { useEffect, useState } from 'react'; +import { arrayBufferFromFile, AttachmentType } from '../../../types/Attachment'; +import { AttachmentUtil, LinkPreviewUtil } from '../../../util'; +import { StagedLinkPreview } from '../../conversation/StagedLinkPreview'; +import fetch from 'node-fetch'; +import { fetchLinkPreviewImage } from '../../../util/linkPreviewFetch'; +import { AbortController, AbortSignal } from 'abort-controller'; + +type Props = { + url: string; + onClose: () => void; +}; +const LINK_PREVIEW_TIMEOUT = 60 * 1000; + +export interface GetLinkPreviewResultImage { + data: ArrayBuffer; + size: number; + contentType: string; + width: number; + height: number; +} + +export interface GetLinkPreviewResult { + title: string; + url: string; + image?: GetLinkPreviewResultImage; + description: string | null; + date: number | null; +} + +const getPreview = async ( + url: string, + abortSignal: AbortSignal +): Promise => { + // This is already checked elsewhere, but we want to be extra-careful. 
+ if (!window.Signal.LinkPreviews.isLinkSafeToPreview(url)) { + return null; + } + + const linkPreviewMetadata = await LinkPreviewUtil.fetchLinkPreviewMetadata( + fetch, + url, + abortSignal + ); + if (!linkPreviewMetadata) { + return null; + } + const { title, imageHref, description, date } = linkPreviewMetadata; + + let image; + if (imageHref && window.Signal.LinkPreviews.isLinkSafeToPreview(imageHref)) { + let objectUrl: void | string; + try { + const fullSizeImage = await fetchLinkPreviewImage( + fetch, + imageHref, + abortSignal + ); + if (!fullSizeImage) { + throw new Error('Failed to fetch link preview image'); + } + + // Ensure that this file is either small enough or is resized to meet our + // requirements for attachments + const withBlob = await AttachmentUtil.autoScale({ + contentType: fullSizeImage.contentType, + file: new Blob([fullSizeImage.data], { + type: fullSizeImage.contentType, + }), + }); + + const data = await arrayBufferFromFile(withBlob.file); + objectUrl = URL.createObjectURL(withBlob.file); + + const dimensions = await window.Signal.Types.VisualAttachment.getImageDimensions( + { + objectUrl, + logger: window.log, + } + ); + + image = { + data, + size: data.byteLength, + ...dimensions, + contentType: withBlob.file.type, + }; + } catch (error) { + // We still want to show the preview if we failed to get an image + window.log.error( + 'getPreview failed to get image for link preview:', + error.message + ); + } finally { + if (objectUrl) { + URL.revokeObjectURL(objectUrl); + } + } + } + + return { + title, + url, + image, + description, + date, + }; +}; + +export const SessionStagedLinkPreview = (props: Props) => { + const [isLoaded, setIsLoaded] = useState(false); + const [title, setTitle] = useState(null); + const [domain, setDomain] = useState(null); + const [description, setDescription] = useState(null); + const [image, setImage] = useState(undefined); + + useEffect(() => { + // Use this abortcontroller to stop current fetch requests when 
url changed + const abortController = new AbortController(); + setTimeout(() => { + abortController.abort(); + }, LINK_PREVIEW_TIMEOUT); + + setIsLoaded(false); + setTitle(null); + setDomain(null); + setDescription(null); + setImage(undefined); + + getPreview(props.url, abortController.signal) + .then(ret => { + setIsLoaded(true); + if (ret) { + setTitle(ret.title); + if (ret.image?.width) { + if (ret.image) { + const blob = new Blob([ret.image.data], { + type: ret.image.contentType, + }); + const imageAttachment = { + ...ret.image, + url: URL.createObjectURL(blob), + fileName: 'preview', + }; + setImage(imageAttachment); + } + } + setDomain(window.Signal.LinkPreviews.getDomain(ret.url)); + if (ret.description) { + setDescription(ret.description); + } + } + }) + .catch(err => { + abortController.abort(); + setIsLoaded(true); + }); + return () => { + // Cancel other in-flight link preview requests. + abortController.abort(); + }; + }, [props.url]); + + return ( + + ); +}; diff --git a/ts/session/utils/Attachments.ts b/ts/session/utils/Attachments.ts index 10345d500..a6cfe3d54 100644 --- a/ts/session/utils/Attachments.ts +++ b/ts/session/utils/Attachments.ts @@ -68,9 +68,7 @@ export class AttachmentUtils { server = openGroupServer; } const pointer: AttachmentPointer = { - contentType: attachment.contentType - ? (attachment.contentType as string) - : undefined, + contentType: attachment.contentType ? 
attachment.contentType : undefined, size: attachment.size, fileName: attachment.fileName, flags: attachment.flags, diff --git a/ts/test/test-electron/isLinkPreviewDateValid_test.ts b/ts/test/test-electron/isLinkPreviewDateValid_test.ts new file mode 100644 index 000000000..1b28d618f --- /dev/null +++ b/ts/test/test-electron/isLinkPreviewDateValid_test.ts @@ -0,0 +1,41 @@ +import { assert } from 'chai'; + +import { isLinkPreviewDateValid } from '../../util/isLinkPreviewDateValid'; + +describe('isLinkPreviewDateValid', () => { + it('returns false for non-numbers', () => { + assert.isFalse(isLinkPreviewDateValid(null)); + assert.isFalse(isLinkPreviewDateValid(undefined)); + assert.isFalse(isLinkPreviewDateValid(Date.now().toString())); + assert.isFalse(isLinkPreviewDateValid(new Date())); + }); + + it('returns false for zero', () => { + assert.isFalse(isLinkPreviewDateValid(0)); + assert.isFalse(isLinkPreviewDateValid(-0)); + }); + + it('returns false for NaN', () => { + assert.isFalse(isLinkPreviewDateValid(0 / 0)); + }); + + it('returns false for any infinite value', () => { + assert.isFalse(isLinkPreviewDateValid(Infinity)); + assert.isFalse(isLinkPreviewDateValid(-Infinity)); + }); + + it('returns false for timestamps more than a day from now', () => { + const twoDays = 2 * 24 * 60 * 60 * 1000; + assert.isFalse(isLinkPreviewDateValid(Date.now() + twoDays)); + }); + + it('returns true for timestamps before tomorrow', () => { + assert.isTrue(isLinkPreviewDateValid(Date.now())); + assert.isTrue(isLinkPreviewDateValid(Date.now() + 123)); + assert.isTrue(isLinkPreviewDateValid(Date.now() - 123)); + assert.isTrue(isLinkPreviewDateValid(new Date(1995, 3, 20).valueOf())); + assert.isTrue(isLinkPreviewDateValid(new Date(1970, 3, 20).valueOf())); + assert.isTrue(isLinkPreviewDateValid(new Date(1969, 3, 20).valueOf())); + assert.isTrue(isLinkPreviewDateValid(1)); + }); +}); diff --git a/ts/test/test-electron/linkPreviews/linkPreviewFetch_test.ts 
b/ts/test/test-electron/linkPreviews/linkPreviewFetch_test.ts new file mode 100644 index 000000000..0e3e431e8 --- /dev/null +++ b/ts/test/test-electron/linkPreviews/linkPreviewFetch_test.ts @@ -0,0 +1,1296 @@ +import { assert } from 'chai'; +import * as sinon from 'sinon'; +import * as fs from 'fs'; +import * as path from 'path'; +import AbortController from 'abort-controller'; +import { IMAGE_JPEG, MIMEType } from '../../../types/MIME'; + +import { + fetchLinkPreviewImage, + fetchLinkPreviewMetadata, +} from '../../../util/linkPreviewFetch'; + +// tslint:disable: no-http-string + +describe('link preview fetching', () => { + // We'll use this to create a fake `fetch`. We'll want to call `.resolves` or + // `.rejects` on it (meaning that it needs to be a Sinon Stub type), but we'll also + // want it to be a fake `fetch`. `any` seems like the best "supertype" there. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + function stub(): any { + return sinon.stub(); + } + + let sandbox: sinon.SinonSandbox; + let warn: sinon.SinonStub; + + beforeEach(() => { + sandbox = sinon.createSandbox(); + warn = sandbox.stub(window.log, 'warn'); + }); + + afterEach(() => { + sandbox.restore(); + }); + + // tslint:disable-next-line: max-func-body-length + describe('fetchLinkPreviewMetadata', () => { + const makeHtml = (stuffInHead: ReadonlyArray = []) => ` + + + ${stuffInHead.join('\n')} + should be ignored + + `; + + const makeResponse = ({ + status = 200, + headers = {}, + body = makeHtml(['test title']), + url = 'https://example.com', + }: { + status?: number; + headers?: { [key: string]: null | string }; + body?: null | string | Uint8Array | AsyncIterable; + url?: string; + } = {}) => { + let bodyLength: null | number; + let bodyStream: null | AsyncIterable; + if (!body) { + bodyLength = 0; + bodyStream = null; + } else if (typeof body === 'string') { + const asBytes = new TextEncoder().encode(body); + bodyLength = asBytes.length; + bodyStream = (async function* 
stream() { + yield asBytes; + })(); + } else if (body instanceof Uint8Array) { + bodyLength = body.length; + bodyStream = (async function* stream() { + yield body; + })(); + } else { + bodyLength = null; + bodyStream = body; + } + + const headersObj = new Headers(); + Object.entries({ + 'Content-Type': 'text/html; charset=utf-8', + 'Content-Length': bodyLength === null ? null : String(bodyLength), + ...headers, + }).forEach(([headerName, headerValue]) => { + if (headerValue) { + headersObj.set(headerName, headerValue); + } + }); + + return { + headers: headersObj, + body: bodyStream, + ok: status >= 200 && status <= 299, + status, + url, + }; + }; + + it('handles the "kitchen sink" of results', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + '', + '', + '', + '', + ]), + }) + ); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: 'test description', + date: 1587386096009, + imageHref: 'https://example.com/image.jpg', + } + ); + }); + + it('logs no warnings if everything goes smoothly', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + '', + '', + '', + '', + ]), + }) + ); + + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ); + + sinon.assert.notCalled(warn); + }); + + it('sends "WhatsApp" as the User-Agent for compatibility', async () => { + const fakeFetch = stub().resolves(makeResponse()); + + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ); + + sinon.assert.calledWith( + fakeFetch, + 'https://example.com', + sinon.match({ + headers: { + 'User-Agent': 'WhatsApp', + }, + }) + ); + }); + + it('returns null if the request fails', async () => { + const fakeFetch = stub().rejects(new Error('Test request failure')); + + assert.isNull( + await 
fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewMetadata: failed to fetch link preview HTML; bailing' + ); + }); + + it("returns null if the response status code isn't 2xx", async () => { + await Promise.all( + [100, 304, 400, 404, 500, 0, -200].map(async status => { + const fakeFetch = stub().resolves(makeResponse({ status })); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledWith( + warn, + `fetchLinkPreviewMetadata: got a ${status} status code; bailing` + ); + }) + ); + }); + + it("doesn't use fetch's automatic redirection behavior", async () => { + const fakeFetch = stub().resolves(makeResponse()); + + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ); + + sinon.assert.calledWith( + fakeFetch, + 'https://example.com', + sinon.match({ redirect: 'manual' }) + ); + }); + + [301, 302, 303, 307, 308].forEach(status => { + it(`handles ${status} redirects`, async () => { + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + makeResponse({ + status, + headers: { Location: 'https://example.com/2' }, + body: null, + }) + ); + fakeFetch.onSecondCall().resolves(makeResponse()); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: null, + date: null, + imageHref: null, + } + ); + + sinon.assert.calledTwice(fakeFetch); + sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com'); + sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2'); + }); + + it(`returns null when seeing a ${status} status with no Location header`, async () => { + const fakeFetch = stub().resolves(makeResponse({ status })); + + assert.isNull( + await 
fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + }); + }); + + it('handles relative redirects', async () => { + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '/2/' }, + body: null, + }) + ); + fakeFetch.onSecondCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '3' }, + body: null, + }) + ); + fakeFetch.onThirdCall().resolves(makeResponse()); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: null, + date: null, + imageHref: null, + } + ); + + sinon.assert.calledThrice(fakeFetch); + sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com'); + sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2/'); + sinon.assert.calledWith(fakeFetch.getCall(2), 'https://example.com/2/3'); + }); + + it('returns null if redirecting to an insecure HTTP URL', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + status: 301, + headers: { Location: 'http://example.com' }, + body: null, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(fakeFetch); + }); + + it("returns null if there's a redirection loop", async () => { + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '/2/' }, + body: null, + }) + ); + fakeFetch.onSecondCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '/start' }, + body: null, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com/start', + new AbortController().signal + ) + ); + + sinon.assert.calledTwice(fakeFetch); + }); + + it('returns null if redirecting more than 20 times', async () => { + const fakeFetch = 
stub().callsFake(async () => + makeResponse({ + status: 301, + // tslint:disable-next-line: insecure-random + headers: { Location: `/${Math.random()}` }, + body: null, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com/start', + new AbortController().signal + ) + ); + + sinon.assert.callCount(fakeFetch, 20); + }); + + it('returns null if the response has no body', async () => { + const fakeFetch = stub().resolves(makeResponse({ body: null })); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewMetadata: no response body; bailing' + ); + }); + + it('returns null if the result body is too short', async () => { + const fakeFetch = stub().resolves(makeResponse({ body: '' })); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewMetadata: Content-Length is too short; bailing' + ); + }); + + it('returns null if the result is meant to be downloaded', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { 'Content-Disposition': 'attachment' }, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewMetadata: Content-Disposition header is not inline; bailing' + ); + }); + + it('allows an explitly inline Content-Disposition header', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { 'Content-Disposition': 'inline' }, + }) + ); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: null, + 
date: null, + imageHref: null, + } + ); + }); + + it('returns null if the Content-Type is not HTML', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { 'Content-Type': 'text/plain' }, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewMetadata: Content-Type is not HTML; bailing' + ); + }); + + it('accepts non-lowercase Content-Type headers', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { 'Content-Type': 'TEXT/HTML; chArsEt=utf-8' }, + }) + ); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: null, + date: null, + imageHref: null, + } + ); + }); + + it('parses the response as UTF-8 if the body contains a byte order mark', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html', + }, + body: (async function* body() { + yield new Uint8Array([0xef, 0xbb, 0xbf]); + yield new TextEncoder().encode( + '<!doctype html><title>\u{1F389}' + ); + })(), + }) + ); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: '🎉', + description: null, + date: null, + imageHref: null, + } + ); + }); + + it('respects the UTF-8 byte order mark above the Content-Type header', async () => { + const bom = new Uint8Array([0xef, 0xbb, 0xbf]); + const titleHtml = new TextEncoder().encode('\u{1F389}'); + + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html; charset=latin1', + }, + body: (async function* body() { + yield bom; + yield titleHtml; + })(), + }) + ); + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + 
new AbortController().signal + ), + 'title', + '🎉' + ); + }); + + it('respects the UTF-8 byte order mark above a in the document', async () => { + const bom = new Uint8Array([0xef, 0xbb, 0xbf]); + const titleHtml = new TextEncoder().encode('\u{1F389}'); + const endHeadHtml = new TextEncoder().encode(''); + + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html', + }, + body: (async function* body() { + yield bom; + yield new TextEncoder().encode( + '' + ); + yield titleHtml; + yield endHeadHtml; + })(), + }) + ); + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + '🎉' + ); + }); + + it('respects the UTF-8 byte order mark above a in the document', async () => { + const bom = new Uint8Array([0xef, 0xbb, 0xbf]); + const titleHtml = new TextEncoder().encode('\u{1F389}'); + const endHeadHtml = new TextEncoder().encode(''); + + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html', + }, + body: (async function* body() { + yield bom; + yield new TextEncoder().encode( + '' + ); + yield titleHtml; + yield endHeadHtml; + })(), + }) + ); + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + '🎉' + ); + }); + + it('respects the Content-Type header above anything in the HTML', async () => { + const titleHtml = new TextEncoder().encode('\u{1F389}'); + const endHeadHtml = new TextEncoder().encode(''); + + { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html; charset=utf-8', + }, + body: (async function* body() { + yield new TextEncoder().encode( + '' + ); + yield titleHtml; + yield endHeadHtml; + })(), + }) + ); + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + '🎉' + ); + } + + { + const 
fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html; charset=utf-8', + }, + body: (async function* body() { + yield new TextEncoder().encode( + '' + ); + yield titleHtml; + yield endHeadHtml; + })(), + }) + ); + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + '🎉' + ); + } + }); + + it('prefers the Content-Type http-equiv in the HTML above ', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html', + }, + body: makeHtml([ + '', + '', + '\u{1F389}', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + '🎉' + ); + }); + + it('parses non-UTF8 encodings', async () => { + const titleBytes = new Uint8Array([0x61, 0x71, 0x75, 0xed]); + assert.notDeepEqual( + new TextDecoder('utf8').decode(titleBytes), + new TextDecoder('latin1').decode(titleBytes), + 'Test data was not set up correctly' + ); + + const fakeFetch = stub().resolves( + makeResponse({ + headers: { + 'Content-Type': 'text/html; charset=latin1', + }, + body: (async function* body() { + yield new TextEncoder().encode(''); + yield titleBytes; + yield new TextEncoder().encode(''); + })(), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + 'aquí' + ); + }); + + it('handles incomplete bodies', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: (async function* body() { + yield new TextEncoder().encode( + 'foo bar { + const shouldNeverBeCalled = sinon.stub(); + + const abortController = new AbortController(); + + const fakeFetch = stub().resolves( + makeResponse({ + body: (async function* body() { + yield new TextEncoder().encode(''); + abortController.abort(); + yield new TextEncoder().encode('should be 
dropped'); + shouldNeverBeCalled(); + })(), + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + abortController.signal + ) + ); + + sinon.assert.notCalled(shouldNeverBeCalled); + }); + + it('stops reading bodies after 500 kilobytes', async () => { + const shouldNeverBeCalled = sinon.stub(); + + const fakeFetch = stub().resolves( + makeResponse({ + body: (async function* body() { + yield new TextEncoder().encode( + 'foo bar' + ); + const spaces = new Uint8Array(1024).fill(32); + for (let i = 0; i < 500; i += 1) { + yield spaces; + } + shouldNeverBeCalled(); + yield new TextEncoder().encode( + '' + ); + })(), + }) + ); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'foo bar', + description: null, + date: null, + imageHref: null, + } + ); + + sinon.assert.notCalled(shouldNeverBeCalled); + }); + + it("returns null if the HTML doesn't contain a title, even if it contains other values", async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + '', + '', + ``, + ]), + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + "parseMetadata: HTML document doesn't have a title; bailing" + ); + }); + + it('prefers og:title to document.title', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'ignored', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'title', + 'foo bar' + ); + }); + + it('prefers og:description to ', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + 
fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'description', + 'bar' + ); + }); + + it('parses ', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'description', + 'bar' + ); + }); + + it('ignores empty descriptions', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'description', + null + ); + }); + + it('parses absolute image URLs', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'imageHref', + 'https://example.com/image.jpg' + ); + }); + + it('parses relative image URLs', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'imageHref', + 'https://example.com/assets/image.jpg' + ); + }); + + it('relative image URL resolution is relative to the final URL after redirects, not the original URL', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + ]), + url: 'https://bar.example/assets/', + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://foo.example', + new AbortController().signal + ), + 'imageHref', + 'https://bar.example/assets/image.jpg' + ); + }); + + it('ignores empty image URLs', async () => { + const fakeFetch = stub().resolves( + makeResponse({ 
+ body: makeHtml([ + 'foo', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'imageHref', + null + ); + }); + + it('ignores blank image URLs', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + body: makeHtml([ + 'foo', + '', + ]), + }) + ); + + assert.propertyVal( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + 'imageHref', + null + ); + }); + }); + + // tslint:disable-next-line: max-func-body-length + describe('fetchLinkPreviewImage', () => { + const readFixture = async (filename: string): Promise => { + const result = await fs.promises.readFile( + path.join(__dirname, '..', '..', '..', 'fixtures', filename) + ); + assert(result.length > 10, `Test failed to read fixture ${filename}`); + return result; + }; + + [ + { + title: 'JPEG', + contentType: 'image/jpeg', + fixtureFilename: 'kitten-1-64-64.jpg', + }, + { + title: 'PNG', + contentType: 'image/png', + fixtureFilename: + 'freepngs-2cd43b_bed7d1327e88454487397574d87b64dc_mv2.png', + }, + { + title: 'GIF', + contentType: 'image/gif', + fixtureFilename: 'giphy-GVNvOUpeYmI7e.gif', + }, + { + title: 'WEBP', + contentType: 'image/webp', + fixtureFilename: '512x515-thumbs-up-lincoln.webp', + }, + { + title: 'ICO', + contentType: 'image/x-icon', + fixtureFilename: 'kitten-1-64-64.ico', + }, + ].forEach(({ title, contentType, fixtureFilename }) => { + it(`handles ${title} images`, async () => { + const fixture = await readFixture(fixtureFilename); + + const fakeFetch = stub().resolves( + new Response(fixture, { + headers: { + 'Content-Type': contentType, + 'Content-Length': fixture.length.toString(), + }, + }) + ); + + assert.deepEqual( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ), + { + data: fixture.buffer, + contentType: contentType, + } + ); + }); + }); + + it('returns 
null if the request fails', async () => { + const fakeFetch = stub().rejects(new Error('Test request failure')); + + assert.isNull( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewImage: failed to fetch image; bailing' + ); + }); + + it("returns null if the response status code isn't 2xx", async () => { + const fixture = await readFixture('kitten-1-64-64.jpg'); + + await Promise.all( + [400, 404, 500, 598].map(async status => { + const fakeFetch = stub().resolves( + new Response(fixture, { + status, + headers: { + 'Content-Type': 'image/jpeg', + 'Content-Length': fixture.length.toString(), + }, + }) + ); + + assert.isNull( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ) + ); + + sinon.assert.calledWith( + warn, + `fetchLinkPreviewImage: got a ${status} status code; bailing` + ); + }) + ); + }); + + // Most of the redirect behavior is tested above. 
+ it('handles 301 redirects', async () => { + const fixture = await readFixture('kitten-1-64-64.jpg'); + + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + new Response(null, { + status: 301, + headers: { + Location: '/result.jpg', + }, + }) + ); + fakeFetch.onSecondCall().resolves( + new Response(fixture, { + headers: { + 'Content-Type': IMAGE_JPEG, + 'Content-Length': fixture.length.toString(), + }, + }) + ); + + assert.deepEqual( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ), + { + data: fixture.buffer, + contentType: IMAGE_JPEG, + } + ); + + sinon.assert.calledTwice(fakeFetch); + sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com/img'); + sinon.assert.calledWith( + fakeFetch.getCall(1), + 'https://example.com/result.jpg' + ); + }); + + it('returns null if the response is too small', async () => { + const fakeFetch = stub().resolves( + new Response(await readFixture('kitten-1-64-64.jpg'), { + headers: { + 'Content-Type': 'image/jpeg', + 'Content-Length': '2', + }, + }) + ); + + assert.isNull( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewImage: Content-Length is too short; bailing' + ); + }); + + it('returns null if the response is too large', async () => { + const fakeFetch = stub().resolves( + new Response(await readFixture('kitten-1-64-64.jpg'), { + headers: { + 'Content-Type': 'image/jpeg', + 'Content-Length': '123456789', + }, + }) + ); + + assert.isNull( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(warn); + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewImage: Content-Length is too large or is unset; bailing' + ); + }); + + it('returns null if the Content-Type is not a valid image', async () => { + const fixture 
= await readFixture('kitten-1-64-64.jpg'); + + await Promise.all( + ['', 'image/tiff', 'video/mp4', 'text/plain', 'application/html'].map( + async contentType => { + const fakeFetch = stub().resolves( + new Response(fixture, { + headers: { + 'Content-Type': contentType, + 'Content-Length': fixture.length.toString(), + }, + }) + ); + + assert.isNull( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ) + ); + + sinon.assert.calledWith( + warn, + 'fetchLinkPreviewImage: Content-Type is not an image; bailing' + ); + } + ) + ); + }); + + it('sends "WhatsApp" as the User-Agent for compatibility', async () => { + const fakeFetch = stub().resolves(new Response(null)); + + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ); + + sinon.assert.calledWith( + fakeFetch, + 'https://example.com/img', + sinon.match({ + headers: { + 'User-Agent': 'WhatsApp', + }, + }) + ); + }); + }); +}); diff --git a/ts/types/Attachment.ts b/ts/types/Attachment.ts index c0d447fcf..b0d2902af 100644 --- a/ts/types/Attachment.ts +++ b/ts/types/Attachment.ts @@ -143,10 +143,10 @@ export function isVideo(attachments?: Array) { return attachments && isVideoAttachment(attachments[0]); } -export function isVideoAttachment(attachment?: AttachmentType) { +export function isVideoAttachment(attachment?: AttachmentType): boolean { return ( - attachment && - attachment.contentType && + !!attachment && + !!attachment.contentType && isVideoTypeSupported(attachment.contentType) ); } @@ -166,6 +166,18 @@ type DimensionsType = { width: number; }; +export async function arrayBufferFromFile(file: any): Promise { + return new Promise((resolve, reject) => { + const FR = new FileReader(); + FR.onload = (e: any) => { + resolve(e.target.result); + }; + FR.onerror = reject; + FR.onabort = reject; + FR.readAsArrayBuffer(file); + }); +} + export function getImageDimensions(attachment: AttachmentType): DimensionsType 
{ const { height, width } = attachment; if (!height || !width) { diff --git a/ts/types/MIME.ts b/ts/types/MIME.ts index d34ae75c0..8766c1557 100644 --- a/ts/types/MIME.ts +++ b/ts/types/MIME.ts @@ -1,4 +1,4 @@ -export type MIMEType = string & { _mimeTypeBrand: any }; +export type MIMEType = string; export const APPLICATION_OCTET_STREAM = 'application/octet-stream' as MIMEType; export const APPLICATION_JSON = 'application/json' as MIMEType; @@ -7,13 +7,19 @@ export const AUDIO_WEBM = 'audio/webm' as MIMEType; export const AUDIO_MP3 = 'audio/mp3' as MIMEType; export const IMAGE_GIF = 'image/gif' as MIMEType; export const IMAGE_JPEG = 'image/jpeg' as MIMEType; +export const IMAGE_BMP = 'image/bmp' as MIMEType; +export const IMAGE_ICO = 'image/x-icon' as MIMEType; +export const IMAGE_WEBP = 'image/webp' as MIMEType; +export const IMAGE_PNG = 'image/png' as MIMEType; export const VIDEO_MP4 = 'video/mp4' as MIMEType; export const VIDEO_QUICKTIME = 'video/quicktime' as MIMEType; export const isJPEG = (value: MIMEType): boolean => value === 'image/jpeg'; -export const isImage = (value?: MIMEType): boolean => - !!value && value.startsWith('image/'); +export const isImage = (value: MIMEType): boolean => + value?.length > 0 && value.startsWith('image/'); export const isVideo = (value: MIMEType): boolean => - value && value.startsWith('video/'); + value?.length > 0 && value.startsWith('video/'); +// As of 2020-04-16 aif files do not play in Electron nor Chrome. We should only +// recognize them as file attachments. 
export const isAudio = (value: MIMEType): boolean => - value && value.startsWith('audio/'); + value?.length > 0 && value.startsWith('audio/') && !value.endsWith('aiff'); diff --git a/ts/util/attachmentsUtil.ts b/ts/util/attachmentsUtil.ts index bbd7ad3fa..f8531f7b8 100644 --- a/ts/util/attachmentsUtil.ts +++ b/ts/util/attachmentsUtil.ts @@ -90,7 +90,7 @@ export async function getFile(attachment: StagedAttachmentType) { }; } -async function readFile(attachment: any): Promise { +export async function readFile(attachment: any): Promise { return new Promise((resolve, reject) => { const FR = new FileReader(); FR.onload = e => { diff --git a/ts/util/index.ts b/ts/util/index.ts index a7b20b103..9a4376c9e 100644 --- a/ts/util/index.ts +++ b/ts/util/index.ts @@ -8,6 +8,7 @@ import { FindMember } from './findMember'; import * as UserUtil from './user'; import * as PasswordUtil from './passwordUtils'; import * as AttachmentUtil from './attachmentsUtil'; +import * as LinkPreviewUtil from './linkPreviewFetch'; export * from './blockedNumberController'; @@ -22,4 +23,5 @@ export { PasswordUtil, FindMember, AttachmentUtil, + LinkPreviewUtil, }; diff --git a/ts/util/isLinkPreviewDateValid.ts b/ts/util/isLinkPreviewDateValid.ts new file mode 100644 index 000000000..5dc9e3e3a --- /dev/null +++ b/ts/util/isLinkPreviewDateValid.ts @@ -0,0 +1,11 @@ +const ONE_DAY = 24 * 60 * 60 * 1000; + +export function isLinkPreviewDateValid(value: unknown): value is number { + const maximumLinkPreviewDate = Date.now() + ONE_DAY; + return ( + typeof value === 'number' && + value !== 0 && + Number.isFinite(value) && + value < maximumLinkPreviewDate + ); +} diff --git a/ts/util/linkPreviewFetch.ts b/ts/util/linkPreviewFetch.ts new file mode 100644 index 000000000..2f3900247 --- /dev/null +++ b/ts/util/linkPreviewFetch.ts @@ -0,0 +1,569 @@ +import { RequestInit, Response } from 'node-fetch'; +import { AbortSignal } from 'abort-controller'; + +import { + IMAGE_GIF, + IMAGE_ICO, + IMAGE_JPEG, + IMAGE_PNG, 
+ IMAGE_WEBP, + MIMEType, +} from '../types/MIME'; + +const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20; +// tslint:disable: prefer-for-of + +// Lifted from the `fetch` spec [here][0]. +// [0]: https://fetch.spec.whatwg.org/#redirect-status +const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]); + +const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100; + +// Though we'll accept HTML of any Content-Length (including no specified length), we +// will only load some of the HTML. So we might start loading a 99 gigabyte HTML page +// but only parse the first 500 kilobytes. However, if the Content-Length is less than +// this, we won't waste space. +const MAX_HTML_BYTES_TO_LOAD = 500 * 1024; + +// `<title>x` is 8 bytes. Nothing else (meta tags, etc) will even fit, so we can ignore +// it. This is mostly to protect us against empty response bodies. +const MIN_HTML_CONTENT_LENGTH = 8; + +// Similar to the above. We don't want to show tiny images (even though the more likely +// case is that the Content-Length is 0). +const MIN_IMAGE_CONTENT_LENGTH = 8; +const MAX_IMAGE_CONTENT_LENGTH = 1024 * 1024; +const VALID_IMAGE_MIME_TYPES: Set<MIMEType> = new Set([ + IMAGE_GIF, + IMAGE_ICO, + IMAGE_JPEG, + IMAGE_PNG, + IMAGE_WEBP, +]); + +// We want to discard unreasonable dates. Update this in ~950 years. (This may discard +// some reasonable dates, which is okay because it is only for link previews.)
+const MIN_DATE = 0; +const MAX_DATE = new Date(3000, 0, 1).valueOf(); + +const emptyContentType = { type: null, charset: null }; + +type FetchFn = (href: string, init: RequestInit) => Promise<Response>; + +export interface LinkPreviewMetadata { + title: string; + description: null | string; + date: null | number; + imageHref: null | string; +} + +export interface LinkPreviewImage { + data: ArrayBuffer; + contentType: MIMEType; +} + +type ParsedContentType = + | { type: null; charset: null } + | { type: MIMEType; charset: null | string }; + +// This throws non-helpful errors because (1) it logs (2) it will be immediately caught. +async function fetchWithRedirects( + fetchFn: FetchFn, + href: string, + options: RequestInit +): Promise<Response> { + const urlsSeen = new Set<string>(); + + let nextHrefToLoad = href; + for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) { + if (urlsSeen.has(nextHrefToLoad)) { + window.log.warn('fetchWithRedirects: found a redirect loop'); + throw new Error('redirect loop'); + } + urlsSeen.add(nextHrefToLoad); + + // This `await` is deliberately inside of a loop.
+ // eslint-disable-next-line no-await-in-loop + const response = await fetchFn(nextHrefToLoad, { + ...options, + redirect: 'manual', + }); + + if (!REDIRECT_STATUSES.has(response.status)) { + return response; + } + + const location = response.headers.get('location'); + if (!location) { + window.log.warn( + 'fetchWithRedirects: got a redirect status code but no Location header; bailing' + ); + throw new Error('no location with redirect'); + } + + const newUrl = maybeParseUrl(location, nextHrefToLoad); + if (newUrl?.protocol !== 'https:') { + window.log.warn( + 'fetchWithRedirects: got a redirect status code and an invalid Location header' + ); + throw new Error('invalid location'); + } + + nextHrefToLoad = newUrl.href; + } + + window.log.warn('fetchWithRedirects: too many redirects'); + throw new Error('too many redirects'); +} + +function maybeParseUrl(href: string, base: string): null | URL { + let result: URL; + try { + result = new URL(href, base); + } catch (err) { + return null; + } + // We never need the hash + result.hash = ''; + return result; +} + +/** + * Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is + * a simplified version for link previews).
+ * [0]: https://tools.ietf.org/html/rfc2045 + */ +const parseContentType = (headerValue: string | null): ParsedContentType => { + if (!headerValue || headerValue.length > MAX_CONTENT_TYPE_LENGTH_TO_PARSE) { + return emptyContentType; + } + + const [rawType, ...rawParameters] = headerValue + .toLowerCase() + .split(/;/g) + .map(part => part.trim()) + .filter(Boolean); + if (!rawType) { + return emptyContentType; + } + + let charset: null | string = null; + for (let i = 0; i < rawParameters.length; i += 1) { + const rawParameter = rawParameters[i]; + const parsed = new URLSearchParams(rawParameter); + const parsedCharset = parsed.get('charset')?.trim(); + if (parsedCharset) { + charset = parsedCharset; + break; + } + } + + return { + type: rawType, + charset, + }; +}; + +const isInlineContentDisposition = (headerValue: string | null): boolean => + !headerValue || headerValue.split(';', 1)[0] === 'inline'; + +const parseContentLength = (headerValue: string | null): number => { + // No need to parse gigantic Content-Lengths; only parse the first 10 digits. + if (typeof headerValue !== 'string' || !/^\d{1,10}$/g.test(headerValue)) { + return Infinity; + } + const result = parseInt(headerValue, 10); + return Number.isNaN(result) ? Infinity : result; +}; + +const emptyHtmlDocument = (): HTMLDocument => + new DOMParser().parseFromString('', 'text/html'); + +// The charset behavior here follows the [W3 guidelines][0]. The priority is BOM, HTTP +// header, `http-equiv` meta tag, `charset` meta tag, and finally a UTF-8 fallback. +// (This fallback could, perhaps, be smarter based on user locale.) 
+// [0]: https://www.w3.org/International/questions/qa-html-encoding-declarations.en +const parseHtmlBytes = ( + bytes: Readonly<Uint8Array>, + httpCharset: string | null +): HTMLDocument => { + const hasBom = bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf; + + let isSureOfCharset: boolean; + let decoder: TextDecoder; + if (hasBom) { + decoder = new TextDecoder(); + isSureOfCharset = true; + } else if (httpCharset) { + try { + decoder = new TextDecoder(httpCharset); + isSureOfCharset = true; + } catch (err) { + decoder = new TextDecoder(); + isSureOfCharset = false; + } + } else { + decoder = new TextDecoder(); + isSureOfCharset = false; + } + + let decoded: string; + try { + decoded = decoder.decode(bytes); + } catch (err) { + decoded = ''; + } + + let document: HTMLDocument; + try { + document = new DOMParser().parseFromString(decoded, 'text/html'); + } catch (err) { + document = emptyHtmlDocument(); + } + + if (!isSureOfCharset) { + const httpEquiv = document + .querySelector('meta[http-equiv="content-type"]') + ?.getAttribute('content'); + if (httpEquiv) { + const httpEquivCharset = parseContentType(httpEquiv).charset; + if (httpEquivCharset) { + return parseHtmlBytes(bytes, httpEquivCharset); + } + } + + const metaCharset = document + .querySelector('meta[charset]') + ?.getAttribute('charset'); + if (metaCharset) { + return parseHtmlBytes(bytes, metaCharset); + } + } + + return document; +}; + +const getHtmlDocument = async ( + body: AsyncIterable<string | Uint8Array>, + contentLength: number, + httpCharset: string | null, + abortSignal: AbortSignal +): Promise<HTMLDocument> => { + let result: HTMLDocument = emptyHtmlDocument(); + + const maxHtmlBytesToLoad = Math.min(contentLength, MAX_HTML_BYTES_TO_LOAD); + const buffer = new Uint8Array(new ArrayBuffer(maxHtmlBytesToLoad)); + let bytesLoadedSoFar = 0; + + try { + // `for ... of` is much cleaner here, so we allow it. 
+ /* eslint-disable no-restricted-syntax */ + for await (let chunk of body) { + if (abortSignal.aborted) { + break; + } + + // This check exists to satisfy TypeScript; chunk should always be a Buffer. + if (typeof chunk === 'string') { + if ( + httpCharset !== null && + httpCharset !== undefined && + Buffer.isEncoding(httpCharset) + ) { + chunk = Buffer.from(chunk, httpCharset); + } else { + chunk = Buffer.from(chunk, 'utf8'); + } + } + + const truncatedChunk = chunk.slice( + 0, + maxHtmlBytesToLoad - bytesLoadedSoFar + ); + buffer.set(truncatedChunk, bytesLoadedSoFar); + bytesLoadedSoFar += truncatedChunk.byteLength; + + result = parseHtmlBytes(buffer.slice(0, bytesLoadedSoFar), httpCharset); + + const hasLoadedMaxBytes = bytesLoadedSoFar >= maxHtmlBytesToLoad; + if (hasLoadedMaxBytes) { + break; + } + } + /* eslint-enable no-restricted-syntax */ + } catch (err) { + window.log.warn( + 'getHtmlDocument: error when reading body; continuing with what we got' + ); + } + + return result; +}; + +const getOpenGraphContent = ( + document: HTMLDocument, + properties: ReadonlyArray<string> +): string | null => { + for (let i = 0; i < properties.length; i += 1) { + const property = properties[i]; + const content = document + .querySelector(`meta[property="${property}"]`) + ?.getAttribute('content') + ?.trim(); + if (content) { + return content; + } + } + return null; +}; + +const getLinkHrefAttribute = ( + document: HTMLDocument, + rels: ReadonlyArray<string> +): string | null => { + for (let i = 0; i < rels.length; i += 1) { + const rel = rels[i]; + const href = document + .querySelector(`link[rel="${rel}"]`) + ?.getAttribute('href') + ?.trim(); + if (href) { + return href; + } + } + return null; +}; + +const parseMetadata = ( + document: HTMLDocument, + href: string +): LinkPreviewMetadata | null => { + const title = + getOpenGraphContent(document, ['og:title']) || document.title.trim(); + if (!title) { + window.log.warn( + "parseMetadata: HTML document doesn't have a 
title; bailing" + ); + return null; + } + + const description = + getOpenGraphContent(document, ['og:description']) || + document + .querySelector('meta[name="description"]') + ?.getAttribute('content') + ?.trim() || + null; + + const rawImageHref = + getOpenGraphContent(document, ['og:image', 'og:image:url']) || + getLinkHrefAttribute(document, [ + 'shortcut icon', + 'icon', + 'apple-touch-icon', + ]); + const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null; + const imageHref = imageUrl ? imageUrl.href : null; + + let date: number | null = null; + const rawDate = getOpenGraphContent(document, [ + 'og:published_time', + 'article:published_time', + 'og:modified_time', + 'article:modified_time', + ]); + if (rawDate) { + const parsed = Date.parse(rawDate); + if (parsed > MIN_DATE && parsed < MAX_DATE) { + date = parsed; + } + } + + return { + title, + description, + imageHref, + date, + }; +}; + +/** + * This attempts to fetch link preview metadata, returning `null` if it cannot be found + * for any reason. + * + * NOTE: This does NOT validate the incoming URL for safety. For example, it may fetch an + * insecure HTTP href. It also does not offer a timeout; that is up to the caller. + * + * At a high level, it: + * + * 1. Makes a GET request, following up to 20 redirects (`fetch`'s default). + * 2. Checks the response status code and headers to make sure it's a normal HTML + * response. + * 3. Streams up to `MAX_HTML_BYTES_TO_LOAD`, stopping when (1) it has loaded all of the + * HTML (2) loaded the maximum number of bytes (3) finished loading the `<head>`. + * 4. Parses the resulting HTML with `DOMParser`. + * 5. Grabs the title, description, image URL, and date. 
+ */ +export async function fetchLinkPreviewMetadata( + fetchFn: FetchFn, + href: string, + abortSignal: AbortSignal +): Promise<null | LinkPreviewMetadata> { + let response: Response; + try { + response = await fetchWithRedirects(fetchFn, href, { + headers: { + Accept: 'text/html,application/xhtml+xml', + 'User-Agent': 'WhatsApp', + }, + signal: abortSignal, + }); + } catch (err) { + window.log.warn( + 'fetchLinkPreviewMetadata: failed to fetch link preview HTML; bailing' + ); + return null; + } + + if (!response.ok) { + window.log.warn( + `fetchLinkPreviewMetadata: got a ${response.status} status code; bailing` + ); + return null; + } + + if (!response.body) { + window.log.warn('fetchLinkPreviewMetadata: no response body; bailing'); + return null; + } + + if ( + !isInlineContentDisposition(response.headers.get('Content-Disposition')) + ) { + window.log.warn( + 'fetchLinkPreviewMetadata: Content-Disposition header is not inline; bailing' + ); + return null; + } + + if (abortSignal.aborted) { + return null; + } + + const contentLength = parseContentLength( + response.headers.get('Content-Length') + ); + if (contentLength < MIN_HTML_CONTENT_LENGTH) { + window.log.warn( + 'fetchLinkPreviewMetadata: Content-Length is too short; bailing' + ); + return null; + } + + const contentType = parseContentType(response.headers.get('Content-Type')); + if (contentType.type !== 'text/html') { + window.log.warn( + 'fetchLinkPreviewMetadata: Content-Type is not HTML; bailing' + ); + return null; + } + + const document = await getHtmlDocument( + response.body, + contentLength, + contentType.charset, + abortSignal + ); + + // [The Node docs about `ReadableStream.prototype[Symbol.asyncIterator]`][0] say that + // the stream will be destroyed if you `break` out of the loop, but I could not + // reproduce this. Also [`destroy` is a documented method][1] but it is not in the + // Node types, which is why we do this cast to `any`. 
+ // [0]: https://nodejs.org/docs/latest-v12.x/api/stream.html#stream_readable_symbol_asynciterator + // [1]: https://nodejs.org/docs/latest-v12.x/api/stream.html#stream_readable_destroy_error + try { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (response.body as any).destroy(); + } catch (err) { + // Ignored. + } + + if (abortSignal.aborted) { + return null; + } + + return parseMetadata(document, response.url); +} + +/** + * This attempts to fetch an image, returning `null` if it fails for any reason. + * + * NOTE: This does NOT validate the incoming URL for safety. For example, it may fetch an + * insecure HTTP href. It also does not offer a timeout; that is up to the caller. + */ +export async function fetchLinkPreviewImage( + fetchFn: FetchFn, + href: string, + abortSignal: AbortSignal +): Promise<null | LinkPreviewImage> { + let response: Response; + try { + response = await fetchWithRedirects(fetchFn, href, { + headers: { + 'User-Agent': 'WhatsApp', + }, + size: MAX_IMAGE_CONTENT_LENGTH, + signal: abortSignal, + }); + } catch (err) { + window.log.warn('fetchLinkPreviewImage: failed to fetch image; bailing'); + return null; + } + + if (abortSignal.aborted) { + return null; + } + + if (!response.ok) { + window.log.warn( + `fetchLinkPreviewImage: got a ${response.status} status code; bailing` + ); + return null; + } + + const contentLength = parseContentLength( + response.headers.get('Content-Length') + ); + if (contentLength < MIN_IMAGE_CONTENT_LENGTH) { + window.log.warn( + 'fetchLinkPreviewImage: Content-Length is too short; bailing' + ); + return null; + } + if (contentLength > MAX_IMAGE_CONTENT_LENGTH) { + window.log.warn( + 'fetchLinkPreviewImage: Content-Length is too large or is unset; bailing' + ); + return null; + } + + const { type: contentType } = parseContentType( + response.headers.get('Content-Type') + ); + if (!contentType || !VALID_IMAGE_MIME_TYPES.has(contentType)) { + window.log.warn( + 'fetchLinkPreviewImage: 
Content-Type is not an image; bailing' + ); + return null; + } + + let data: ArrayBuffer; + try { + data = await response.arrayBuffer(); + } catch (err) { + window.log.warn('fetchLinkPreviewImage: failed to read body; bailing'); + return null; + } + + return { data, contentType }; +} diff --git a/yarn.lock b/yarn.lock index f7047a8ac..b98cbf2f0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -744,6 +744,13 @@ abbrev@1: resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8" integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q== +abort-controller@3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/abort-controller/-/abort-controller-3.0.0.tgz#eaf54d53b62bae4138e809ca225c8439a6efb392" + integrity sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg== + dependencies: + event-target-shim "^5.0.0" + accepts@~1.3.4, accepts@~1.3.5, accepts@~1.3.7: version "1.3.7" resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.7.tgz#531bc726517a3b2b41f850021c6cc15eaab507cd" @@ -3775,6 +3782,11 @@ etag@~1.8.1: resolved "https://registry.yarnpkg.com/etag/-/etag-1.8.1.tgz#41ae2eeb65efa62268aebfea83ac7d79299b0887" integrity sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc= +event-target-shim@^5.0.0: + version "5.0.1" + resolved "https://registry.yarnpkg.com/event-target-shim/-/event-target-shim-5.0.1.tgz#5d4d3ebdf9583d63a5333ce2deb7480ab2b05789" + integrity sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ== + eventemitter2@~0.4.13: version "0.4.14" resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-0.4.14.tgz#8f61b75cde012b2e9eb284d4545583b5643b61ab"