add staged link previews for everything from signal
parent
f5818817e9
commit
13e02b5bf1
@ -1 +1,3 @@
|
||||
/**
 * Reports whether `link` may be fetched to build a link preview.
 * NOTE(review): the actual safety criteria live in the implementation, which is not
 * visible from this declaration — confirm before relying on specifics.
 */
export function isLinkSafeToPreview(link: string): boolean;
|
||||
|
||||
/**
 * Reports whether `link` is considered "sneaky".
 * NOTE(review): presumably flags deceptive-looking URLs; the implementation is not
 * visible from this declaration — confirm.
 */
export function isLinkSneaky(link: string): boolean;
|
||||
|
@ -0,0 +1,174 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import { arrayBufferFromFile, AttachmentType } from '../../../types/Attachment';
|
||||
import { AttachmentUtil, LinkPreviewUtil } from '../../../util';
|
||||
import { StagedLinkPreview } from '../../conversation/StagedLinkPreview';
|
||||
import fetch from 'node-fetch';
|
||||
import { fetchLinkPreviewImage } from '../../../util/linkPreviewFetch';
|
||||
import { AbortController, AbortSignal } from 'abort-controller';
|
||||
|
||||
/** Props accepted by `SessionStagedLinkPreview`. */
type Props = {
  /** Link whose preview is fetched; a new value restarts the fetch. */
  url: string;
  /** Forwarded unchanged to `StagedLinkPreview` as its `onClose` handler. */
  onClose: () => void;
};
|
||||
// Delay, in milliseconds, after which an in-flight link preview fetch is aborted.
const LINK_PREVIEW_TIMEOUT = 60 * 1000;
|
||||
|
||||
/** Image payload attached to a fetched link preview. */
export interface GetLinkPreviewResultImage {
  /** Raw bytes of the image after auto-scaling. */
  data: ArrayBuffer;
  /** Byte length of `data`. */
  size: number;
  /** MIME type of the scaled image file. */
  contentType: string;
  /** Image width. NOTE(review): presumably pixels — confirm against `getImageDimensions`. */
  width: number;
  /** Image height. NOTE(review): presumably pixels — confirm against `getImageDimensions`. */
  height: number;
}
|
||||
|
||||
/** Everything `getPreview` resolves with for a successfully previewed link. */
export interface GetLinkPreviewResult {
  /** Page title taken from the fetched metadata. */
  title: string;
  /** The URL that was previewed (the one passed to `getPreview`). */
  url: string;
  /** Preview image; absent when none was found or it could not be fetched. */
  image?: GetLinkPreviewResultImage;
  /** Page description, or `null` when the page declares none. */
  description: string | null;
  /** Publication/modification timestamp from the metadata, or `null`. */
  date: number | null;
}
|
||||
|
||||
/**
 * Fetches the preview for `url`: the page metadata and, when the page advertises a
 * safe image link, that image (auto-scaled and measured).
 *
 * Resolves with `null` when the URL is not safe to preview or no metadata could be
 * fetched. A failure while fetching or processing the image is logged and the preview
 * is still returned, just without an `image`.
 *
 * @param url link to preview
 * @param abortSignal signal handed to every network request made here
 */
const getPreview = async (
  url: string,
  abortSignal: AbortSignal
): Promise<null | GetLinkPreviewResult> => {
  const linkPreviews = window.Signal.LinkPreviews;

  // Callers are expected to have vetted the URL already; this is a second line of defence.
  if (!linkPreviews.isLinkSafeToPreview(url)) {
    return null;
  }

  const metadata = await LinkPreviewUtil.fetchLinkPreviewMetadata(
    fetch,
    url,
    abortSignal
  );
  if (!metadata) {
    return null;
  }

  let image;
  const { imageHref } = metadata;
  if (imageHref && linkPreviews.isLinkSafeToPreview(imageHref)) {
    // Only needed while measuring the image; always revoked in `finally`.
    let temporaryObjectUrl: void | string;
    try {
      const downloaded = await fetchLinkPreviewImage(
        fetch,
        imageHref,
        abortSignal
      );
      if (!downloaded) {
        throw new Error('Failed to fetch link preview image');
      }

      // Shrink the image if needed so it satisfies the attachment requirements.
      const { contentType } = downloaded;
      const scaled = await AttachmentUtil.autoScale({
        contentType,
        file: new Blob([downloaded.data], { type: contentType }),
      });

      const data = await arrayBufferFromFile(scaled.file);
      temporaryObjectUrl = URL.createObjectURL(scaled.file);

      const dimensions = await window.Signal.Types.VisualAttachment.getImageDimensions(
        {
          objectUrl: temporaryObjectUrl,
          logger: window.log,
        }
      );

      image = {
        data,
        size: data.byteLength,
        ...dimensions,
        contentType: scaled.file.type,
      };
    } catch (error) {
      // A missing image must not prevent the rest of the preview from showing.
      window.log.error(
        'getPreview failed to get image for link preview:',
        error.message
      );
    } finally {
      if (temporaryObjectUrl) {
        URL.revokeObjectURL(temporaryObjectUrl);
      }
    }
  }

  return {
    title: metadata.title,
    url,
    image,
    description: metadata.description,
    date: metadata.date,
  };
};
|
||||
|
||||
/**
 * Fetches the link preview for `props.url` and renders it through
 * `StagedLinkPreview`. The fetch restarts whenever `props.url` changes.
 *
 * Each run of the effect owns its abort controller, timeout timer and image object URL,
 * and releases all three in its cleanup, so a superseded or unmounted request can
 * neither leak resources nor overwrite the state of a newer request.
 */
export const SessionStagedLinkPreview = (props: Props) => {
  const [isLoaded, setIsLoaded] = useState(false);
  const [title, setTitle] = useState<string | null>(null);
  const [domain, setDomain] = useState<string | null>(null);
  const [description, setDescription] = useState<string | null>(null);
  const [image, setImage] = useState<AttachmentType | undefined>(undefined);

  useEffect(() => {
    // Aborting this controller stops the in-flight requests of this effect run.
    const abortController = new AbortController();
    // Flipped by the cleanup; late results of this run must then be ignored.
    let isCancelled = false;
    // Object URL created for the preview image; revoked by the cleanup.
    let imageObjectUrl: string | undefined;

    const timeoutId = setTimeout(() => {
      abortController.abort();
    }, LINK_PREVIEW_TIMEOUT);

    setIsLoaded(false);
    setTitle(null);
    setDomain(null);
    setDescription(null);
    setImage(undefined);

    getPreview(props.url, abortController.signal)
      .then(ret => {
        clearTimeout(timeoutId);
        if (isCancelled) {
          return;
        }

        setIsLoaded(true);
        if (!ret) {
          return;
        }

        setTitle(ret.title);
        // Only show images whose dimensions could be determined.
        if (ret.image?.width) {
          const blob = new Blob([ret.image.data], {
            type: ret.image.contentType,
          });
          imageObjectUrl = URL.createObjectURL(blob);
          const imageAttachment = {
            ...ret.image,
            url: imageObjectUrl,
            fileName: 'preview',
          };
          setImage(imageAttachment);
        }
        setDomain(window.Signal.LinkPreviews.getDomain(ret.url));
        if (ret.description) {
          setDescription(ret.description);
        }
      })
      .catch(() => {
        clearTimeout(timeoutId);
        abortController.abort();
        if (!isCancelled) {
          setIsLoaded(true);
        }
      });

    return () => {
      isCancelled = true;
      clearTimeout(timeoutId);
      // Cancel other in-flight link preview requests.
      abortController.abort();
      if (imageObjectUrl) {
        URL.revokeObjectURL(imageObjectUrl);
      }
    };
  }, [props.url]);

  return (
    <StagedLinkPreview
      onClose={props.onClose}
      isLoaded={isLoaded}
      title={title}
      domain={domain}
      image={image as any}
      description={description}
    />
  );
};
|
@ -0,0 +1,41 @@
|
||||
import { assert } from 'chai';
|
||||
|
||||
import { isLinkPreviewDateValid } from '../../util/isLinkPreviewDateValid';
|
||||
|
||||
describe('isLinkPreviewDateValid', () => {
  // Asserts that every candidate yields exactly `expected` (strict boolean comparison).
  const expectEach = (
    candidates: ReadonlyArray<unknown>,
    expected: boolean
  ): void => {
    candidates.forEach(candidate => {
      assert.strictEqual(isLinkPreviewDateValid(candidate), expected);
    });
  };

  it('returns false for non-numbers', () => {
    expectEach([null, undefined, Date.now().toString(), new Date()], false);
  });

  it('returns false for zero', () => {
    expectEach([0, -0], false);
  });

  it('returns false for NaN', () => {
    expectEach([0 / 0], false);
  });

  it('returns false for any infinite value', () => {
    expectEach([Infinity, -Infinity], false);
  });

  it('returns false for timestamps more than a day from now', () => {
    const twoDays = 2 * 24 * 60 * 60 * 1000;
    expectEach([Date.now() + twoDays], false);
  });

  it('returns true for timestamps before tomorrow', () => {
    expectEach(
      [
        Date.now(),
        Date.now() + 123,
        Date.now() - 123,
        new Date(1995, 3, 20).valueOf(),
        new Date(1970, 3, 20).valueOf(),
        // Before the epoch: negative timestamps are accepted too.
        new Date(1969, 3, 20).valueOf(),
        1,
      ],
      true
    );
  });
});
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,11 @@
|
||||
// Number of milliseconds in 24 hours.
const ONE_DAY = 24 * 60 * 60 * 1000;

/**
 * Type guard for link preview dates.
 *
 * A value is accepted when it is a finite, non-zero number that lies less than one day
 * in the future. Negative timestamps (dates before 1970) are accepted.
 *
 * @param value candidate timestamp in milliseconds since the Unix epoch
 * @returns `true` when `value` is usable as a link preview date
 */
export function isLinkPreviewDateValid(value: unknown): value is number {
  if (typeof value !== 'number' || value === 0 || !Number.isFinite(value)) {
    return false;
  }
  return value < Date.now() + ONE_DAY;
}
|
@ -0,0 +1,569 @@
|
||||
import { RequestInit, Response } from 'node-fetch';
|
||||
import { AbortSignal } from 'abort-controller';
|
||||
|
||||
import {
|
||||
IMAGE_GIF,
|
||||
IMAGE_ICO,
|
||||
IMAGE_JPEG,
|
||||
IMAGE_PNG,
|
||||
IMAGE_WEBP,
|
||||
MIMEType,
|
||||
} from '../types/MIME';
|
||||
|
||||
// Upper bound on the number of requests made for one href, redirects included.
const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20;
// tslint:disable: prefer-for-of

// Status codes treated as redirects, taken from the `fetch` spec:
// https://fetch.spec.whatwg.org/#redirect-status
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);

// Content-Type header values longer than this are not parsed at all.
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;

// HTML of any Content-Length (including an unspecified one) is accepted, but only this
// many bytes of it are ever loaded. A 99 gigabyte page may start loading, yet only its
// first 500 kilobytes are parsed. When the Content-Length is smaller than this, the
// smaller value is used so no space is wasted.
const MAX_HTML_BYTES_TO_LOAD = 500 * 1024;

// `<title>x` is 8 bytes; nothing useful (meta tags, etc) fits in less, so shorter
// bodies are ignored. Mostly a guard against empty response bodies.
const MIN_HTML_CONTENT_LENGTH = 8;

// Same idea for images: tiny images are not worth showing (although the more likely
// case is a Content-Length of 0).
const MIN_IMAGE_CONTENT_LENGTH = 8;
const MAX_IMAGE_CONTENT_LENGTH = 1024 * 1024;
// The only image MIME types accepted for link preview images.
const VALID_IMAGE_MIME_TYPES = new Set<MIMEType>([
  IMAGE_GIF,
  IMAGE_ICO,
  IMAGE_JPEG,
  IMAGE_PNG,
  IMAGE_WEBP,
]);

// Bounds used to discard unreasonable dates. Update this in ~950 years. (Some reasonable
// dates may be discarded too, which is acceptable because this is only for link previews.)
const MIN_DATE = 0;
const MAX_DATE = new Date(3000, 0, 1).valueOf();

// Shared result for Content-Type headers that are missing or cannot be parsed.
const emptyContentType = { type: null, charset: null };
|
||||
|
||||
// Signature of the injected fetch implementation, expressed with node-fetch's `RequestInit` and `Response`.
type FetchFn = (href: string, init: RequestInit) => Promise<Response>;
|
||||
|
||||
/** Metadata extracted from a page's HTML for a link preview. */
export interface LinkPreviewMetadata {
  /** `og:title`, falling back to the document's `<title>`; never empty. */
  title: string;
  /** `og:description` or the `description` meta tag; `null` when neither is set. */
  description: null | string;
  /** Published/modified time in milliseconds since the epoch; `null` when absent or out of range. */
  date: null | number;
  /** Absolute URL of the preview image or icon; `null` when none was found. */
  imageHref: null | string;
}
|
||||
|
||||
/** A downloaded link preview image. */
export interface LinkPreviewImage {
  /** Raw response body. */
  data: ArrayBuffer;
  /** MIME type parsed from the response's Content-Type header. */
  contentType: MIMEType;
}
|
||||
|
||||
/**
 * Result of parsing a Content-Type header: either nothing usable (both fields `null`)
 * or a MIME type with an optional charset.
 */
type ParsedContentType =
  | { type: null; charset: null }
  | { type: MIMEType; charset: null | string };
|
||||
|
||||
/**
 * Fetches `href`, following redirects by hand so every hop can be checked.
 *
 * Each request is made with `redirect: 'manual'`. A redirect is followed only when it
 * carries a Location header that resolves to an `https:` URL which has not been
 * requested before. At most `MAX_REQUEST_COUNT_WITH_REDIRECTS` requests are made.
 *
 * The thrown errors are deliberately terse: the reason is logged here and callers are
 * expected to catch immediately.
 *
 * @returns the first response whose status is not a redirect status
 * @throws Error on a redirect loop, a missing or invalid Location, or too many redirects
 */
async function fetchWithRedirects(
  fetchFn: FetchFn,
  href: string,
  options: RequestInit
): Promise<Response> {
  const visitedHrefs = new Set<string>();
  let currentHref = href;
  let requestsMade = 0;

  while (requestsMade < MAX_REQUEST_COUNT_WITH_REDIRECTS) {
    requestsMade += 1;

    if (visitedHrefs.has(currentHref)) {
      window.log.warn('fetchWithRedirects: found a redirect loop');
      throw new Error('redirect loop');
    }
    visitedHrefs.add(currentHref);

    // Each hop depends on the previous one, so this `await` deliberately lives in the loop.
    // eslint-disable-next-line no-await-in-loop
    const response = await fetchFn(currentHref, {
      ...options,
      redirect: 'manual',
    });

    const isRedirect = REDIRECT_STATUSES.has(response.status);
    if (!isRedirect) {
      return response;
    }

    const location = response.headers.get('location');
    if (!location) {
      window.log.warn(
        'fetchWithRedirects: got a redirect status code but no Location header; bailing'
      );
      throw new Error('no location with redirect');
    }

    // Location may be relative; resolve it against the URL that produced it.
    const redirectTarget = maybeParseUrl(location, currentHref);
    if (redirectTarget?.protocol !== 'https:') {
      window.log.warn(
        'fetchWithRedirects: got a redirect status code and an invalid Location header'
      );
      throw new Error('invalid location');
    }

    currentHref = redirectTarget.href;
  }

  window.log.warn('fetchWithRedirects: too many redirects');
  throw new Error('too many redirects');
}
|
||||
|
||||
/**
 * Resolves `href` against `base` and strips the fragment, which is never needed.
 *
 * @returns the parsed URL, or `null` when the combination is not a valid URL
 */
function maybeParseUrl(href: string, base: string): null | URL {
  try {
    const parsed = new URL(href, base);
    parsed.hash = '';
    return parsed;
  } catch (err) {
    return null;
  }
}
|
||||
|
||||
/**
 * Parses a Content-Type header value into its MIME type and optional charset. This is a
 * simplified take on [RFC 2045][0] that is good enough for link previews.
 *
 * The whole value is lower-cased first. Missing headers, headers longer than
 * `MAX_CONTENT_TYPE_LENGTH_TO_PARSE`, and headers without a type all yield
 * `emptyContentType`. The first parameter with a non-empty `charset` wins.
 *
 * [0]: https://tools.ietf.org/html/rfc2045
 */
const parseContentType = (headerValue: string | null): ParsedContentType => {
  if (!headerValue || headerValue.length > MAX_CONTENT_TYPE_LENGTH_TO_PARSE) {
    return emptyContentType;
  }

  const parts = headerValue
    .toLowerCase()
    .split(/;/g)
    .map(part => part.trim())
    .filter(Boolean);

  const rawType = parts[0];
  if (!rawType) {
    return emptyContentType;
  }

  let charset: null | string = null;
  for (const rawParameter of parts.slice(1)) {
    // `URLSearchParams` is borrowed here as a cheap `key=value` parser.
    const candidate = new URLSearchParams(rawParameter).get('charset')?.trim();
    if (candidate) {
      charset = candidate;
      break;
    }
  }

  return { type: rawType, charset };
};
|
||||
|
||||
/**
 * Reports whether a Content-Disposition header allows inline display.
 *
 * A missing or empty header counts as inline. Otherwise the disposition type (the part
 * before the first `;`) must be `inline`. The type is trimmed and compared
 * case-insensitively, as RFC 6266 specifies, so `Inline` and `inline ; filename=x` are
 * accepted as well.
 */
const isInlineContentDisposition = (headerValue: string | null): boolean => {
  if (!headerValue) {
    return true;
  }
  const [dispositionType = ''] = headerValue.split(';', 1);
  return dispositionType.trim().toLowerCase() === 'inline';
};
|
||||
|
||||
/**
 * Parses a Content-Length header value.
 *
 * Only values made of 1 to 10 decimal digits are parsed; gigantic lengths are not worth
 * parsing. Anything else (missing header, signs, decimals, whitespace, 11+ digits)
 * yields `Infinity`, meaning "unknown or too large".
 */
const parseContentLength = (headerValue: string | null): number => {
  if (typeof headerValue !== 'string') {
    return Infinity;
  }
  if (!/^\d{1,10}$/.test(headerValue)) {
    return Infinity;
  }

  const parsed = parseInt(headerValue, 10);
  return Number.isNaN(parsed) ? Infinity : parsed;
};
|
||||
|
||||
/** Creates a fresh HTML document by parsing an empty string as `text/html`. */
const emptyHtmlDocument = (): HTMLDocument => {
  const parser = new DOMParser();
  return parser.parseFromString('', 'text/html');
};
|
||||
|
||||
/**
 * Decodes `bytes` and parses the result as an HTML document.
 *
 * The charset is chosen following the [W3 guidelines][0], in this priority order:
 * a UTF-8 BOM, the HTTP header charset (`httpCharset`), the `http-equiv` meta tag, the
 * `charset` meta tag, and finally a UTF-8 fallback. (This fallback could, perhaps, be
 * smarter based on user locale.)
 *
 * Charset labels that `TextDecoder` does not support are skipped. In particular, an
 * unsupported label declared inside the document simply leaves the UTF-8 parse in place
 * rather than triggering another decode attempt with the same unusable label.
 *
 * [0]: https://www.w3.org/International/questions/qa-html-encoding-declarations.en
 *
 * @param bytes raw (possibly truncated) HTML bytes
 * @param httpCharset charset taken from the HTTP Content-Type header, if any
 * @returns the parsed document; an empty document when `DOMParser` fails
 */
const parseHtmlBytes = (
  bytes: Readonly<Uint8Array>,
  httpCharset: string | null
): HTMLDocument => {
  // `new TextDecoder(label)` throws for unsupported labels; map that to `null`.
  const decoderFor = (label: string): TextDecoder | null => {
    try {
      return new TextDecoder(label);
    } catch (err) {
      return null;
    }
  };

  // Decodes `bytes` with `decoder` and parses the text, never throwing.
  const decodeToDocument = (decoder: TextDecoder): HTMLDocument => {
    let decoded: string;
    try {
      decoded = decoder.decode(bytes);
    } catch (err) {
      decoded = '';
    }

    try {
      return new DOMParser().parseFromString(decoded, 'text/html');
    } catch (err) {
      return emptyHtmlDocument();
    }
  };

  const hasBom = bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf;

  // A BOM or a usable HTTP charset is authoritative; the document is not consulted.
  let authoritativeDecoder: TextDecoder | null = null;
  if (hasBom) {
    authoritativeDecoder = new TextDecoder();
  } else if (httpCharset) {
    authoritativeDecoder = decoderFor(httpCharset);
  }
  if (authoritativeDecoder) {
    return decodeToDocument(authoritativeDecoder);
  }

  // Otherwise parse as UTF-8 first so the document's own declarations can be read.
  const utf8Document = decodeToDocument(new TextDecoder());

  const httpEquiv = utf8Document
    .querySelector('meta[http-equiv="content-type"]')
    ?.getAttribute('content');
  const httpEquivCharset = httpEquiv
    ? parseContentType(httpEquiv).charset
    : null;
  const metaCharset = utf8Document
    .querySelector('meta[charset]')
    ?.getAttribute('charset');

  // Try the declared charsets in priority order; the first supported one wins.
  for (const declaredCharset of [httpEquivCharset, metaCharset]) {
    const declaredDecoder = declaredCharset ? decoderFor(declaredCharset) : null;
    if (declaredDecoder) {
      return decodeToDocument(declaredDecoder);
    }
  }

  return utf8Document;
};
|
||||
|
||||
/**
 * Streams the beginning of an HTML response body and parses it.
 *
 * At most `min(contentLength, MAX_HTML_BYTES_TO_LOAD)` bytes are buffered. Reading stops
 * when the body ends, that limit is reached, `abortSignal` is aborted, or the stream
 * throws (which is logged; whatever was read so far is still used). The buffered bytes
 * are parsed once, after reading has stopped, instead of after every chunk.
 *
 * @param body response body stream
 * @param contentLength parsed Content-Length (`Infinity` when unknown)
 * @param httpCharset charset from the HTTP Content-Type header, if any
 * @param abortSignal stops reading further chunks once aborted
 * @returns the parsed document; an empty document when nothing could be read or parsed
 */
const getHtmlDocument = async (
  body: AsyncIterable<string | Uint8Array>,
  contentLength: number,
  httpCharset: string | null,
  abortSignal: AbortSignal
): Promise<HTMLDocument> => {
  const maxHtmlBytesToLoad = Math.min(contentLength, MAX_HTML_BYTES_TO_LOAD);
  const buffer = new Uint8Array(new ArrayBuffer(maxHtmlBytesToLoad));
  let bytesLoadedSoFar = 0;

  try {
    // `for ... of` is much cleaner here, so we allow it.
    /* eslint-disable no-restricted-syntax */
    for await (const chunk of body) {
      if (abortSignal.aborted) {
        break;
      }

      // This branch exists to satisfy TypeScript; chunk should always be a Buffer.
      let chunkBytes: Uint8Array;
      if (typeof chunk === 'string') {
        if (
          httpCharset !== null &&
          httpCharset !== undefined &&
          Buffer.isEncoding(httpCharset)
        ) {
          chunkBytes = Buffer.from(chunk, httpCharset);
        } else {
          chunkBytes = Buffer.from(chunk, 'utf8');
        }
      } else {
        chunkBytes = chunk;
      }

      // Copy only as much of the chunk as still fits below the limit.
      const truncatedChunk = chunkBytes.subarray(
        0,
        maxHtmlBytesToLoad - bytesLoadedSoFar
      );
      buffer.set(truncatedChunk, bytesLoadedSoFar);
      bytesLoadedSoFar += truncatedChunk.byteLength;

      if (bytesLoadedSoFar >= maxHtmlBytesToLoad) {
        break;
      }
    }
    /* eslint-enable no-restricted-syntax */
  } catch (err) {
    window.log.warn(
      'getHtmlDocument: error when reading body; continuing with what we got'
    );
  }

  if (bytesLoadedSoFar === 0) {
    return emptyHtmlDocument();
  }

  try {
    return parseHtmlBytes(buffer.slice(0, bytesLoadedSoFar), httpCharset);
  } catch (err) {
    window.log.warn(
      'getHtmlDocument: failed to parse HTML; continuing with an empty document'
    );
    return emptyHtmlDocument();
  }
};
|
||||
|
||||
/**
 * Looks up `<meta property="…">` tags in the given order and returns the trimmed
 * `content` of the first one that has non-empty content.
 *
 * @param document document to search
 * @param properties property names to try, most preferred first
 * @returns the first non-empty content, or `null` when none is found
 */
const getOpenGraphContent = (
  document: HTMLDocument,
  properties: ReadonlyArray<string>
): string | null => {
  for (const property of properties) {
    const metaElement = document.querySelector(`meta[property="${property}"]`);
    const content = metaElement?.getAttribute('content')?.trim();
    if (content) {
      return content;
    }
  }
  return null;
};
|
||||
|
||||
/**
 * Looks up `<link rel="…">` tags in the given order and returns the trimmed `href` of
 * the first one that has a non-empty href. The `rel` value must match exactly.
 *
 * @param document document to search
 * @param rels `rel` values to try, most preferred first
 * @returns the first non-empty href, or `null` when none is found
 */
const getLinkHrefAttribute = (
  document: HTMLDocument,
  rels: ReadonlyArray<string>
): string | null => {
  for (const rel of rels) {
    const linkElement = document.querySelector(`link[rel="${rel}"]`);
    const href = linkElement?.getAttribute('href')?.trim();
    if (href) {
      return href;
    }
  }
  return null;
};
|
||||
|
||||
/**
 * Extracts link preview metadata from a parsed HTML document.
 *
 * - title: `og:title`, else the document title. Without a title there is no preview and
 *   `null` is returned.
 * - description: `og:description`, else the `description` meta tag, else `null`.
 * - image: `og:image` / `og:image:url`, else an icon `<link>`, resolved against `href`.
 * - date: the first published/modified time that parses to a timestamp strictly between
 *   `MIN_DATE` and `MAX_DATE`.
 *
 * @param document parsed page
 * @param href URL of the page, used as base for relative image links
 */
const parseMetadata = (
  document: HTMLDocument,
  href: string
): LinkPreviewMetadata | null => {
  const title =
    getOpenGraphContent(document, ['og:title']) || document.title.trim();
  if (!title) {
    window.log.warn(
      "parseMetadata: HTML document doesn't have a title; bailing"
    );
    return null;
  }

  let description = getOpenGraphContent(document, ['og:description']);
  if (!description) {
    description =
      document
        .querySelector('meta[name="description"]')
        ?.getAttribute('content')
        ?.trim() || null;
  }

  let rawImageHref = getOpenGraphContent(document, ['og:image', 'og:image:url']);
  if (!rawImageHref) {
    rawImageHref = getLinkHrefAttribute(document, [
      'shortcut icon',
      'icon',
      'apple-touch-icon',
    ]);
  }
  let imageHref: string | null = null;
  if (rawImageHref) {
    const imageUrl = maybeParseUrl(rawImageHref, href);
    if (imageUrl) {
      imageHref = imageUrl.href;
    }
  }

  const rawDate = getOpenGraphContent(document, [
    'og:published_time',
    'article:published_time',
    'og:modified_time',
    'article:modified_time',
  ]);
  // An unparseable date becomes NaN, which fails both comparisons below.
  const parsedDate = rawDate ? Date.parse(rawDate) : NaN;
  const date =
    parsedDate > MIN_DATE && parsedDate < MAX_DATE ? parsedDate : null;

  return {
    title,
    description,
    imageHref,
    date,
  };
};
|
||||
|
||||
/**
 * This attempts to fetch link preview metadata, returning `null` if it cannot be found
 * for any reason.
 *
 * NOTE: This does NOT validate the incoming URL for safety. For example, it may fetch an
 * insecure HTTP href. It also does not offer a timeout; that is up to the caller.
 *
 * At a high level, it:
 *
 * 1. Makes a GET request, following redirects manually (at most 20 requests in total).
 * 2. Checks the response status code and headers to make sure it's a normal HTML
 *    response.
 * 3. Streams up to `MAX_HTML_BYTES_TO_LOAD`, stopping when (1) it has loaded all of the
 *    HTML (2) loaded the maximum number of bytes (3) the request was aborted.
 * 4. Parses the resulting HTML with `DOMParser`.
 * 5. Grabs the title, description, image URL, and date.
 *
 * Once a response has been received, its body stream is destroyed on every exit path,
 * including the early `null` returns, so rejected responses do not stay open.
 *
 * @param fetchFn fetch implementation to use
 * @param href URL of the page to preview
 * @param abortSignal aborts the request and the body streaming
 */
export async function fetchLinkPreviewMetadata(
  fetchFn: FetchFn,
  href: string,
  abortSignal: AbortSignal
): Promise<null | LinkPreviewMetadata> {
  let response: Response;
  try {
    response = await fetchWithRedirects(fetchFn, href, {
      headers: {
        Accept: 'text/html,application/xhtml+xml',
        'User-Agent': 'WhatsApp',
      },
      signal: abortSignal,
    });
  } catch (err) {
    window.log.warn(
      'fetchLinkPreviewMetadata: failed to fetch link preview HTML; bailing'
    );
    return null;
  }

  try {
    if (!response.ok) {
      window.log.warn(
        `fetchLinkPreviewMetadata: got a ${response.status} status code; bailing`
      );
      return null;
    }

    if (!response.body) {
      window.log.warn('fetchLinkPreviewMetadata: no response body; bailing');
      return null;
    }

    if (
      !isInlineContentDisposition(response.headers.get('Content-Disposition'))
    ) {
      window.log.warn(
        'fetchLinkPreviewMetadata: Content-Disposition header is not inline; bailing'
      );
      return null;
    }

    if (abortSignal.aborted) {
      return null;
    }

    const contentLength = parseContentLength(
      response.headers.get('Content-Length')
    );
    if (contentLength < MIN_HTML_CONTENT_LENGTH) {
      window.log.warn(
        'fetchLinkPreviewMetadata: Content-Length is too short; bailing'
      );
      return null;
    }

    const contentType = parseContentType(response.headers.get('Content-Type'));
    if (contentType.type !== 'text/html') {
      window.log.warn(
        'fetchLinkPreviewMetadata: Content-Type is not HTML; bailing'
      );
      return null;
    }

    const document = await getHtmlDocument(
      response.body,
      contentLength,
      contentType.charset,
      abortSignal
    );

    if (abortSignal.aborted) {
      return null;
    }

    return parseMetadata(document, response.url);
  } finally {
    // [The Node docs about `ReadableStream.prototype[Symbol.asyncIterator]`][0] say that
    // the stream will be destroyed if you `break` out of the loop, but that could not be
    // reproduced. Also [`destroy` is a documented method][1] but it is not in the
    // Node types, which is why we do this cast to `any`.
    // [0]: https://nodejs.org/docs/latest-v12.x/api/stream.html#stream_readable_symbol_asynciterator
    // [1]: https://nodejs.org/docs/latest-v12.x/api/stream.html#stream_readable_destroy_error
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      (response.body as any).destroy();
    } catch (err) {
      // Ignored: there may be no body, or it may already be destroyed.
    }
  }
}
|
||||
|
||||
/**
 * This attempts to fetch an image, returning `null` if it fails for any reason.
 *
 * The response must be OK, declare a Content-Length between `MIN_IMAGE_CONTENT_LENGTH`
 * and `MAX_IMAGE_CONTENT_LENGTH` (an unset length counts as too large), and carry one of
 * the `VALID_IMAGE_MIME_TYPES` as its Content-Type.
 *
 * NOTE: This does NOT validate the incoming URL for safety. For example, it may fetch an
 * insecure HTTP href. It also does not offer a timeout; that is up to the caller.
 *
 * @param fetchFn fetch implementation to use
 * @param href URL of the image
 * @param abortSignal aborts the request
 */
export async function fetchLinkPreviewImage(
  fetchFn: FetchFn,
  href: string,
  abortSignal: AbortSignal
): Promise<null | LinkPreviewImage> {
  // Logs why the image is being rejected and produces the `null` result.
  const bail = (reason: string): null => {
    window.log.warn(reason);
    return null;
  };

  let response: Response;
  try {
    response = await fetchWithRedirects(fetchFn, href, {
      headers: {
        'User-Agent': 'WhatsApp',
      },
      size: MAX_IMAGE_CONTENT_LENGTH,
      signal: abortSignal,
    });
  } catch (err) {
    return bail('fetchLinkPreviewImage: failed to fetch image; bailing');
  }

  if (abortSignal.aborted) {
    return null;
  }

  if (!response.ok) {
    return bail(
      `fetchLinkPreviewImage: got a ${response.status} status code; bailing`
    );
  }

  const declaredLength = parseContentLength(
    response.headers.get('Content-Length')
  );
  if (declaredLength < MIN_IMAGE_CONTENT_LENGTH) {
    return bail('fetchLinkPreviewImage: Content-Length is too short; bailing');
  }
  if (declaredLength > MAX_IMAGE_CONTENT_LENGTH) {
    return bail(
      'fetchLinkPreviewImage: Content-Length is too large or is unset; bailing'
    );
  }

  const contentType = parseContentType(response.headers.get('Content-Type'))
    .type;
  if (!contentType || !VALID_IMAGE_MIME_TYPES.has(contentType)) {
    return bail('fetchLinkPreviewImage: Content-Type is not an image; bailing');
  }

  try {
    const data: ArrayBuffer = await response.arrayBuffer();
    return { data, contentType };
  } catch (err) {
    return bail('fetchLinkPreviewImage: failed to read body; bailing');
  }
}
|
Loading…
Reference in New Issue