add staged link previews for everything from signal
parent
f5818817e9
commit
13e02b5bf1
@ -1 +1,3 @@
|
|||||||
|
export function isLinkSafeToPreview(link: string): boolean;
|
||||||
|
|
||||||
export function isLinkSneaky(link: string): boolean;
|
export function isLinkSneaky(link: string): boolean;
|
||||||
|
@ -0,0 +1,174 @@
|
|||||||
|
import React, { useEffect, useState } from 'react';
|
||||||
|
import { arrayBufferFromFile, AttachmentType } from '../../../types/Attachment';
|
||||||
|
import { AttachmentUtil, LinkPreviewUtil } from '../../../util';
|
||||||
|
import { StagedLinkPreview } from '../../conversation/StagedLinkPreview';
|
||||||
|
import fetch from 'node-fetch';
|
||||||
|
import { fetchLinkPreviewImage } from '../../../util/linkPreviewFetch';
|
||||||
|
import { AbortController, AbortSignal } from 'abort-controller';
|
||||||
|
|
||||||
|
/** Props accepted by `SessionStagedLinkPreview`. */
type Props = {
  /** Link whose preview is fetched and displayed. */
  url: string;
  /** Forwarded unchanged to `StagedLinkPreview` as its `onClose` prop. */
  onClose: () => void;
};

/** Milliseconds after which in-flight link preview requests are aborted. */
const LINK_PREVIEW_TIMEOUT = 60 * 1000;

/** Image part of a fetched link preview, as built by `getPreview`. */
export interface GetLinkPreviewResultImage {
  /** Raw bytes of the (possibly auto-scaled) image. */
  data: ArrayBuffer;
  /** Byte length of `data`. */
  size: number;
  /** MIME type reported by the scaled image file. */
  contentType: string;
  // NOTE(review): width/height are presumably pixel dimensions coming from
  // `getImageDimensions` — confirm against that helper.
  width: number;
  height: number;
}

/** Everything `getPreview` resolves with when a preview could be built. */
export interface GetLinkPreviewResult {
  title: string;
  /** The URL the preview was requested for. */
  url: string;
  /** Absent when no image was advertised or the image could not be fetched. */
  image?: GetLinkPreviewResultImage;
  description: string | null;
  date: number | null;
}
|
||||||
|
|
||||||
|
/**
 * Fetches the metadata (and, when available, the image) needed to render a
 * link preview for `url`.
 *
 * @param url - Link to preview; rejected unless `isLinkSafeToPreview` accepts it.
 * @param abortSignal - Passed to every network request so the caller can cancel.
 * @returns The preview data, or `null` when the link is not safe to preview or
 *   no metadata could be fetched. A failure to fetch or process the image is
 *   logged and results in a preview without `image`.
 */
const getPreview = async (
  url: string,
  abortSignal: AbortSignal
): Promise<null | GetLinkPreviewResult> => {
  // This is already checked elsewhere, but we want to be extra-careful.
  if (!window.Signal.LinkPreviews.isLinkSafeToPreview(url)) {
    return null;
  }

  const linkPreviewMetadata = await LinkPreviewUtil.fetchLinkPreviewMetadata(
    fetch,
    url,
    abortSignal
  );
  if (!linkPreviewMetadata) {
    return null;
  }
  const { title, imageHref, description, date } = linkPreviewMetadata;

  let image;
  if (imageHref && window.Signal.LinkPreviews.isLinkSafeToPreview(imageHref)) {
    // Only set once the temporary object URL exists, so `finally` knows
    // whether there is anything to revoke.
    let objectUrl: undefined | string;
    try {
      const fullSizeImage = await fetchLinkPreviewImage(
        fetch,
        imageHref,
        abortSignal
      );
      if (!fullSizeImage) {
        throw new Error('Failed to fetch link preview image');
      }

      // Ensure that this file is either small enough or is resized to meet our
      // requirements for attachments
      const withBlob = await AttachmentUtil.autoScale({
        contentType: fullSizeImage.contentType,
        file: new Blob([fullSizeImage.data], {
          type: fullSizeImage.contentType,
        }),
      });

      const data = await arrayBufferFromFile(withBlob.file);
      objectUrl = URL.createObjectURL(withBlob.file);

      const dimensions = await window.Signal.Types.VisualAttachment.getImageDimensions(
        {
          objectUrl,
          logger: window.log,
        }
      );

      image = {
        data,
        size: data.byteLength,
        ...dimensions,
        contentType: withBlob.file.type,
      };
    } catch (error) {
      // We still want to show the preview if we failed to get an image
      window.log.error(
        'getPreview failed to get image for link preview:',
        error instanceof Error ? error.message : error
      );
    } finally {
      // The object URL was only needed to measure the image.
      if (objectUrl) {
        URL.revokeObjectURL(objectUrl);
      }
    }
  }

  return {
    title,
    url,
    image,
    description,
    date,
  };
};
|
||||||
|
|
||||||
|
/**
 * Fetches link preview data for `props.url` and renders it with
 * `StagedLinkPreview`.
 *
 * Whenever `props.url` changes (and on unmount) the previous request is
 * aborted, its timeout is cleared, its image object URL is released, and any
 * late result from it is ignored so it cannot overwrite the state belonging
 * to the current URL.
 */
export const SessionStagedLinkPreview = (props: Props) => {
  const [isLoaded, setIsLoaded] = useState(false);
  const [title, setTitle] = useState<string | null>(null);
  const [domain, setDomain] = useState<string | null>(null);
  const [description, setDescription] = useState<string | null>(null);
  const [image, setImage] = useState<AttachmentType | undefined>(undefined);

  useEffect(() => {
    // Use this abortcontroller to stop current fetch requests when url changed
    const abortController = new AbortController();
    const timeoutId = setTimeout(() => {
      abortController.abort();
    }, LINK_PREVIEW_TIMEOUT);

    // Flipped by the cleanup below. Distinguishes "this effect was replaced or
    // the component unmounted" from "the request merely timed out", because
    // only the latter should still mark the preview as loaded.
    let isStale = false;
    // Object URL handed to the rendered image; revoked in cleanup.
    let imageObjectUrl: string | undefined;

    setIsLoaded(false);
    setTitle(null);
    setDomain(null);
    setDescription(null);
    setImage(undefined);

    getPreview(props.url, abortController.signal)
      .then(ret => {
        if (isStale) {
          return;
        }
        setIsLoaded(true);
        if (!ret) {
          return;
        }

        setTitle(ret.title);

        const previewImage = ret.image;
        if (previewImage && previewImage.width) {
          const blob = new Blob([previewImage.data], {
            type: previewImage.contentType,
          });
          const objectUrl = URL.createObjectURL(blob);
          imageObjectUrl = objectUrl;
          const imageAttachment = {
            ...previewImage,
            url: objectUrl,
            fileName: 'preview',
          };
          setImage(imageAttachment);
        }

        setDomain(window.Signal.LinkPreviews.getDomain(ret.url));
        if (ret.description) {
          setDescription(ret.description);
        }
      })
      .catch(() => {
        abortController.abort();
        if (!isStale) {
          setIsLoaded(true);
        }
      });

    return () => {
      isStale = true;
      clearTimeout(timeoutId);
      // Cancel other in-flight link preview requests.
      abortController.abort();
      if (imageObjectUrl) {
        URL.revokeObjectURL(imageObjectUrl);
      }
    };
  }, [props.url]);

  return (
    <StagedLinkPreview
      onClose={props.onClose}
      isLoaded={isLoaded}
      title={title}
      domain={domain}
      image={image as any}
      description={description}
    />
  );
};
|
@ -0,0 +1,41 @@
|
|||||||
|
import { assert } from 'chai';
|
||||||
|
|
||||||
|
import { isLinkPreviewDateValid } from '../../util/isLinkPreviewDateValid';
|
||||||
|
|
||||||
|
// Mocha/chai suite for `isLinkPreviewDateValid`: only finite, non-zero
// numbers earlier than one day from now are accepted.
describe('isLinkPreviewDateValid', () => {
  it('returns false for non-numbers', () => {
    assert.isFalse(isLinkPreviewDateValid(null));
    assert.isFalse(isLinkPreviewDateValid(undefined));
    assert.isFalse(isLinkPreviewDateValid(Date.now().toString()));
    assert.isFalse(isLinkPreviewDateValid(new Date()));
  });

  it('returns false for zero', () => {
    assert.isFalse(isLinkPreviewDateValid(0));
    assert.isFalse(isLinkPreviewDateValid(-0));
  });

  it('returns false for NaN', () => {
    assert.isFalse(isLinkPreviewDateValid(0 / 0));
  });

  it('returns false for any infinite value', () => {
    assert.isFalse(isLinkPreviewDateValid(Infinity));
    assert.isFalse(isLinkPreviewDateValid(-Infinity));
  });

  it('returns false for timestamps more than a day from now', () => {
    const twoDays = 2 * 24 * 60 * 60 * 1000;
    assert.isFalse(isLinkPreviewDateValid(Date.now() + twoDays));
  });

  it('returns true for timestamps before tomorrow', () => {
    assert.isTrue(isLinkPreviewDateValid(Date.now()));
    assert.isTrue(isLinkPreviewDateValid(Date.now() + 123));
    assert.isTrue(isLinkPreviewDateValid(Date.now() - 123));
    assert.isTrue(isLinkPreviewDateValid(new Date(1995, 3, 20).valueOf()));
    assert.isTrue(isLinkPreviewDateValid(new Date(1970, 3, 20).valueOf()));
    // Negative timestamps (before the epoch) are accepted too.
    assert.isTrue(isLinkPreviewDateValid(new Date(1969, 3, 20).valueOf()));
    assert.isTrue(isLinkPreviewDateValid(1));
  });
});
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,11 @@
|
|||||||
|
const ONE_DAY = 24 * 60 * 60 * 1000;

/**
 * Type guard deciding whether `value` is usable as a link preview date.
 *
 * @param value - Candidate timestamp of any type.
 * @returns `true` only for a finite, non-zero number that is smaller than
 *   "now plus one day". Negative numbers pass; `NaN`, infinities, zero and
 *   non-number values do not.
 */
export function isLinkPreviewDateValid(value: unknown): value is number {
  const maximumLinkPreviewDate = Date.now() + ONE_DAY;
  return (
    typeof value === 'number' &&
    value !== 0 &&
    Number.isFinite(value) &&
    value < maximumLinkPreviewDate
  );
}
|
@ -0,0 +1,569 @@
|
|||||||
|
import { RequestInit, Response } from 'node-fetch';
|
||||||
|
import { AbortSignal } from 'abort-controller';
|
||||||
|
|
||||||
|
import {
|
||||||
|
IMAGE_GIF,
|
||||||
|
IMAGE_ICO,
|
||||||
|
IMAGE_JPEG,
|
||||||
|
IMAGE_PNG,
|
||||||
|
IMAGE_WEBP,
|
||||||
|
MIMEType,
|
||||||
|
} from '../types/MIME';
|
||||||
|
|
||||||
|
// Maximum number of requests `fetchWithRedirects` issues for one link,
// counting the initial request and every redirect hop.
const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20;
// tslint:disable: prefer-for-of

// Lifted from the `fetch` spec [here][0].
// [0]: https://fetch.spec.whatwg.org/#redirect-status
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);

// Content-Type header values longer than this are treated as absent.
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;

// Though we'll accept HTML of any Content-Length (including no specified length), we
// will only load some of the HTML. So we might start loading a 99 gigabyte HTML page
// but only parse the first 500 kilobytes. However, if the Content-Length is less than
// this, we won't waste space.
const MAX_HTML_BYTES_TO_LOAD = 500 * 1024;

// `<title>x` is 8 bytes. Nothing else (meta tags, etc) will even fit, so we can ignore
// it. This is mostly to protect us against empty response bodies.
const MIN_HTML_CONTENT_LENGTH = 8;

// Similar to the above. We don't want to show tiny images (even though the more likely
// case is that the Content-Length is 0).
const MIN_IMAGE_CONTENT_LENGTH = 8;
const MAX_IMAGE_CONTENT_LENGTH = 1024 * 1024;
const VALID_IMAGE_MIME_TYPES: Set<MIMEType> = new Set([
  IMAGE_GIF,
  IMAGE_ICO,
  IMAGE_JPEG,
  IMAGE_PNG,
  IMAGE_WEBP,
]);

// We want to discard unreasonable dates. Update this in ~950 years. (This may discard
// some reasonable dates, which is okay because it is only for link previews.)
const MIN_DATE = 0;
const MAX_DATE = new Date(3000, 0, 1).valueOf();

// Shared result for a missing, oversized or unparseable Content-Type header.
const emptyContentType = { type: null, charset: null };

/** Shape of the `fetch` implementation injected into the functions below. */
type FetchFn = (href: string, init: RequestInit) => Promise<Response>;

/** Metadata extracted from a page's HTML by `parseMetadata`. */
export interface LinkPreviewMetadata {
  title: string;
  description: null | string;
  /** Millisecond timestamp, or `null` when absent or outside the accepted range. */
  date: null | number;
  /** Absolute image URL, or `null` when the page advertises none. */
  imageHref: null | string;
}

/** Image bytes plus the MIME type taken from the response's Content-Type header. */
export interface LinkPreviewImage {
  data: ArrayBuffer;
  contentType: MIMEType;
}

/** Result of `parseContentType`: both fields are `null` when nothing could be parsed. */
type ParsedContentType =
  | { type: null; charset: null }
  | { type: MIMEType; charset: null | string };
|
||||||
|
|
||||||
|
/**
 * Performs a request with `fetchFn`, following redirects manually so every
 * hop can be validated.
 *
 * This throws non-helpful errors because (1) it logs (2) it will be
 * immediately caught.
 *
 * @param fetchFn - The `fetch` implementation to use.
 * @param href - The first URL to request.
 * @param options - Request options; `redirect` is always overridden to `'manual'`.
 * @returns The first response whose status is not a redirect status.
 * @throws Error on a redirect loop, a redirect without a `Location` header, a
 *   redirect target that cannot be parsed or is not `https:`, or when
 *   `MAX_REQUEST_COUNT_WITH_REDIRECTS` requests did not yield a final response.
 */
async function fetchWithRedirects(
  fetchFn: FetchFn,
  href: string,
  options: RequestInit
): Promise<Response> {
  const urlsSeen = new Set<string>();

  let nextHrefToLoad = href;
  for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) {
    if (urlsSeen.has(nextHrefToLoad)) {
      window.log.warn('fetchWithRedirects: found a redirect loop');
      throw new Error('redirect loop');
    }
    urlsSeen.add(nextHrefToLoad);

    // This `await` is deliberately inside of a loop.
    // eslint-disable-next-line no-await-in-loop
    const response = await fetchFn(nextHrefToLoad, {
      ...options,
      redirect: 'manual',
    });

    if (!REDIRECT_STATUSES.has(response.status)) {
      return response;
    }

    const location = response.headers.get('location');
    if (!location) {
      window.log.warn(
        'fetchWithRedirects: got a redirect status code but no Location header; bailing'
      );
      throw new Error('no location with redirect');
    }

    // `Location` may be relative, so resolve it against the URL just requested.
    const newUrl = maybeParseUrl(location, nextHrefToLoad);
    if (newUrl?.protocol !== 'https:') {
      window.log.warn(
        'fetchWithRedirects: got a redirect status code and an invalid Location header'
      );
      throw new Error('invalid location');
    }

    nextHrefToLoad = newUrl.href;
  }

  window.log.warn('fetchWithRedirects: too many redirects');
  throw new Error('too many redirects');
}
|
||||||
|
|
||||||
|
/**
 * Resolves `href` against `base` without throwing.
 *
 * @param href - Absolute or relative URL.
 * @param base - Base URL used to resolve a relative `href`.
 * @returns The parsed URL with its fragment removed, or `null` when the
 *   `URL` constructor rejects the input.
 */
function maybeParseUrl(href: string, base: string): null | URL {
  let result: URL;
  try {
    result = new URL(href, base);
  } catch (err) {
    return null;
  }
  // We never need the hash
  result.hash = '';
  return result;
}
|
||||||
|
|
||||||
|
/**
 * Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is
 * a simplified version for link previews).
 * [0]: https://tools.ietf.org/html/rfc2045
 *
 * The value is lowercased before parsing. A `charset` parameter may be given
 * bare (`charset=utf-8`) or as a quoted string (`charset="utf-8"`); the
 * surrounding quotes are removed so the result can be used as an encoding label.
 *
 * @param headerValue - Raw header value, or `null` when the header is absent.
 * @returns The media type and the first non-empty `charset` parameter.
 *   `emptyContentType` is returned when the header is missing, longer than
 *   `MAX_CONTENT_TYPE_LENGTH_TO_PARSE`, or contains no media type.
 */
const parseContentType = (headerValue: string | null): ParsedContentType => {
  if (!headerValue || headerValue.length > MAX_CONTENT_TYPE_LENGTH_TO_PARSE) {
    return emptyContentType;
  }

  const [rawType, ...rawParameters] = headerValue
    .toLowerCase()
    .split(/;/g)
    .map(part => part.trim())
    .filter(Boolean);
  if (!rawType) {
    return emptyContentType;
  }

  let charset: null | string = null;
  for (let i = 0; i < rawParameters.length; i += 1) {
    const rawParameter = rawParameters[i];
    // Each parameter has the form `name=value`, which URLSearchParams can split.
    const parsed = new URLSearchParams(rawParameter);
    const parsedCharset = parsed
      .get('charset')
      ?.trim()
      // Parameter values may be quoted strings; drop one pair of enclosing quotes.
      .replace(/^"(.*)"$/, '$1')
      .trim();
    if (parsedCharset) {
      charset = parsedCharset;
      break;
    }
  }

  return {
    type: rawType,
    charset,
  };
};
|
||||||
|
|
||||||
|
/**
 * Tells whether a Content-Disposition header allows the body to be treated as
 * inline content.
 *
 * @param headerValue - Raw header value, or `null` when the header is absent.
 * @returns `true` when the header is missing/empty or its disposition type
 *   (the part before the first `;`) is `inline`. The type is compared
 *   case-insensitively and ignoring surrounding whitespace.
 */
const isInlineContentDisposition = (headerValue: string | null): boolean => {
  if (!headerValue) {
    return true;
  }
  const [dispositionType = ''] = headerValue.split(';', 1);
  return dispositionType.trim().toLowerCase() === 'inline';
};
|
||||||
|
|
||||||
|
/**
 * Parses a Content-Length header value.
 *
 * @param headerValue - Raw header value, or `null` when the header is absent.
 * @returns The length as a number, or `Infinity` when the header is absent or
 *   is not a plain run of 1 to 10 decimal digits. Longer values are not
 *   parsed at all; they are treated the same as an unknown length.
 */
const parseContentLength = (headerValue: string | null): number => {
  // The pattern only admits 1-10 ASCII digits, so `parseInt` below always
  // yields a finite, non-negative integer.
  if (typeof headerValue !== 'string' || !/^\d{1,10}$/.test(headerValue)) {
    return Infinity;
  }
  return parseInt(headerValue, 10);
};
|
||||||
|
|
||||||
|
/** Creates a fresh HTML document by parsing an empty string with `DOMParser`. */
const emptyHtmlDocument = (): HTMLDocument =>
  new DOMParser().parseFromString('', 'text/html');
|
||||||
|
|
||||||
|
/**
 * Decodes `bytes` and parses the result as HTML.
 *
 * The charset behavior here follows the [W3 guidelines][0]. The priority is BOM, HTTP
 * header, `http-equiv` meta tag, `charset` meta tag, and finally a UTF-8 fallback.
 * (This fallback could, perhaps, be smarter based on user locale.)
 * [0]: https://www.w3.org/International/questions/qa-html-encoding-declarations.en
 *
 * A charset label that `TextDecoder` does not support is skipped and the next
 * candidate is tried. Every candidate is attempted at most once, so a page
 * declaring an unsupported charset in a `<meta>` tag still yields the
 * UTF-8-decoded document instead of re-parsing forever.
 *
 * @param bytes - The (possibly truncated) HTML bytes.
 * @param httpCharset - Charset taken from the HTTP Content-Type header, if any.
 * @returns The parsed document; an empty document when parsing throws.
 */
const parseHtmlBytes = (
  bytes: Readonly<Uint8Array>,
  httpCharset: string | null
): HTMLDocument => {
  // Returns a decoder for `label`, or `null` when the label is not supported.
  const createDecoder = (label: string): TextDecoder | null => {
    try {
      return new TextDecoder(label);
    } catch (err) {
      return null;
    }
  };

  // Decodes `bytes` with `decoder` and parses the text, never throwing.
  const decodeAndParse = (decoder: TextDecoder): HTMLDocument => {
    let decoded: string;
    try {
      decoded = decoder.decode(bytes);
    } catch (err) {
      decoded = '';
    }

    try {
      return new DOMParser().parseFromString(decoded, 'text/html');
    } catch (err) {
      return emptyHtmlDocument();
    }
  };

  // A UTF-8 byte order mark settles the question.
  const hasBom = bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf;
  if (hasBom) {
    return decodeAndParse(new TextDecoder());
  }

  const httpDecoder = httpCharset ? createDecoder(httpCharset) : null;
  if (httpDecoder) {
    return decodeAndParse(httpDecoder);
  }

  // Not sure of the charset yet: parse as UTF-8 and look for a declaration
  // inside the document itself.
  const utf8Document = decodeAndParse(new TextDecoder());

  const httpEquiv = utf8Document
    .querySelector('meta[http-equiv="content-type"]')
    ?.getAttribute('content');
  const httpEquivCharset = httpEquiv
    ? parseContentType(httpEquiv).charset
    : null;
  const metaCharset = utf8Document
    .querySelector('meta[charset]')
    ?.getAttribute('charset');

  const declaredCharsets = [httpEquivCharset, metaCharset];
  for (let i = 0; i < declaredCharsets.length; i += 1) {
    const declaredCharset = declaredCharsets[i];
    const decoder = declaredCharset ? createDecoder(declaredCharset) : null;
    if (decoder) {
      return decodeAndParse(decoder);
    }
  }

  return utf8Document;
};
|
||||||
|
|
||||||
|
/**
 * Streams an HTML response body and parses as much of it as we are willing to
 * load.
 *
 * The accumulated bytes are re-parsed after every chunk, so if reading fails
 * part-way the document built from the earlier chunks is still returned.
 *
 * @param body - The response body, iterated chunk by chunk.
 * @param contentLength - Parsed Content-Length (`Infinity` when unknown).
 * @param httpCharset - Charset from the Content-Type header, if any.
 * @param abortSignal - Reading stops at the next chunk once this is aborted.
 * @returns The document parsed from at most
 *   `min(contentLength, MAX_HTML_BYTES_TO_LOAD)` bytes; an empty document when
 *   nothing could be read.
 */
const getHtmlDocument = async (
  body: AsyncIterable<string | Uint8Array>,
  contentLength: number,
  httpCharset: string | null,
  abortSignal: AbortSignal
): Promise<HTMLDocument> => {
  let result: HTMLDocument = emptyHtmlDocument();

  const maxHtmlBytesToLoad = Math.min(contentLength, MAX_HTML_BYTES_TO_LOAD);
  const buffer = new Uint8Array(new ArrayBuffer(maxHtmlBytesToLoad));
  let bytesLoadedSoFar = 0;

  try {
    // `for ... of` is much cleaner here, so we allow it.
    /* eslint-disable no-restricted-syntax */
    for await (let chunk of body) {
      if (abortSignal.aborted) {
        break;
      }

      // This check exists to satisfy TypeScript; chunk should always be a Buffer.
      if (typeof chunk === 'string') {
        if (
          httpCharset !== null &&
          httpCharset !== undefined &&
          Buffer.isEncoding(httpCharset)
        ) {
          chunk = Buffer.from(chunk, httpCharset);
        } else {
          chunk = Buffer.from(chunk, 'utf8');
        }
      }

      // Views are enough here: `buffer.set` copies the bytes it is given, and
      // the parser only reads from the range it receives.
      const truncatedChunk = chunk.subarray(
        0,
        maxHtmlBytesToLoad - bytesLoadedSoFar
      );
      buffer.set(truncatedChunk, bytesLoadedSoFar);
      bytesLoadedSoFar += truncatedChunk.byteLength;

      result = parseHtmlBytes(buffer.subarray(0, bytesLoadedSoFar), httpCharset);

      const hasLoadedMaxBytes = bytesLoadedSoFar >= maxHtmlBytesToLoad;
      if (hasLoadedMaxBytes) {
        break;
      }
    }
    /* eslint-enable no-restricted-syntax */
  } catch (err) {
    window.log.warn(
      'getHtmlDocument: error when reading body; continuing with what we got'
    );
  }

  return result;
};
|
||||||
|
|
||||||
|
/**
 * Looks up the first usable `<meta property="...">` value in `document`.
 *
 * @param document - Parsed HTML document to search.
 * @param properties - `property` attribute values to try, in priority order.
 * @returns The trimmed `content` attribute of the first listed property that
 *   has a non-empty one, or `null` when none does.
 */
const getOpenGraphContent = (
  document: HTMLDocument,
  properties: ReadonlyArray<string>
): string | null => {
  for (let i = 0; i < properties.length; i += 1) {
    const property = properties[i];
    const content = document
      .querySelector(`meta[property="${property}"]`)
      ?.getAttribute('content')
      ?.trim();
    if (content) {
      return content;
    }
  }
  return null;
};
|
||||||
|
|
||||||
|
/**
 * Looks up the first usable `<link rel="...">` target in `document`.
 *
 * @param document - Parsed HTML document to search.
 * @param rels - `rel` attribute values to try, in priority order.
 * @returns The trimmed `href` attribute of the first listed `rel` that has a
 *   non-empty one, or `null` when none does.
 */
const getLinkHrefAttribute = (
  document: HTMLDocument,
  rels: ReadonlyArray<string>
): string | null => {
  for (let i = 0; i < rels.length; i += 1) {
    const rel = rels[i];
    const href = document
      .querySelector(`link[rel="${rel}"]`)
      ?.getAttribute('href')
      ?.trim();
    if (href) {
      return href;
    }
  }
  return null;
};
|
||||||
|
|
||||||
|
/**
 * Extracts link preview metadata from a parsed HTML document.
 *
 * @param document - The parsed page.
 * @param href - URL of the page; relative image URLs are resolved against it.
 * @returns The metadata, or `null` (after logging a warning) when the page
 *   has neither an `og:title` nor a non-empty `<title>`.
 */
const parseMetadata = (
  document: HTMLDocument,
  href: string
): LinkPreviewMetadata | null => {
  const title =
    getOpenGraphContent(document, ['og:title']) || document.title.trim();
  if (!title) {
    window.log.warn(
      "parseMetadata: HTML document doesn't have a title; bailing"
    );
    return null;
  }

  // Open Graph description first, then the plain `<meta name="description">`.
  const description =
    getOpenGraphContent(document, ['og:description']) ||
    document
      .querySelector('meta[name="description"]')
      ?.getAttribute('content')
      ?.trim() ||
    null;

  // Prefer an Open Graph image; fall back to the page's icon links.
  const rawImageHref =
    getOpenGraphContent(document, ['og:image', 'og:image:url']) ||
    getLinkHrefAttribute(document, [
      'shortcut icon',
      'icon',
      'apple-touch-icon',
    ]);
  const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null;
  const imageHref = imageUrl ? imageUrl.href : null;

  let date: number | null = null;
  const rawDate = getOpenGraphContent(document, [
    'og:published_time',
    'article:published_time',
    'og:modified_time',
    'article:modified_time',
  ]);
  if (rawDate) {
    // `Date.parse` yields NaN for unparseable input, which fails both
    // comparisons and leaves `date` as null.
    const parsed = Date.parse(rawDate);
    if (parsed > MIN_DATE && parsed < MAX_DATE) {
      date = parsed;
    }
  }

  return {
    title,
    description,
    imageHref,
    date,
  };
};
|
||||||
|
|
||||||
|
/**
 * This attempts to fetch link preview metadata, returning `null` if it cannot be found
 * for any reason.
 *
 * NOTE: This does NOT validate the incoming URL for safety. For example, it may fetch an
 * insecure HTTP href. It also does not offer a timeout; that is up to the caller.
 *
 * At a high level, it:
 *
 * 1. Makes a request through `fetchWithRedirects`, which follows redirects
 *    manually, allowing up to `MAX_REQUEST_COUNT_WITH_REDIRECTS` requests.
 * 2. Checks the response status code and headers to make sure it's a normal HTML
 *    response.
 * 3. Streams up to `MAX_HTML_BYTES_TO_LOAD`, stopping when (1) it has loaded all of the
 *    HTML (2) loaded the maximum number of bytes (3) the request was aborted.
 * 4. Parses the resulting HTML with `DOMParser`.
 * 5. Grabs the title, description, image URL, and date.
 *
 * @param fetchFn - The `fetch` implementation to use.
 * @param href - URL of the page to inspect.
 * @param abortSignal - Cancels the request and the body streaming.
 * @returns The parsed metadata, or `null` on any failure or abort.
 */
export async function fetchLinkPreviewMetadata(
  fetchFn: FetchFn,
  href: string,
  abortSignal: AbortSignal
): Promise<null | LinkPreviewMetadata> {
  let response: Response;
  try {
    response = await fetchWithRedirects(fetchFn, href, {
      headers: {
        Accept: 'text/html,application/xhtml+xml',
        'User-Agent': 'WhatsApp',
      },
      signal: abortSignal,
    });
  } catch (err) {
    window.log.warn(
      'fetchLinkPreviewMetadata: failed to fetch link preview HTML; bailing'
    );
    return null;
  }

  if (!response.ok) {
    window.log.warn(
      `fetchLinkPreviewMetadata: got a ${response.status} status code; bailing`
    );
    return null;
  }

  if (!response.body) {
    window.log.warn('fetchLinkPreviewMetadata: no response body; bailing');
    return null;
  }

  if (
    !isInlineContentDisposition(response.headers.get('Content-Disposition'))
  ) {
    window.log.warn(
      'fetchLinkPreviewMetadata: Content-Disposition header is not inline; bailing'
    );
    return null;
  }

  if (abortSignal.aborted) {
    return null;
  }

  // A missing or unparseable Content-Length is `Infinity`, which passes this check.
  const contentLength = parseContentLength(
    response.headers.get('Content-Length')
  );
  if (contentLength < MIN_HTML_CONTENT_LENGTH) {
    window.log.warn(
      'fetchLinkPreviewMetadata: Content-Length is too short; bailing'
    );
    return null;
  }

  const contentType = parseContentType(response.headers.get('Content-Type'));
  if (contentType.type !== 'text/html') {
    window.log.warn(
      'fetchLinkPreviewMetadata: Content-Type is not HTML; bailing'
    );
    return null;
  }

  const document = await getHtmlDocument(
    response.body,
    contentLength,
    contentType.charset,
    abortSignal
  );

  // [The Node docs about `ReadableStream.prototype[Symbol.asyncIterator]`][0] say that
  // the stream will be destroyed if you `break` out of the loop, but I could not
  // reproduce this. Also [`destroy` is a documented method][1] but it is not in the
  // Node types, which is why we do this cast to `any`.
  // [0]: https://nodejs.org/docs/latest-v12.x/api/stream.html#stream_readable_symbol_asynciterator
  // [1]: https://nodejs.org/docs/latest-v12.x/api/stream.html#stream_readable_destroy_error
  try {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (response.body as any).destroy();
  } catch (err) {
    // Ignored.
  }

  if (abortSignal.aborted) {
    return null;
  }

  // `response.url` (not `href`) is used as the base so relative image URLs
  // resolve against the response's own URL.
  return parseMetadata(document, response.url);
}
|
||||||
|
|
||||||
|
/**
 * This attempts to fetch an image, returning `null` if it fails for any reason.
 *
 * NOTE: This does NOT validate the incoming URL for safety. For example, it may fetch an
 * insecure HTTP href. It also does not offer a timeout; that is up to the caller.
 *
 * The response is rejected unless its status is OK, its Content-Length is
 * present and between `MIN_IMAGE_CONTENT_LENGTH` and
 * `MAX_IMAGE_CONTENT_LENGTH`, and its Content-Type is one of
 * `VALID_IMAGE_MIME_TYPES`.
 *
 * @param fetchFn - The `fetch` implementation to use.
 * @param href - URL of the image.
 * @param abortSignal - Cancels the request.
 * @returns The image bytes and MIME type, or `null` on any failure or abort.
 */
export async function fetchLinkPreviewImage(
  fetchFn: FetchFn,
  href: string,
  abortSignal: AbortSignal
): Promise<null | LinkPreviewImage> {
  let response: Response;
  try {
    response = await fetchWithRedirects(fetchFn, href, {
      headers: {
        'User-Agent': 'WhatsApp',
      },
      // NOTE(review): presumably node-fetch's cap on the response body size — confirm.
      size: MAX_IMAGE_CONTENT_LENGTH,
      signal: abortSignal,
    });
  } catch (err) {
    window.log.warn('fetchLinkPreviewImage: failed to fetch image; bailing');
    return null;
  }

  if (abortSignal.aborted) {
    return null;
  }

  if (!response.ok) {
    window.log.warn(
      `fetchLinkPreviewImage: got a ${response.status} status code; bailing`
    );
    return null;
  }

  // A missing or unparseable Content-Length is `Infinity`, so it is rejected
  // by the upper-bound check below.
  const contentLength = parseContentLength(
    response.headers.get('Content-Length')
  );
  if (contentLength < MIN_IMAGE_CONTENT_LENGTH) {
    window.log.warn(
      'fetchLinkPreviewImage: Content-Length is too short; bailing'
    );
    return null;
  }
  if (contentLength > MAX_IMAGE_CONTENT_LENGTH) {
    window.log.warn(
      'fetchLinkPreviewImage: Content-Length is too large or is unset; bailing'
    );
    return null;
  }

  const { type: contentType } = parseContentType(
    response.headers.get('Content-Type')
  );
  if (!contentType || !VALID_IMAGE_MIME_TYPES.has(contentType)) {
    window.log.warn(
      'fetchLinkPreviewImage: Content-Type is not an image; bailing'
    );
    return null;
  }

  let data: ArrayBuffer;
  try {
    data = await response.arrayBuffer();
  } catch (err) {
    window.log.warn('fetchLinkPreviewImage: failed to read body; bailing');
    return null;
  }

  return { data, contentType };
}
|
Loading…
Reference in New Issue