From ae2db9f09ac5de9356ee47f02eb629e35e370d47 Mon Sep 17 00:00:00 2001 From: Scott Nonnenberg Date: Thu, 21 Feb 2019 12:28:13 -0800 Subject: [PATCH] Improve handling for URLs composed of mixed character sets --- js/modules/link_previews.d.ts | 1 + js/modules/link_previews.js | 149 +++++++++++++++++++++++++ package.json | 1 + test/modules/link_previews_test.js | 27 +++++ ts/components/conversation/Linkify.tsx | 3 +- yarn.lock | 5 + 6 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 js/modules/link_previews.d.ts diff --git a/js/modules/link_previews.d.ts b/js/modules/link_previews.d.ts new file mode 100644 index 000000000..ea3e3e3d1 --- /dev/null +++ b/js/modules/link_previews.d.ts @@ -0,0 +1 @@ +export function isLinkSneaky(link: string): boolean; diff --git a/js/modules/link_previews.js b/js/modules/link_previews.js index 6e35613fb..4d72da45c 100644 --- a/js/modules/link_previews.js +++ b/js/modules/link_previews.js @@ -2,6 +2,7 @@ const { isNumber, compact } = require('lodash'); const he = require('he'); +const punycode = require('punycode'); const LinkifyIt = require('linkify-it'); const linkify = LinkifyIt(); @@ -16,6 +17,7 @@ module.exports = { getImageMetaTag, isLinkInWhitelist, isMediaLinkInWhitelist, + isLinkSneaky, }; const SUPPORTED_DOMAINS = [ @@ -194,3 +196,150 @@ function assembleChunks(chunkDescriptors) { return concatenateBytes(...chunks); } + +const LATIN_PATTERN = new RegExp( + '[' + + '\\u0041-\\u005A' + + '\\u0061-\\u007A' + + '\\u00AA' + + '\\u00BA' + + '\\u00C0-\\u00DC' + + '\\u00D8-\\u00F6' + + '\\u00F8-\\u01BA' + + ']' +); + +const CYRILLIC_PATTERN = new RegExp( + '[' + + '\\u0400-\\u0481' + + '\\u0482' + + '\\u0483-\\u0484' + + '\\u0487' + + '\\u0488-\\u0489' + + '\\u048A-\\u052F' + + '\\u1C80-\\u1C88' + + '\\u1D2B' + + '\\u1D78' + + '\\u2DE0-\\u2DFF' + + '\\uA640-\\uA66D' + + '\\uA66E' + + '\\uA66F' + + '\\uA670-\\uA672' + + '\\uA673' + + '\\uA674-\\uA67D' + + '\\uA67E' + + '\\uA67F' + + '\\uA680-\\uA69B' + + 
'\\uA69C-\\uA69D' + + '\\uA69E-\\uA69F' + + '\\uFE2E-\\uFE2F' + + ']' +); + +const GREEK_PATTERN = new RegExp( + '[' + + '\\u0370-\\u0373' + + '\\u0375' + + '\\u0376-\\u0377' + + '\\u037A' + + '\\u037B-\\u037D' + + '\\u037F' + + '\\u0384' + + '\\u0386' + + '\\u0388-\\u038A' + + '\\u038C' + + '\\u038E-\\u03A1' + + '\\u03A3-\\u03E1' + + '\\u03F0-\\u03F5' + + '\\u03F6' + + '\\u03F7-\\u03FF' + + '\\u1D26-\\u1D2A' + + '\\u1D5D-\\u1D61' + + '\\u1D66-\\u1D6A' + + '\\u1DBF' + + '\\u1F00-\\u1F15' + + '\\u1F18-\\u1F1D' + + '\\u1F20-\\u1F45' + + '\\u1F48-\\u1F4D' + + '\\u1F50-\\u1F57' + + '\\u1F59' + + '\\u1F5B' + + '\\u1F5D' + + '\\u1F5F-\\u1F7D' + + '\\u1F80-\\u1FB4' + + '\\u1FB6-\\u1FBC' + + '\\u1FBD' + + '\\u1FBE' + + '\\u1FBF-\\u1FC1' + + '\\u1FC2-\\u1FC4' + + '\\u1FC6-\\u1FCC' + + '\\u1FCD-\\u1FCF' + + '\\u1FD0-\\u1FD3' + + '\\u1FD6-\\u1FDB' + + '\\u1FDD-\\u1FDF' + + '\\u1FE0-\\u1FEC' + + '\\u1FED-\\u1FEF' + + '\\u1FF2-\\u1FF4' + + '\\u1FF6-\\u1FFC' + + '\\u1FFD-\\u1FFE' + + '\\u2126' + + '\\uAB65' + + ']' +); + +const HIGH_GREEK_PATTERN = new RegExp( + '[' + + `${String.fromCodePoint(0x10140)}-${String.fromCodePoint(0x10174)}` + + `${String.fromCodePoint(0x10175)}-${String.fromCodePoint(0x10178)}` + + `${String.fromCodePoint(0x10179)}-${String.fromCodePoint(0x10189)}` + + `${String.fromCodePoint(0x1018a)}-${String.fromCodePoint(0x1018b)}` + + `${String.fromCodePoint(0x1018c)}-${String.fromCodePoint(0x1018e)}` + + `${String.fromCodePoint(0x101a0)}` + + `${String.fromCodePoint(0x1d200)}-${String.fromCodePoint(0x1d241)}` + + `${String.fromCodePoint(0x1d242)}-${String.fromCodePoint(0x1d244)}` + + `${String.fromCodePoint(0x1d245)}` + + ']', + 'u' +); + +function isChunkSneaky(chunk) { + const hasLatin = LATIN_PATTERN.test(chunk); + if (!hasLatin) { + return false; + } + + const hasCyrillic = CYRILLIC_PATTERN.test(chunk); + if (hasCyrillic) { + return true; + } + + const hasGreek = GREEK_PATTERN.test(chunk); + if (hasGreek) { + return true; + } + + const hasHighGreek = 
HIGH_GREEK_PATTERN.test(chunk); + if (hasHighGreek) { + return true; + } + + return false; +} + +function isLinkSneaky(link) { + const domain = getDomain(link); + + // This is necessary because getDomain returns domains in punycode form + // We'd like to use require('url').domainToUnicode() but it's a no-op in a BrowserWindow + const unicodeDomain = punycode.toUnicode(domain); + + const chunks = unicodeDomain.split('.'); + for (let i = 0, max = chunks.length; i < max; i += 1) { + const chunk = chunks[i]; + if (isChunkSneaky(chunk)) { + return true; + } + } + + return false; +} diff --git a/package.json b/package.json index c5ae5ad43..c36d9fbcf 100644 --- a/package.json +++ b/package.json @@ -79,6 +79,7 @@ "pify": "3.0.0", "protobufjs": "~6.8.6", "proxy-agent": "3.0.3", + "punycode": "2.1.1", "react": "16.2.0", "react-contextmenu": "2.9.2", "react-dom": "16.2.0", diff --git a/test/modules/link_previews_test.js b/test/modules/link_previews_test.js index 8baec0e96..0cae46b93 100644 --- a/test/modules/link_previews_test.js +++ b/test/modules/link_previews_test.js @@ -5,6 +5,7 @@ const { getTitleMetaTag, getImageMetaTag, isLinkInWhitelist, + isLinkSneaky, isMediaLinkInWhitelist, } = require('../../js/modules/link_previews'); @@ -305,4 +306,30 @@ describe('Link previews', () => { assert.deepEqual(expected, actual); }); }); + + describe('#isLinkSneaky', () => { + it('returns false for all-latin domain', () => { + const link = 'https://www.amazon.com'; + const actual = isLinkSneaky(link); + assert.strictEqual(actual, false); + }); + + it('returns true for Latin + Cyrillic domain', () => { + const link = 'https://www.aмazon.com'; + const actual = isLinkSneaky(link); + assert.strictEqual(actual, true); + }); + + it('returns true for Latin + Greek domain', () => { + const link = 'https://www.αpple.com'; + const actual = isLinkSneaky(link); + assert.strictEqual(actual, true); + }); + + it('returns true for Latin + High Greek domain', () => { + const link = 
`https://www.apple${String.fromCodePoint(0x101a0)}.com`; + const actual = isLinkSneaky(link); + assert.strictEqual(actual, true); + }); + }); }); diff --git a/ts/components/conversation/Linkify.tsx b/ts/components/conversation/Linkify.tsx index 0bc4af763..86e651629 100644 --- a/ts/components/conversation/Linkify.tsx +++ b/ts/components/conversation/Linkify.tsx @@ -3,6 +3,7 @@ import React from 'react'; import LinkifyIt from 'linkify-it'; import { RenderTextCallback } from '../../types/Util'; +import { isLinkSneaky } from '../../../js/modules/link_previews'; const linkify = LinkifyIt(); @@ -49,7 +50,7 @@ export class Linkify extends React.Component { } const { url, text: originalText } = match; - if (SUPPORTED_PROTOCOLS.test(url)) { + if (SUPPORTED_PROTOCOLS.test(url) && !isLinkSneaky(url)) { results.push( {originalText} diff --git a/yarn.lock b/yarn.lock index c7af81636..8ed4e56d8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6896,6 +6896,11 @@ punycode@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.0.tgz#5f863edc89b96db09074bad7947bf09056ca4e7d" +punycode@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec" + integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A== + q-i@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/q-i/-/q-i-2.0.1.tgz#fec7e3f0e713f3467358bb5ac80bcc4c115187d6"