From 0e4920cb0dcb1dc20ca041fdee4cb6228bbac118 Mon Sep 17 00:00:00 2001
From: Audric Ackermann
Date: Wed, 2 Nov 2022 14:27:14 +1100
Subject: [PATCH] feat: add bencoded/decoded + tests

---
 ts/session/utils/Bencoding.ts               | 293 ++++++++++++++++++++
 ts/test/session/unit/utils/Bencoded_test.ts | 270 ++++++++++++++++++
 2 files changed, 563 insertions(+)
 create mode 100644 ts/session/utils/Bencoding.ts
 create mode 100644 ts/test/session/unit/utils/Bencoded_test.ts

diff --git a/ts/session/utils/Bencoding.ts b/ts/session/utils/Bencoding.ts
new file mode 100644
index 000000000..6bdf56c8f
--- /dev/null
+++ b/ts/session/utils/Bencoding.ts
@@ -0,0 +1,293 @@
+import { from_string, to_string } from 'libsodium-wrappers-sumo';
+import { isArray, isEmpty, isNumber, isPlainObject, isString, toNumber } from 'lodash';
+import { StringUtils } from '.';
+
+const e = 'e'; // end of whatever was before
+const l = 'l'; // list of values
+const i = 'i'; // start of integer
+const d = 'd'; // start of dictionary
+const colon = ':';
+
+const eCode = e.charCodeAt(0); // end of whatever was before
+const lCode = l.charCodeAt(0); // list of values
+const iCode = i.charCodeAt(0); // start of integer
+const dCode = d.charCodeAt(0); // start of dictionary
+const colonCode = colon.charCodeAt(0);
+const zeroCode = '0'.charCodeAt(0); // first byte a string length prefix can start with
+const nineCode = '9'.charCodeAt(0); // last byte a string length prefix can start with
+
+interface BencodeDictType {
+  [key: string]: BencodeElementType;
+}
+
+type BencodeArrayType = Array<BencodeElementType>;
+
+type BencodeElementType = number | string | BencodeDictType | BencodeArrayType;
+
+/**
+ * Concatenate the given byte arrays, in order, into a single new Uint8Array.
+ */
+function concatBytes(parts: Array<Uint8Array>): Uint8Array {
+  const totalLength = parts.reduce((sum, part) => sum + part.length, 0);
+  const merged = new Uint8Array(totalLength);
+  let offset = 0;
+  parts.forEach(part => {
+    merged.set(part, offset);
+    offset += part.length;
+  });
+  return merged;
+}
+
+/**
+ * Decode a bencoded payload made of integers, strings, lists and dictionaries.
+ *
+ * The payload is parsed once, in the constructor, which throws when the first element
+ * is malformed. Bytes following the first complete element are ignored.
+ */
+export class BDecode {
+  private readonly content: Uint8Array;
+  private currentParsingIndex = 0;
+  private readonly parsedContent: BencodeElementType;
+
+  constructor(content: Uint8Array | string) {
+    this.content = isString(content) ? from_string(content) : content;
+    this.parsedContent = this.parseContent();
+  }
+
+  public getParsedContent() {
+    return this.parsedContent;
+  }
+
+  /**
+   * Decode an int from a byte array starting with charCode of `i` and ending with charCode `e`.
+   * The digits go through lodash `toNumber`, so an empty body (`ie`) is parsed as 0.
+   */
+  private parseInt(): number {
+    if (this.currentParsingIndex >= this.content.length) {
+      throw new Error('parseInt: out of bounds');
+    }
+    if (this.content[this.currentParsingIndex] !== iCode) {
+      throw new Error('parseInt: not the start of an int');
+    }
+
+    this.currentParsingIndex++; // drop `i`
+    const startIntStr = this.currentParsingIndex; // save the start of the int
+    const nextEndSeparator = this.content.indexOf(eCode, this.currentParsingIndex);
+    if (nextEndSeparator === -1) {
+      throw new Error('parseInt: not an int to be parsed here: no end separator');
+    }
+
+    const parsed = toNumber(to_string(this.content.slice(startIntStr, nextEndSeparator)));
+
+    if (!isFinite(parsed)) {
+      throw new Error(`parseInt: could not parse number ${parsed}`);
+    }
+    this.currentParsingIndex = nextEndSeparator + 1; // continue after the 'e'
+
+    return parsed;
+  }
+
+  /**
+   * Decode a list: charCode of `l`, any number of elements, then charCode of `e`.
+   * Throws if the content ends before the closing `e`.
+   */
+  parseList(): BencodeArrayType {
+    const parsed: BencodeArrayType = [];
+
+    if (this.currentParsingIndex >= this.content.length) {
+      throw new Error('parseList: out of bounds');
+    }
+    if (this.content[this.currentParsingIndex] !== lCode) {
+      throw new Error('parseList: not the start of a list');
+    }
+
+    this.currentParsingIndex++; // drop `l`
+
+    while (
+      this.currentParsingIndex < this.content.length &&
+      this.content[this.currentParsingIndex] !== eCode
+    ) {
+      parsed.push(this.parseBlock());
+    }
+    if (this.currentParsingIndex >= this.content.length) {
+      throw new Error('parseList: no end separator');
+    }
+    this.currentParsingIndex++; // drop the 'e'
+
+    return parsed;
+  }
+
+  /**
+   * Decode a dictionary: charCode of `d`, any number of `key value` pairs where each key
+   * is a string element, then charCode of `e`.
+   * Throws if the content ends before the closing `e`.
+   */
+  private parseDict() {
+    const parsed: BencodeDictType = {};
+
+    if (this.currentParsingIndex >= this.content.length) {
+      throw new Error('parseDict: out of bounds');
+    }
+    if (this.content[this.currentParsingIndex] !== dCode) {
+      throw new Error('parseDict: not the start of a dict');
+    }
+
+    this.currentParsingIndex++; // drop `d`
+
+    while (
+      this.currentParsingIndex < this.content.length &&
+      this.content[this.currentParsingIndex] !== eCode
+    ) {
+      const key = this.parseString();
+      const value = this.parseBlock();
+      parsed[key] = value;
+    }
+    if (this.currentParsingIndex >= this.content.length) {
+      throw new Error('parseDict: no end separator');
+    }
+    this.currentParsingIndex++; // drop the 'e'
+
+    return parsed;
+  }
+
+  /**
+   * Decode a string element from iterator assumed to have structure `length:data`,
+   * where `length` is the number of bytes of `data`.
+   */
+  private parseString(): string {
+    if (this.currentParsingIndex >= this.content.length) {
+      throw new Error('parseString: out of bounds');
+    }
+
+    const separatorIndex = this.content.indexOf(colonCode, this.currentParsingIndex);
+    if (separatorIndex === -1) {
+      throw new Error('parseString: cannot parse string without separator');
+    }
+    const strLength = toNumber(
+      to_string(this.content.slice(this.currentParsingIndex, separatorIndex))
+    );
+    // a fractional or negative length would leave the cursor at an invalid position
+    if (!Number.isInteger(strLength) || strLength < 0) {
+      throw new Error('parseString: cannot parse string without length');
+    }
+
+    const strStart = separatorIndex + 1;
+    if (strLength > this.content.length - strStart) {
+      throw new Error(
+        'parseString: length is too long considering what we have left on this string'
+      );
+    }
+
+    // Always move past the element, even when it is empty: returning without advancing
+    // makes parseList/parseDict read the same `0:` again and again, forever.
+    this.currentParsingIndex = strStart + strLength;
+    if (strLength === 0) {
+      return '';
+    }
+    return StringUtils.decode(this.content.slice(strStart, this.currentParsingIndex), 'utf8');
+  }
+
+  private parseContent() {
+    return this.parseBlock();
+  }
+
+  /**
+   * Decode the element starting at the current index, picking the parser from its first byte.
+   */
+  private parseBlock(): BencodeElementType {
+    const index = this.currentParsingIndex;
+    const { length } = this.content;
+    if (index >= length) {
+      throw new Error('Out of bounds');
+    }
+
+    const charCode = this.content[index];
+    if (charCode === lCode) {
+      return this.parseList();
+    }
+    if (charCode === dCode) {
+      return this.parseDict();
+    }
+    if (charCode === iCode) {
+      return this.parseInt();
+    }
+    if (charCode >= zeroCode && charCode <= nineCode) {
+      return this.parseString();
+    }
+    throw new Error(
+      `parseBlock: Could not parse charCode at ${index}: ${charCode}. Length: ${length}`
+    );
+  }
+}
+
+/**
+ * Bencode a finite number, a string, an array or a plain object.
+ *
+ * The value is encoded once, in the constructor. A top level `null`/`undefined`, empty
+ * string, empty array or empty object gives an empty Uint8Array; a number is always encoded.
+ */
+export class BEncode {
+  private readonly input: BencodeElementType;
+
+  private readonly bencodedContent: Uint8Array;
+
+  constructor(content: BencodeElementType) {
+    this.input = content;
+    this.bencodedContent = this.encodeContent();
+  }
+
+  public getBencodedContent() {
+    return this.bencodedContent;
+  }
+
+  /**
+   * Encode a single element, recursing into arrays and plain objects.
+   * Throws on a non finite number and on any unsupported type.
+   */
+  private encodeItem(item: BencodeElementType): Uint8Array {
+    if (isNumber(item) && isFinite(item)) {
+      return from_string(`i${item}e`);
+    }
+
+    if (isNumber(item)) {
+      throw new Error('encodeItem not finite number');
+    }
+
+    if (isString(item)) {
+      const content = new Uint8Array(StringUtils.encode(item, 'utf8'));
+      // the length prefix counts the encoded bytes, not the characters
+      return concatBytes([from_string(`${content.length}${colon}`), content]);
+    }
+
+    if (isArray(item)) {
+      const encodedItems = item.map(entry => this.encodeItem(entry));
+      return concatBytes([new Uint8Array([lCode]), ...encodedItems, new Uint8Array([eCode])]);
+    }
+
+    if (isPlainObject(item)) {
+      const dict = item as BencodeDictType;
+      // bencoded objects keys must be sorted lexicographically
+      const sortedKeys = Object.keys(dict).sort();
+      const parts: Array<Uint8Array> = [new Uint8Array([dCode])];
+      sortedKeys.forEach(key => {
+        parts.push(this.encodeItem(key), this.encodeItem(dict[key]));
+      });
+      parts.push(new Uint8Array([eCode]));
+      return concatBytes(parts);
+    }
+
+    throw new Error(`encodeItem: unknown type to encode ${typeof item}`);
+  }
+
+  private encodeContent(): Uint8Array {
+    // a number is never "empty": a plain falsy check would drop 0 instead of encoding `i0e`
+    if (isNumber(this.input)) {
+      return this.encodeItem(this.input);
+    }
+    if (!this.input || isEmpty(this.input)) {
+      return new Uint8Array();
+    }
+
+    return this.encodeItem(this.input);
+  }
+}
diff --git a/ts/test/session/unit/utils/Bencoded_test.ts b/ts/test/session/unit/utils/Bencoded_test.ts
new file mode 100644
index 000000000..57092ab35
--- /dev/null
+++ b/ts/test/session/unit/utils/Bencoded_test.ts
@@ -0,0 +1,270 @@
+// tslint:disable: no-implicit-dependencies max-func-body-length no-unused-expression
+import chai from 'chai';
+
+import chaiAsPromised from 'chai-as-promised';
+import { from_string } from 'libsodium-wrappers-sumo';
+import { StringUtils } from '../../../../session/utils';
+import { BDecode, BEncode } from '../../../../session/utils/Bencoding';
+chai.use(chaiAsPromised as any);
+
+const { expect } = chai;
+
+describe('Bencoding: BDecode Utils', () => {
+  describe('From a string', () => {
+    describe('parseInt', () => {
+      it('parse 12', () => {
+        expect(new BDecode('i12e').getParsedContent()).to.equal(12);
+      });
+
+      it('parse 0', () => {
+        expect(new BDecode('ie').getParsedContent()).to.equal(0);
+      });
+
+      it('parse 12232332', () => {
+        expect(new BDecode('i12232332e').getParsedContent()).to.equal(12232332);
+      });
+
+      it('parse 12232332 even if extra characters', () => {
+        expect(new BDecode('i12232332eoverflow.d').getParsedContent()).to.equal(12232332);
+      });
+
+      it('throws invalid start', () => {
+        expect(() => new BDecode('d12232332e').getParsedContent()).to.throw();
+      });
+
+      it('throws invalid end', () => {
+        expect(() => new BDecode('i12232332d').getParsedContent()).to.throw();
+      });
+
+      it('throws invalid integer', () => {
+        expect(() => new BDecode('i1223233qw2e').getParsedContent()).to.throw();
+      });
+    });
+
+    describe('parseString', () => {
+      it('parse short string ', () => {
+        expect(new BDecode('1:a').getParsedContent()).to.equal('a');
+      });
+
+      it('parse empty string', () => {
+        expect(new BDecode('0:').getParsedContent()).to.equal('');
+      });
+
+      it('parse string with emojis ', () => {
+        expect(new BDecode('8:🎃🥸').getParsedContent()).to.equal('🎃🥸');
+        expect(new BDecode('26:❤️‍🔥❤️‍🔥').getParsedContent()).to.equal('❤️‍🔥❤️‍🔥');
+      });
+
+      it('parse non ascii string', () => {
+        expect(new BDecode('48:転キハ連月ざれ地周りを報最こもろ').getParsedContent()).to.equal(
+          '転キハ連月ざれ地周りを報最こもろ'
+        );
+      });
+
+      it('parse longer string', () => {
+        expect(
+          new BDecode(
+            "320:Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic"
+          ).getParsedContent()
+        ).to.equal(
+          "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic"
+        );
+      });
+
+      it('throw if no separator ', () => {
+        expect(() => new BDecode('1aa').getParsedContent()).to.throw();
+      });
+
+      it('throw if first part is not a number ', () => {
+        expect(() => new BDecode('1a1:aa').getParsedContent()).to.throw();
+      });
+
+      it('throw if length brings us way out of range ', () => {
+        expect(() => new BDecode('322:aa').getParsedContent()).to.throw();
+      });
+
+      it('throw if length brings us just out of range ', () => {
+        expect(() => new BDecode('3:aa').getParsedContent()).to.throw();
+      });
+    });
+  });
+
+  describe('parseDict', () => {
+    it('parse single entry dict with string `d3:bar4:spame`', () => {
+      expect(new BDecode('d3:bar4:spame').getParsedContent()).to.deep.equal({ bar: 'spam' });
+    });
+
+    it('parse single entry dict with multiple strings `d3:bar4:spam5:barre2:twe`', () => {
+      expect(new BDecode('d3:bar4:spam5:barre2:twe').getParsedContent()).to.deep.equal({
+        bar: 'spam',
+        barre: 'tw',
+      });
+    });
+
+    it('parse multiple entries dict and int', () => {
+      expect(new BDecode('d3:bar4:spam3:fooi42ee').getParsedContent()).to.deep.equal({
+        bar: 'spam',
+        foo: 42,
+      });
+    });
+
+    it('parse multiple entries with ints', () => {
+      expect(new BDecode('d3:bari999e3:fooi42ee').getParsedContent()).to.deep.equal({
+        bar: 999,
+        foo: 42,
+      });
+    });
+
+    it('parse single entry with emoji', () => {
+      expect(new BDecode('d3:bar8:🎃🥸e').getParsedContent()).to.deep.equal({
+        bar: '🎃🥸',
+      });
+    });
+
+    it('parse dict with empty string value', () => {
+      expect(new BDecode('d3:bar0:e').getParsedContent()).to.deep.equal({ bar: '' });
+    });
+
+    it('throws if the dict is not terminated', () => {
+      expect(() => new BDecode('d3:bar4:spam').getParsedContent()).to.throw();
+    });
+  });
+
+  describe('parseList', () => {
+    it('parse single entry', () => {
+      expect(new BDecode('l4:spame').getParsedContent()).to.deep.equal(['spam']);
+    });
+
+    it('parse multiple entries ', () => {
+      expect(new BDecode('l4:spam2:spe').getParsedContent()).to.deep.equal(['spam', 'sp']);
+    });
+
+    it('parse multiple entries witrh int and strings ', () => {
+      expect(new BDecode('l4:spam2:spi42e2:42e').getParsedContent()).to.deep.equal([
+        'spam',
+        'sp',
+        42,
+        '42',
+      ]);
+    });
+
+    it('parse list with dict included ', () => {
+      expect(new BDecode('ld3:bari999e3:fooi42eee').getParsedContent()).to.deep.equal([
+        {
+          bar: 999,
+          foo: 42,
+        },
+      ]);
+    });
+
+    it('parse list with mulitple dict included ', () => {
+      expect(
+        new BDecode('ld3:bari999e3:fooi42eed3:rabi111e3:offi2312eee').getParsedContent()
+      ).to.deep.equal([
+        {
+          bar: 999,
+          foo: 42,
+        },
+        {
+          rab: 111,
+          off: 2312,
+        },
+      ]);
+    });
+
+    it('parse dict with list included ', () => {
+      expect(new BDecode('d2:dili42ei24e4:key7ee').getParsedContent()).to.deep.equal({
+        di: [42, 24, 'key7'],
+      });
+    });
+
+    it('parse dict with multiple lists included ', () => {
+      expect(new BDecode('d2:dili42ei24e4:key7e4:secol4:key7ee').getParsedContent()).to.deep.equal({
+        di: [42, 24, 'key7'],
+        seco: ['key7'],
+      });
+    });
+
+    it('parse list with empty string', () => {
+      expect(new BDecode('l0:4:spame').getParsedContent()).to.deep.equal(['', 'spam']);
+    });
+
+    it('throws if the list is not terminated', () => {
+      expect(() => new BDecode('l4:spam').getParsedContent()).to.throw();
+    });
+  });
+});
+
+describe('Bencoding: BEncode Utils', () => {
+  it('encode single string', () => {
+    expect(new BEncode('abcdef').getBencodedContent()).to.deep.equal(
+      new Uint8Array(StringUtils.encode('6:abcdef', 'utf8'))
+    );
+  });
+
+  it('encode single string emoji', () => {
+    expect(new BEncode('🎃🥸').getBencodedContent()).to.deep.equal(
+      new Uint8Array(StringUtils.encode('8:🎃🥸', 'utf8'))
+    );
+  });
+
+  it('encode single int', () => {
+    expect(new BEncode(12).getBencodedContent()).to.deep.equal(from_string('i12e'));
+  });
+
+  it('encode 0', () => {
+    expect(new BEncode(0).getBencodedContent()).to.deep.equal(from_string('i0e'));
+  });
+
+  it('encode array with one int', () => {
+    expect(new BEncode([12]).getBencodedContent()).to.deep.equal(from_string('li12ee'));
+  });
+
+  it('encode array with multiple int', () => {
+    expect(new BEncode([12, 34, 5678]).getBencodedContent()).to.deep.equal(
+      from_string('li12ei34ei5678ee')
+    );
+  });
+
+  it('encode array with different types', () => {
+    expect(new BEncode([12, '34', 5678]).getBencodedContent()).to.deep.equal(
+      from_string('li12e2:34i5678ee')
+    );
+  });
+
+  it('encode dict with one item', () => {
+    expect(new BEncode({ dict: '123' }).getBencodedContent()).to.deep.equal(
+      from_string('d4:dict3:123e')
+    );
+  });
+
+  it('encode dict with several items', () => {
+    expect(new BEncode({ dict: '123', dict2: '1234' }).getBencodedContent()).to.deep.equal(
+      from_string('d4:dict3:1235:dict24:1234e')
+    );
+  });
+
+  it('encode dict with several items with arrays', () => {
+    expect(new BEncode({ dict1: [1, 2, 3], dict2: [4, 5, 6] }).getBencodedContent()).to.deep.equal(
+      from_string('d5:dict1li1ei2ei3ee5:dict2li4ei5ei6eee')
+    );
+  });
+
+  it('encode dict with several items but sort them', () => {
+    expect(new BEncode({ dict2: 'second', dict1: 'first' }).getBencodedContent()).to.deep.equal(
+      from_string('d5:dict15:first5:dict26:seconde')
+    );
+  });
+
+  it('encode dict with array with dict', () => {
+    expect(new BEncode({ dict: [{ a: 'b', c: 'd' }] }).getBencodedContent()).to.deep.equal(
+      from_string('d4:dictld1:a1:b1:c1:deee')
+    );
+  });
+
+  it('encode dict with array with dict with emojis', () => {
+    expect(new BEncode({ dict: [{ a: 'b', c: '🎃🥸' }] }).getBencodedContent()).to.deep.equal(
+      from_string('d4:dictld1:a1:b1:c8:🎃🥸eee')
+    );
+  });
+});