From 19c6bebab087d700abee7f831bea4675a38154dc Mon Sep 17 00:00:00 2001 From: Audric Ackermann Date: Mon, 2 Aug 2021 14:33:39 +1000 Subject: [PATCH] make sure to retry fetch sqwarm with a new targetNode if needed --- app/sql.js | 2 +- ts/session/snode_api/SNodeAPI.ts | 80 +++++++++++++++++++++++--------- ts/session/snode_api/onions.ts | 13 ++++-- 3 files changed, 68 insertions(+), 27 deletions(-) diff --git a/app/sql.js b/app/sql.js index b7fee1375..a4973a281 100644 --- a/app/sql.js +++ b/app/sql.js @@ -121,7 +121,7 @@ const ITEMS_TABLE = 'items'; const ATTACHMENT_DOWNLOADS_TABLE = 'attachment_downloads'; const CLOSED_GROUP_V2_KEY_PAIRS_TABLE = 'encryptionKeyPairsForClosedGroupV2'; -const MAX_PUBKEYS_MEMBERS = 1000; +const MAX_PUBKEYS_MEMBERS = 300; function objectToJSON(data) { return JSON.stringify(data); diff --git a/ts/session/snode_api/SNodeAPI.ts b/ts/session/snode_api/SNodeAPI.ts index 447d632a7..18b21a452 100644 --- a/ts/session/snode_api/SNodeAPI.ts +++ b/ts/session/snode_api/SNodeAPI.ts @@ -199,7 +199,7 @@ export type SendParams = { // get snodes for pubkey from random snode. Uses an existing snode -async function requestSnodesForPubkeyRetryable( +async function requestSnodesForPubkeyWithTargetNodeRetryable( pubKey: string, targetNode: Snode ): Promise> { @@ -210,15 +210,15 @@ async function requestSnodesForPubkeyRetryable( if (!result) { window?.log?.warn( - `LokiSnodeAPI::requestSnodesForPubkeyRetryable - lokiRpc on ${targetNode.ip}:${targetNode.port} returned falsish value`, + `LokiSnodeAPI::requestSnodesForPubkeyWithTargetNodeRetryable - lokiRpc on ${targetNode.ip}:${targetNode.port} returned falsish value`, result ); - throw new Error('requestSnodesForPubkeyRetryable: Invalid result'); + throw new Error('requestSnodesForPubkeyWithTargetNodeRetryable: Invalid result'); } if (result.status !== 200) { window?.log?.warn('Status is not 200 for get_snodes_for_pubkey'); - throw new Error('requestSnodesForPubkeyRetryable: Invalid status code'); + throw new Error('requestSnodesForPubkeyWithTargetNodeRetryable: Invalid status code'); } try { @@ -240,26 +240,64 @@ async function requestSnodesForPubkeyRetryable( } } +async function requestSnodesForPubkeyWithTargetNode( + pubKey: string, + targetNode: Snode +): Promise> { + // don't catch exception in here. we want them to bubble up + + // this is the level where our targetNode is supposed to be valid. We retry a few times with this one. + // if all our retries fails, we retry from the caller of this function with a new target node. + return pRetry( + async () => { + return requestSnodesForPubkeyWithTargetNodeRetryable(pubKey, targetNode); + }, + { + retries: 3, + factor: 2, + minTimeout: 100, + maxTimeout: 2000, + onFailedAttempt: e => { + window?.log?.warn( + `requestSnodesForPubkeyWithTargetNode attempt #${e.attemptNumber} failed. ${e.retriesLeft} retries left...` + ); + }, + } + ); +} + +async function requestSnodesForPubkeyRetryable(pubKey: string): Promise> { + // don't catch exception in here. we want them to bubble up + + // this is the level where our targetNode is not yet known. We retry a few times with a new one everytime. + // the idea is that the requestSnodesForPubkeyWithTargetNode will remove a failing targetNode + return pRetry( + async () => { + const targetNode = await getRandomSnode(); + + return requestSnodesForPubkeyWithTargetNode(pubKey, targetNode); + }, + { + retries: 3, + factor: 2, + minTimeout: 100, + maxTimeout: 4000, + onFailedAttempt: e => { + window?.log?.warn( + `requestSnodesForPubkeyRetryable attempt #${e.attemptNumber} failed. ${e.retriesLeft} retries left...` + ); + }, + } + ); +} + export async function requestSnodesForPubkey(pubKey: string): Promise> { try { - const targetNode = await getRandomSnode(); + // catch exception in here only. + // the idea is that the pretry will retry a few times each calls, except if an AbortError is thrown. - return await pRetry( - async () => { - return requestSnodesForPubkeyRetryable(pubKey, targetNode); - }, - { - retries: 10, // each path can fail 3 times before being dropped, we have 3 paths at most - factor: 2, - minTimeout: 1000, - maxTimeout: 4000, - onFailedAttempt: e => { - window?.log?.warn( - `requestSnodesForPubkey attempt #${e.attemptNumber} failed. ${e.retriesLeft} retries left...` - ); - }, - } - ); + // if all retry fails, we will end up in the catch below when the last exception thrown + return await requestSnodesForPubkeyRetryable(pubKey); } catch (e) { window?.log?.error('LokiSnodeAPI::requestSnodesForPubkey - error', e); diff --git a/ts/session/snode_api/onions.ts b/ts/session/snode_api/onions.ts index 81d0bd241..13778012a 100644 --- a/ts/session/snode_api/onions.ts +++ b/ts/session/snode_api/onions.ts @@ -6,7 +6,7 @@ import ByteBuffer from 'bytebuffer'; import { OnionPaths } from '../onions'; import { fromHex, toHex } from '../utils/String'; import pRetry from 'p-retry'; -import { incrementBadPathCountOrDrop } from '../onions/onionPath'; +import { ed25519Str, incrementBadPathCountOrDrop } from '../onions/onionPath'; import _ from 'lodash'; import { hrefPnServerDev, hrefPnServerProd } from '../../pushnotification/PnServer'; // hold the ed25519 key of a snode against the time it fails. Used to remove a snode only after a few failures (snodeFailureThreshold failures) @@ -291,6 +291,7 @@ async function processAnyOtherErrorOnPath( // If we have a specific node in fault we can exclude just this node. // Otherwise we increment the whole path failure count if (nodeNotFound) { + window?.log?.warn('node not found error with: ', ed25519Str(nodeNotFound)); await exports.incrementBadSnodeCountOrDrop({ snodeEd25519: nodeNotFound, guardNodeEd25519, @@ -613,15 +614,15 @@ export async function incrementBadSnodeCountOrDrop({ const newFailureCount = oldFailureCount + 1; snodeFailureCount[snodeEd25519] = newFailureCount; if (newFailureCount >= snodeFailureThreshold) { - window?.log?.warn(`Failure threshold reached for: ${snodeEd25519}; dropping it.`); + window?.log?.warn(`Failure threshold reached for: ${ed25519Str(snodeEd25519)}; dropping it.`); if (associatedWith) { (window?.log?.info || console.warn)( - `Dropping ${snodeEd25519} from swarm of ${associatedWith}` + `Dropping ${ed25519Str(snodeEd25519)} from swarm of ${ed25519Str(associatedWith)}` ); await dropSnodeFromSwarmIfNeeded(associatedWith, snodeEd25519); } - window?.log?.info(`Dropping ${snodeEd25519} from snodepool`); + window?.log?.info(`Dropping ${ed25519Str(snodeEd25519)} from snodepool`); await dropSnodeFromSnodePool(snodeEd25519); // the snode was ejected from the pool so it won't be used again. @@ -642,7 +643,9 @@ export async function incrementBadSnodeCountOrDrop({ } } else { window?.log?.warn( - `Couldn't reach snode at: ${snodeEd25519}; setting his failure count to ${newFailureCount}` + `Couldn't reach snode at: ${ed25519Str( + snodeEd25519 + )}; setting his failure count to ${newFailureCount}` ); } }