fix(lite): fast retry when a server is only warming up (-28)

When the preferred lightwalletd server is reachable but warming up (JSON-RPC -28
/ "Activating best chain"), the failover treated it like a dead server and fell
through to the others, so the wallet didn't open until the next 20s retry — even
though the healthy server was ready within seconds.

Detect the warmup error during failover, flag it on the open outcome
(lastOpenWasWarmup()), and have the App retry on a short ~4s interval in that case
instead of 20s, so the wallet opens promptly once warmup clears. A unit test
covers a warming-preferred + dead-fallback open setting the flag.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 21:26:14 -05:00
parent dc07491abb
commit 3d4b013b0c
5 changed files with 61 additions and 8 deletions

View File

@@ -490,11 +490,13 @@ void App::update()
// needs no passphrase — it just loads the file and contacts the server). Running it off // needs no passphrase — it just loads the file and contacts the server). Running it off
// the UI thread means an unreachable server never freezes startup, and trying the other // the UI thread means an unreachable server never freezes startup, and trying the other
// default servers means one dead server no longer strands the wallet. Retried on an // default servers means one dead server no longer strands the wallet. Retried on an
// interval so a transient outage self-heals once a server comes back. // interval so a transient outage self-heals once a server comes back — and much sooner
// (a few seconds) when the failure was a server merely warming up (-28), which clears fast.
const double nowSecs = ImGui::GetTime(); const double nowSecs = ImGui::GetTime();
const double retryInterval = lite_wallet_->lastOpenWasWarmup() ? 4.0 : 20.0;
if (!lite_wallet_->walletOpen() && !lite_wallet_->openInProgress() && if (!lite_wallet_->walletOpen() && !lite_wallet_->openInProgress() &&
lite_wallet_->walletExists() && lite_wallet_->walletExists() &&
(!lite_autoopen_done_ || nowSecs - lite_open_last_attempt_ > 20.0)) { (!lite_autoopen_done_ || nowSecs - lite_open_last_attempt_ > retryInterval)) {
lite_autoopen_done_ = true; lite_autoopen_done_ = true;
lite_open_last_attempt_ = nowSecs; lite_open_last_attempt_ = nowSecs;
lite_wallet_->beginOpenExisting(); lite_wallet_->beginOpenExisting();

View File

@@ -22,6 +22,16 @@ namespace wallet {
namespace { namespace {
constexpr double kZatoshisPerCoin = 100000000.0; // DRGX has 1e8 zatoshis per coin constexpr double kZatoshisPerCoin = 100000000.0; // DRGX has 1e8 zatoshis per coin
// Classifies a lightwalletd open error as "server is up but still warming up" (JSON-RPC -28 /
// "Activating best chain", "Loading"/"Verifying"/"Rescanning" phases) rather than a dead server.
// Such a server will be ready shortly, so the caller should retry it soon.
//
// The numeric code is matched as a standalone token: "-28" only counts when it is not glued to
// a preceding letter/digit and is not followed by another digit. A bare substring match would
// also fire on unrelated text such as a hostname ("node-28.example.com"), a date ("2026-06-28")
// or a different code ("-281"), misreporting a dead server as warming.
//
// @param error  Error text reported by the failed open attempt (may be empty).
// @return true if the text indicates a warming-up server, false otherwise.
bool liteOpenErrorIsWarmup(const std::string& error)
{
    const auto has = [&error](const char* s) { return error.find(s) != std::string::npos; };

    // Locale-independent ASCII classifiers (avoids pulling in <cctype>).
    const auto isDigit = [](char c) { return c >= '0' && c <= '9'; };
    const auto isAlnum = [&isDigit](char c) {
        return isDigit(c) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    };

    // True if any occurrence of "-28" stands alone as a token (e.g. ": -28:", "\"code\":-28").
    const auto hasWarmupCode = [&]() {
        constexpr std::size_t kCodeLen = 3;  // strlen("-28")
        for (std::size_t pos = error.find("-28"); pos != std::string::npos;
             pos = error.find("-28", pos + 1)) {
            const bool gluedBefore = pos > 0 && isAlnum(error[pos - 1]);
            const bool gluedAfter =
                pos + kCodeLen < error.size() && isDigit(error[pos + kCodeLen]);
            if (!gluedBefore && !gluedAfter) return true;
        }
        return false;
    };

    return hasWarmupCode() || has("Activating best chain") || has("warming up") ||
           has("Loading block") || has("Verifying blocks") || has("Rescanning");
}
// Extract a backend {"error":..} message (string or arbitrary JSON) into a plain string. // Extract a backend {"error":..} message (string or arbitrary JSON) into a plain string.
std::string extractJsonError(const nlohmann::json& value) std::string extractJsonError(const nlohmann::json& value)
{ {
@@ -361,7 +371,10 @@ bool LiteWalletController::beginOpenExisting()
} }
const std::string why = call.error.empty() ? "unreachable" : call.error; const std::string why = call.error.empty() ? "unreachable" : call.error;
liteLog(" " + url + ": " + why); liteLog(" " + url + ": " + why);
if (!call.error.empty()) outcome.error = call.error; if (!call.error.empty()) {
outcome.error = call.error;
if (liteOpenErrorIsWarmup(call.error)) outcome.warming = true; // healthy, just starting
}
} }
{ {
std::lock_guard<std::mutex> lk(*resultMutex); std::lock_guard<std::mutex> lk(*resultMutex);
@@ -386,6 +399,7 @@ void LiteWalletController::pumpAsyncOpen()
if (outcome.ok) { if (outcome.ok) {
walletOpen_ = true; walletOpen_ = true;
lastOpenError_.clear(); lastOpenError_.clear();
lastOpenWarming_ = false;
status_ = WalletBackendStatus{WalletBackendState::Ready, "wallet open", {}, {}, 0.0}; status_ = WalletBackendStatus{WalletBackendState::Ready, "wallet open", {}, {}, 0.0};
liteLog("Wallet opened via " + outcome.serverUrl); liteLog("Wallet opened via " + outcome.serverUrl);
if (persist_) persist_(); if (persist_) persist_();
@@ -393,8 +407,10 @@ void LiteWalletController::pumpAsyncOpen()
startWorker(); // begin periodic refresh -> WalletState startWorker(); // begin periodic refresh -> WalletState
} else { } else {
lastOpenError_ = outcome.error; lastOpenError_ = outcome.error;
lastOpenWarming_ = outcome.warming; // a healthy server was warming up -> retry sooner
status_ = WalletBackendStatus{WalletBackendState::Error, outcome.error, {}, {}, 0.0}; status_ = WalletBackendStatus{WalletBackendState::Error, outcome.error, {}, {}, 0.0};
liteLog("Open failed: " + outcome.error); liteLog("Open failed: " + outcome.error +
(outcome.warming ? " (a server is warming up — will retry shortly)" : ""));
} }
} }

View File

@@ -173,6 +173,10 @@ public:
void pumpAsyncOpen(); void pumpAsyncOpen();
bool openInProgress() const { return openRunning_ && openRunning_->load(); } bool openInProgress() const { return openRunning_ && openRunning_->load(); }
const std::string& lastOpenError() const { return lastOpenError_; } const std::string& lastOpenError() const { return lastOpenError_; }
// True if the last failed open hit a server that was merely warming up (JSON-RPC -28 /
// "Activating best chain"): the server is healthy and will be ready shortly, so the caller
// should retry sooner rather than waiting out the normal interval.
bool lastOpenWasWarmup() const { return lastOpenWarming_; }
bool syncStarted() const { return syncStarted_; } bool syncStarted() const { return syncStarted_; }
bool syncComplete() const { return syncDone_ && syncDone_->load(); } bool syncComplete() const { return syncDone_ && syncDone_->load(); }
@@ -290,13 +294,14 @@ private:
// the detached thread captures only shared_ptrs + value copies, never `this`, so it can // the detached thread captures only shared_ptrs + value copies, never `this`, so it can
// safely outlive the controller). pumpAsyncOpen() finalizes the result on the main thread. // safely outlive the controller). pumpAsyncOpen() finalizes the result on the main thread.
std::vector<std::string> failoverServerUrls() const; std::vector<std::string> failoverServerUrls() const;
struct OpenOutcome { bool ok = false; std::string serverUrl; std::string error; }; struct OpenOutcome { bool ok = false; bool warming = false; std::string serverUrl; std::string error; };
std::thread openThread_; std::thread openThread_;
std::shared_ptr<std::atomic<bool>> openRunning_ = std::make_shared<std::atomic<bool>>(false); std::shared_ptr<std::atomic<bool>> openRunning_ = std::make_shared<std::atomic<bool>>(false);
std::shared_ptr<std::mutex> openResultMutex_ = std::make_shared<std::mutex>(); std::shared_ptr<std::mutex> openResultMutex_ = std::make_shared<std::mutex>();
std::shared_ptr<std::optional<OpenOutcome>> openResult_ = std::shared_ptr<std::optional<OpenOutcome>> openResult_ =
std::make_shared<std::optional<OpenOutcome>>(); std::make_shared<std::optional<OpenOutcome>>();
std::string lastOpenError_; // main-thread only std::string lastOpenError_; // main-thread only
bool lastOpenWarming_ = false; // last failed open hit a warming-up (-28) server
// Joinable background refresh worker (fast iterations: syncstatus, plus data once synced). // Joinable background refresh worker (fast iterations: syncstatus, plus data once synced).
std::thread worker_; std::thread worker_;

View File

@@ -40,6 +40,8 @@ inline std::atomic<bool> g_liteFakeEncrypted{false}; // wallet-encryption state
inline std::atomic<bool> g_liteFakeLocked{false}; // spending-keys-locked state inline std::atomic<bool> g_liteFakeLocked{false}; // spending-keys-locked state
// initialize_existing fails for any server URL containing this substring (open-failover tests). // initialize_existing fails for any server URL containing this substring (open-failover tests).
inline std::string g_liteFakeDeadServerSubstr; inline std::string g_liteFakeDeadServerSubstr;
// initialize_existing returns a warming-up (-28) error for URLs containing this substring.
inline std::string g_liteFakeWarmupServerSubstr;
inline void resetLiteFakeCounters() inline void resetLiteFakeCounters()
{ {
@@ -47,6 +49,7 @@ inline void resetLiteFakeCounters()
g_liteFakeFreed = 0; g_liteFakeFreed = 0;
g_liteFakeShutdownCalled = false; g_liteFakeShutdownCalled = false;
g_liteFakeDeadServerSubstr.clear(); g_liteFakeDeadServerSubstr.clear();
g_liteFakeWarmupServerSubstr.clear();
} }
inline char* liteFakeDup(const char* s) inline char* liteFakeDup(const char* s)
@@ -74,9 +77,18 @@ inline char* liteFakeInitFromPhrase(bool, const char*, const char*,
// g_liteFakeDeadServerSubstr, so tests can exercise the controller's open-with-failover. // g_liteFakeDeadServerSubstr, so tests can exercise the controller's open-with-failover.
inline char* liteFakeInitExisting(bool, const char* server) inline char* liteFakeInitExisting(bool, const char* server)
{ {
if (!g_liteFakeDeadServerSubstr.empty() && server && if (server) {
std::string(server).find(g_liteFakeDeadServerSubstr) != std::string::npos) { const std::string s(server);
return liteFakeDup("Error: could not connect to server"); // bridge maps to ok=false if (!g_liteFakeWarmupServerSubstr.empty() &&
s.find(g_liteFakeWarmupServerSubstr) != std::string::npos) {
// Server is up but warming up — mirrors the real -28 / "Activating best chain".
return liteFakeDup("Error: grpc-message: \"error requesting block: -28: "
"Activating best chain...\"");
}
if (!g_liteFakeDeadServerSubstr.empty() &&
s.find(g_liteFakeDeadServerSubstr) != std::string::npos) {
return liteFakeDup("Error: could not connect to server"); // bridge maps to ok=false
}
} }
return liteFakeDup("OK"); return liteFakeDup("OK");
} }

View File

@@ -3599,8 +3599,26 @@ void testLiteWalletControllerOpenFailover()
EXPECT_TRUE(!controller.lastOpenError().empty()); EXPECT_TRUE(!controller.lastOpenError().empty());
} }
// A warming-up (-28) server is flagged so the caller can retry sooner. Preferred server is
// warming, the fallback is dead -> open fails but lastOpenWasWarmup() is set.
{
// Scenario: every server fails, but one of them fails with a warmup (-28) error. The open must
// still be reported as failed, with the warmup flag raised for the caller's retry logic.
// resetLiteFakeCounters() also clears both server-substring knobs, so earlier cases don't leak in.
dragonx::test::resetLiteFakeCounters();
dragonx::test::g_liteFakeWalletExists = true;
// NOTE(review): the host names are reused from the earlier dead-server case, so their literal
// meaning is inverted here — "dead.example" is the warming server and "good.example" is the
// unreachable one. Presumably `conn` (configured earlier in this test) prefers dead.example —
// confirm against the setup above.
dragonx::test::g_liteFakeWarmupServerSubstr = "dead.example"; // preferred (sticky) is warming
dragonx::test::g_liteFakeDeadServerSubstr = "good.example"; // fallback unreachable
LiteWalletController controller(liteCaps, conn,
LiteClientBridge::fromApi(dragonx::test::makeFakeLiteApi()));
EXPECT_TRUE(controller.beginOpenExisting());
// Poll for the background open to finish: at most 400 iterations of 5 ms each.
for (int i = 0; i < 400 && controller.openInProgress(); ++i)
std::this_thread::sleep_for(std::chrono::milliseconds(5));
// Finalize the open outcome on this thread before inspecting the controller.
controller.pumpAsyncOpen();
EXPECT_FALSE(controller.walletOpen());
EXPECT_TRUE(controller.lastOpenWasWarmup());
}
dragonx::test::g_liteFakeWalletExists = false; dragonx::test::g_liteFakeWalletExists = false;
dragonx::test::g_liteFakeDeadServerSubstr.clear(); dragonx::test::g_liteFakeDeadServerSubstr.clear();
dragonx::test::g_liteFakeWarmupServerSubstr.clear();
} }
// M2: a parsed lite refresh bundle maps through to the app's WalletState (the last hop // M2: a parsed lite refresh bundle maps through to the app's WalletState (the last hop