From 9ee8f9a43b95d6573d31bafa7704a91c38edfc71 Mon Sep 17 00:00:00 2001 From: DanS Date: Fri, 12 Jun 2026 01:53:30 -0500 Subject: [PATCH] fix(send): restart the fast-lane worker on reconnect so the opid poll runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A completed send could spin forever on "Waiting for operation (N)". Root cause: onDisconnected() stopped fast_worker_ but kept the unique_ptr, so onConnected()'s `if (!fast_worker_)` guard never restarted it — after the first reconnect (daemon warmup, restart, any RPC blip) the fast lane stayed dead for the whole session. The opid poll was the only fast_worker_ user that posted to it directly with no fallback, so it alone broke: its post() landed on a stopped thread, the result MainCb never ran, opid_poll_in_progress_ stuck true, and the poll never fired again — leaving the operation (already "success" on the daemon, with a txid) untracked. Two fixes: - onDisconnected() now reset()s fast_worker_ after stop(), so onConnected recreates and starts a fresh one (restores the fast lane for all its users, not just the poll). - the opid poll now falls back to worker_ when the fast lane isn't running, matching every other fast_worker_ call site — defense in depth. Co-Authored-By: Claude Opus 4.8 --- src/app.cpp | 11 ++++++++--- src/app_network.cpp | 5 +++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/app.cpp b/src/app.cpp index 3bb21f8..1d9fe8b 100644 --- a/src/app.cpp +++ b/src/app.cpp @@ -760,13 +760,18 @@ void App::update() } } - // Poll pending z_sendmany operations for completion (full-node opid flow; lite has none) + // Poll pending z_sendmany operations for completion (full-node opid flow; lite has none). + // Prefer the fast lane but fall back to the main worker (mirrors every other fast_worker_ user) + // so a torn-down/not-yet-restarted fast lane can't silently strand the poll on "Waiting for + // operation" — the symptom when fast_worker_ was stopped on reconnect and never came back. + rpc::RPCWorker* opidWorker = (fast_worker_ && fast_worker_->isRunning()) + ? fast_worker_.get() : worker_.get(); if (network_refresh_.isDue(RefreshTimer::Opid) && !pending_opids_.empty() - && rpcConnected && fast_worker_ && !opid_poll_in_progress_) { + && rpcConnected && opidWorker && !opid_poll_in_progress_) { network_refresh_.reset(RefreshTimer::Opid); auto opids = pending_opids_; // copy for worker thread opid_poll_in_progress_ = true; - fast_worker_->post([this, opids]() -> rpc::RPCWorker::MainCb { + opidWorker->post([this, opids]() -> rpc::RPCWorker::MainCb { auto* rpc = (fast_rpc_ && fast_rpc_->isConnected()) ? fast_rpc_.get() : rpc_.get(); if (!rpc) return [this](){ opid_poll_in_progress_ = false; }; json ids = json::array(); diff --git a/src/app_network.cpp b/src/app_network.cpp index 544b119..d9bb1b2 100644 --- a/src/app_network.cpp +++ b/src/app_network.cpp @@ -582,6 +582,11 @@ void App::onDisconnected(const std::string& reason) if (fast_rpc_) fast_rpc_->requestAbort(); if (fast_worker_) { fast_worker_->stop(); + // Drop the stopped worker so onConnected recreates and starts a fresh one. Keeping a + // stopped-but-present worker would defeat onConnected's `if (!fast_worker_)` guard, leaving + // the fast lane dead for the rest of the session — which silently stalls the opid poll + // (its post() never runs, so a completed send spins on "Waiting for operation"). + fast_worker_.reset(); } if (fast_rpc_) { fast_rpc_->disconnect();