fix(node): don't get stranded when the daemon can't start on startup

Two failure modes left the wallet stuck on a silent "connecting / Starting
dragonxd…" spinner with no path forward:

1. Stale external-daemon latch. EmbeddedDaemon::start() sets
   external_daemon_detected_ whenever the RPC port was busy at a prior attempt
   and never re-checks it. tryConnect's no-config branch trusted that latch, so
   it kept waiting for a config file that would never appear — even after the
   stale/half-dead process that had held the port was gone. Now the port is
   re-checked live (EmbeddedDaemon::isRpcPortInUse()) on every attempt: if it is
   still busy we keep waiting and, on the 4th consecutive attempt with no config
   (~20s), warn once that whatever owns the port isn't a usable DragonX node and
   how to fix it; if it is free we fall through and start our own daemon.

2. Silent start failure. When startEmbeddedDaemon() failed (binary not found,
   Sapling params missing, spawn failure) the status stayed on "Starting
   dragonxd…" with the real reason only in a VERBOSE log. Now the reason
   (daemon_controller_->lastError()) is surfaced once as a sticky error
   notification, with a short "Couldn't start dragonxd" status.

Both the wait counter and the error-shown flag are reset in onConnected(), so the
messages re-arm for the next disconnect. Lite is unaffected (tryConnect returns
early for lite builds).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-09 20:55:41 -05:00
parent 4a65dce947
commit 41b380449e
3 changed files with 54 additions and 15 deletions

View File

@@ -507,6 +507,10 @@ private:
std::string connection_status_ = "Disconnected"; std::string connection_status_ = "Disconnected";
bool connection_in_progress_ = false; bool connection_in_progress_ = false;
bool remote_rpc_plaintext_warning_shown_ = false; bool remote_rpc_plaintext_warning_shown_ = false;
// Startup daemon-launch diagnostics: bound the "RPC port busy, no config" wait before warning,
// and show the embedded-daemon start failure (binary/params/spawn) only once. Reset on connect.
int daemon_wait_attempts_ = 0;
bool daemon_start_error_shown_ = false;
float loading_timer_ = 0.0f; // spinner animation for loading overlay float loading_timer_ = 0.0f; // spinner animation for loading overlay
// Current page (sidebar navigation) // Current page (sidebar navigation)

View File

@@ -165,6 +165,12 @@ static const char* const kDaemonInitPhases[] = {
"Rescanning", "Rewinding", "Activating", "Verifying", "Loading", "Pruning", "Rescanning", "Rewinding", "Activating", "Verifying", "Loading", "Pruning",
}; };
// Number of consecutive "RPC port busy but no config" connect attempts to wait through before
// warning the user that whatever owns the port isn't a usable DragonX node. The core retry fires
// every few seconds, so this works out to roughly 20s — long enough for a real daemon to write
// its config, short enough not to leave the user guessing.
static constexpr int kDaemonWaitWarnAttempts = 4;
// ============================================================================ // ============================================================================
// Connection Management // Connection Management
// ============================================================================ // ============================================================================
@@ -193,19 +199,32 @@ void App::tryConnect()
VERBOSE_LOGF("[connect #%d] No valid config — DRAGONX.conf missing or no rpcuser/rpcpassword (looked at: %s)\n", VERBOSE_LOGF("[connect #%d] No valid config — DRAGONX.conf missing or no rpcuser/rpcpassword (looked at: %s)\n",
connect_attempt, confPath.c_str()); connect_attempt, confPath.c_str());
// If we already know an external daemon is on the port, just wait // Re-evaluate the RPC port LIVE rather than trusting a latched "external daemon detected"
// for the config file to appear (the daemon creates it on first run). // flag: EmbeddedDaemon::start() sets that latch whenever the port was busy at a prior
if (daemon_controller_ && daemon_controller_->externalDaemonDetected()) { // attempt and then never re-checks it, so a stale socket (or a transient squatter that has
// since died) would strand us forever "waiting for config". If the port is genuinely busy,
// a real daemon writes its config shortly and we keep waiting; if it's free, we must start
// our own.
const bool portInUse = daemon::EmbeddedDaemon::isRpcPortInUse();
if (portInUse) {
connection_status_ = TR("sb_waiting_config"); connection_status_ = TR("sb_waiting_config");
VERBOSE_LOGF("[connect #%d] External daemon detected on port, waiting for config file to appear\n", connect_attempt); VERBOSE_LOGF("[connect #%d] RPC port in use but no config yet — waiting for the daemon to write it\n",
connect_attempt);
// After a bounded wait with no config appearing, whatever owns the port is not a usable
// DragonX node (a foreign process, or a stuck/half-dead daemon). Say so once, with the
// action, instead of leaving the user on a silent "waiting" spinner forever.
if (++daemon_wait_attempts_ == kDaemonWaitWarnAttempts) {
ui::Notifications::instance().warning(TR("daemon_port_busy_warn"), 20.0f);
}
network_refresh_.setTimer(services::NetworkRefreshService::Timer::Core, network_refresh_.setTimer(services::NetworkRefreshService::Timer::Core,
services::RefreshScheduler::kCoreDefault - 1.0f); services::RefreshScheduler::kCoreDefault - 1.0f);
return; return;
} }
daemon_wait_attempts_ = 0; // port is free — clear the bounded-wait counter
connection_status_ = TR("sb_no_conf"); connection_status_ = TR("sb_no_conf");
// Try to start embedded daemon if enabled // Port is free → start our own embedded daemon (if enabled).
if (isUsingEmbeddedDaemon() && !isEmbeddedDaemonRunning()) { if (isUsingEmbeddedDaemon() && !isEmbeddedDaemonRunning()) {
connection_status_ = TR("sb_starting_daemon"); connection_status_ = TR("sb_starting_daemon");
if (startEmbeddedDaemon()) { if (startEmbeddedDaemon()) {
@@ -213,16 +232,24 @@ void App::tryConnect()
VERBOSE_LOGF("[connect #%d] Embedded daemon starting, will retry connection...\n", connect_attempt); VERBOSE_LOGF("[connect #%d] Embedded daemon starting, will retry connection...\n", connect_attempt);
network_refresh_.setTimer(services::NetworkRefreshService::Timer::Core, network_refresh_.setTimer(services::NetworkRefreshService::Timer::Core,
services::RefreshScheduler::kCoreDefault - 1.0f); services::RefreshScheduler::kCoreDefault - 1.0f);
} else if (daemon_controller_ && daemon_controller_->externalDaemonDetected()) { } else {
connection_status_ = TR("sb_waiting_config"); // The daemon couldn't be started (binary not found, Sapling params missing, spawn
VERBOSE_LOGF("[connect #%d] External daemon detected but no config yet, will retry...\n", connect_attempt); // failure, …). Surface the actual reason instead of leaving the status stuck on
// "Starting dragonxd…": connection_status_ for the overlay/status bar, plus a
// one-time sticky notification with the full, actionable detail.
std::string detail = daemon_controller_ ? daemon_controller_->lastError() : std::string();
VERBOSE_LOGF("[connect #%d] startEmbeddedDaemon() failed — lastError: %s, binary: %s\n",
connect_attempt, detail.empty() ? "(none)" : detail.c_str(),
daemon::EmbeddedDaemon::findDaemonBinary().c_str());
connection_status_ = TR("sb_daemon_start_failed");
if (!daemon_start_error_shown_) {
daemon_start_error_shown_ = true;
ui::Notifications::instance().error(
detail.empty() ? std::string(TR("sb_daemon_start_failed")) : detail, 30.0f);
}
// Keep retrying: a missing binary/params can be fixed without a restart.
network_refresh_.setTimer(services::NetworkRefreshService::Timer::Core, network_refresh_.setTimer(services::NetworkRefreshService::Timer::Core,
services::RefreshScheduler::kCoreDefault - 1.0f); services::RefreshScheduler::kCoreDefault - 1.0f);
} else {
VERBOSE_LOGF("[connect #%d] startEmbeddedDaemon() failed — lastError: %s, binary: %s\n",
connect_attempt,
daemon_controller_ ? daemon_controller_->lastError().c_str() : "(no daemon object)",
daemon::EmbeddedDaemon::findDaemonBinary().c_str());
} }
} else if (!isUsingEmbeddedDaemon()) { } else if (!isUsingEmbeddedDaemon()) {
VERBOSE_LOGF("[connect #%d] Embedded daemon disabled (using external). No config found at %s\n", VERBOSE_LOGF("[connect #%d] Embedded daemon disabled (using external). No config found at %s\n",
@@ -420,6 +447,8 @@ void App::onConnected()
{ {
state_.connected = true; state_.connected = true;
state_.daemon_initializing = false; // RPC is answering now; clear the "initializing" overlay state_.daemon_initializing = false; // RPC is answering now; clear the "initializing" overlay
daemon_wait_attempts_ = 0; // re-arm the port-busy / start-failure notifications
daemon_start_error_shown_ = false;
connection_status_ = TR("connected"); connection_status_ = TR("connected");
// Reset crash counter on successful connection // Reset crash counter on successful connection

View File

@@ -4,6 +4,7 @@
#include "i18n.h" #include "i18n.h"
#include "platform.h" #include "platform.h"
#include "../config/version.h" // DRAGONX_DEFAULT_RPC_PORT
#include <fstream> #include <fstream>
#include <cstdio> #include <cstdio>
@@ -780,6 +781,11 @@ void I18n::loadBuiltinEnglish()
strings_["sb_connecting_generic"] = "Connecting to daemon..."; strings_["sb_connecting_generic"] = "Connecting to daemon...";
strings_["sb_connecting_err"] = "Connecting to daemon — %s"; strings_["sb_connecting_err"] = "Connecting to daemon — %s";
strings_["sb_daemon_crashed"] = "Daemon crashed %d times"; strings_["sb_daemon_crashed"] = "Daemon crashed %d times";
strings_["sb_daemon_start_failed"] = "Couldn't start dragonxd";
strings_["daemon_port_busy_warn"] =
"Port " DRAGONX_DEFAULT_RPC_PORT " is in use but isn't responding as a DragonX node. "
"Close the program using it (or free the port), then restart — the wallet can't start "
"its own node while the port is taken.";
strings_["sb_extracting_sapling"] = "Extracting Sapling parameters..."; strings_["sb_extracting_sapling"] = "Extracting Sapling parameters...";
strings_["sb_sapling_failed"] = "Failed to extract Sapling parameters."; strings_["sb_sapling_failed"] = "Failed to extract Sapling parameters.";
strings_["sb_sapling_not_found"] = "Sapling parameters not found."; strings_["sb_sapling_not_found"] = "Sapling parameters not found.";