Redesign benchmark to measure sustained (thermally throttled) hashrate

The benchmark now reports sustained hashrate instead of initial burst
performance. Previously it used a fixed 20s warmup + 10s peak
measurement, which reported inflated results on thermally constrained
hardware (e.g. 179 H/s vs actual sustained 117 H/s on a MacBook Pro).

- Adaptive warmup with stability detection: mine for at least 90s,
  then compare rolling 10s hashrate windows. Require 3 consecutive
  windows within 5% before declaring thermal equilibrium (cap 300s)
- Average-based measurement: record mean hashrate over 30s instead
  of peak, reflecting real sustained throughput
- Start candidates at half the system cores — lower thread counts
  are rarely optimal and waste time warming up
- Add CoolingDown phase: 5s idle pause between tests so each starts
  from a similar thermal baseline
- Adaptive time estimates: use observed warmup durations from
  completed tests to predict remaining time
- UI shows "Stabilizing" when waiting for thermal equilibrium past
  the minimum warmup, and "Cooling" during idle pauses
This commit is contained in:
2026-04-06 13:51:56 -05:00
parent 3ff62ca248
commit 821c54ba2b
2 changed files with 158 additions and 42 deletions

View File

@@ -45,7 +45,7 @@ static int s_earnings_filter = 0;
// Thread benchmark state
struct ThreadBenchmark {
enum class Phase { Idle, Starting, WarmingUp, Measuring, Advancing, Done };
enum class Phase { Idle, Starting, WarmingUp, Measuring, Advancing, CoolingDown, Done };
Phase phase = Phase::Idle;
std::vector<int> candidates;
@@ -58,51 +58,94 @@ struct ThreadBenchmark {
std::vector<Result> results;
float phase_timer = 0.0f;
static constexpr float WARMUP_SECS = 20.0f;
static constexpr float MEASURE_SECS = 10.0f;
double best_sample = 0.0; // best hashrate_10s during current measurement window
int sample_count = 0; // number of non-zero hashrate samples collected
// Warmup: wait at least MIN then check for hashrate stability; cap at MAX.
// Laptops need 90s+ for thermal throttling to fully manifest.
static constexpr float MIN_WARMUP_SECS = 90.0f;
static constexpr float MAX_WARMUP_SECS = 300.0f;
static constexpr float MEASURE_SECS = 30.0f;
static constexpr float COOLDOWN_SECS = 5.0f;
// Stability detection — compare rolling 10s hashrate windows.
// Require STABLE_WINDOWS_NEEDED consecutive stable readings.
static constexpr float STABILITY_WINDOW_SECS = 10.0f;
static constexpr float STABILITY_THRESHOLD = 0.05f; // 5% change → stable
static constexpr int STABLE_WINDOWS_NEEDED = 3;
double prev_window_avg = 0.0;
double window_sum = 0.0;
int window_samples = 0;
float window_timer = 0.0f;
int consecutive_stable = 0; // count of consecutive stable windows
// Measurement: average-based (sustained performance, not peak burst)
double measure_sum = 0.0;
int measure_samples = 0;
int optimal_threads = 0;
double optimal_hashrate = 0.0;
bool was_pool_running = false;
int prev_threads = 0;
// Track actual warmup durations for better time estimates
float total_warmup_secs = 0.0f;
void reset() {
phase = Phase::Idle;
candidates.clear();
current_index = 0;
results.clear();
phase_timer = 0.0f;
best_sample = 0.0;
sample_count = 0;
prev_window_avg = 0.0;
window_sum = 0.0;
window_samples = 0;
window_timer = 0.0f;
consecutive_stable = 0;
measure_sum = 0.0;
measure_samples = 0;
optimal_threads = 0;
optimal_hashrate = 0.0;
was_pool_running = false;
prev_threads = 0;
total_warmup_secs = 0.0f;
}
void buildCandidates(int max_threads) {
candidates.clear();
if (max_threads <= 16) {
for (int t = 1; t <= max_threads; t++)
candidates.push_back(t);
} else {
// Sample: 1, then every ceil(max/10) step, always including max
int step = std::max(1, (max_threads + 9) / 10);
for (int t = 1; t <= max_threads; t += step)
candidates.push_back(t);
if (candidates.back() != max_threads)
candidates.push_back(max_threads);
}
// Start at half the cores — lower counts are rarely optimal and
// testing them first would waste time warming up the CPU before
// reaching the thread counts that actually matter.
int start = std::max(1, max_threads / 2);
for (int t = start; t <= max_threads; t++)
candidates.push_back(t);
}
/// Mean warmup time, in seconds, per test so far.
///
/// Divides the accumulated total_warmup_secs by current_index. While
/// current_index is still zero there is nothing to average, so the midpoint
/// of MIN_WARMUP_SECS and MAX_WARMUP_SECS is returned as a first guess.
///
/// NOTE(review): total_warmup_secs appears to grow as soon as a warmup
/// finishes, whereas current_index advances only after the measurement, so
/// the mean may read high while a test is being measured — confirm.
float avgWarmupSecs() const {
    if (current_index <= 0) {
        // No divisor yet: fall back to the midpoint of the allowed range.
        return (MIN_WARMUP_SECS + MAX_WARMUP_SECS) * 0.5f;
    }
    const float tests_done = (float)current_index;
    return total_warmup_secs / tests_done;
}
/// Expected duration of one test in seconds: the warmup estimate from
/// avgWarmupSecs() plus the fixed MEASURE_SECS window. COOLDOWN_SECS is
/// not included.
float perTestSecs() const {
    const float warmup_estimate = avgWarmupSecs();
    return warmup_estimate + MEASURE_SECS;
}
float totalEstimatedSecs() const {
return (float)candidates.size() * (WARMUP_SECS + MEASURE_SECS);
int n = (int)candidates.size();
if (n <= 0) return 0.0f;
// Completed tests use actual time; remaining use estimate
float completed_time = total_warmup_secs
+ (float)current_index * (MEASURE_SECS + COOLDOWN_SECS);
int remaining = n - current_index;
float remaining_time = (float)remaining * (avgWarmupSecs() + MEASURE_SECS)
+ (float)std::max(0, remaining - 1) * COOLDOWN_SECS;
return completed_time + remaining_time;
}
float elapsedSecs() const {
float completed = (float)current_index * (WARMUP_SECS + MEASURE_SECS);
float completed = total_warmup_secs
+ (float)current_index * (MEASURE_SECS + COOLDOWN_SECS);
return completed + phase_timer;
}
@@ -110,6 +153,14 @@ struct ThreadBenchmark {
float total = totalEstimatedSecs();
return (total > 0.0f) ? std::min(1.0f, elapsedSecs() / total) : 0.0f;
}
/// Clears every field used by the warmup stability detector so the next
/// warmup starts with no previous window, no partial window and no streak
/// of stable readings.
void resetStabilityTracking() {
    // Streak counter and the reference average from the previous window.
    consecutive_stable = 0;
    prev_window_avg    = 0.0;

    // Accumulators for the window currently being filled.
    window_timer   = 0.0f;
    window_samples = 0;
    window_sum     = 0.0;
}
};
static ThreadBenchmark s_benchmark;
@@ -339,34 +390,75 @@ static void RenderMiningTabContent(App* app)
app->startPoolMining(t);
s_benchmark.phase = ThreadBenchmark::Phase::WarmingUp;
s_benchmark.phase_timer = 0.0f;
s_benchmark.best_sample = 0.0;
s_benchmark.sample_count = 0;
s_benchmark.resetStabilityTracking();
s_benchmark.measure_sum = 0.0;
s_benchmark.measure_samples = 0;
} else {
s_benchmark.phase = ThreadBenchmark::Phase::Done;
}
break;
case ThreadBenchmark::Phase::WarmingUp:
if (s_benchmark.phase_timer >= ThreadBenchmark::WARMUP_SECS) {
case ThreadBenchmark::Phase::WarmingUp: {
// Adaptive warmup: wait for hashrate to stabilize (thermal steady state).
// After MIN_WARMUP (90s), compare rolling 10s hashrate windows.
// Require 3 consecutive windows within 5% to confirm equilibrium.
// Laptops can take 2-3+ minutes for thermal throttling to fully
// manifest, so a single stable window isn't sufficient.
bool past_min = s_benchmark.phase_timer >= ThreadBenchmark::MIN_WARMUP_SECS;
bool past_max = s_benchmark.phase_timer >= ThreadBenchmark::MAX_WARMUP_SECS;
// Accumulate samples into current window
if (state.pool_mining.hashrate_10s > 0.0) {
s_benchmark.window_sum += state.pool_mining.hashrate_10s;
s_benchmark.window_samples++;
}
s_benchmark.window_timer += dt;
bool stable = false;
if (past_min && s_benchmark.window_timer >= ThreadBenchmark::STABILITY_WINDOW_SECS
&& s_benchmark.window_samples > 0) {
double current_avg = s_benchmark.window_sum / s_benchmark.window_samples;
if (s_benchmark.prev_window_avg > 0.0) {
double change = std::abs(current_avg - s_benchmark.prev_window_avg)
/ s_benchmark.prev_window_avg;
if (change < ThreadBenchmark::STABILITY_THRESHOLD)
s_benchmark.consecutive_stable++;
else
s_benchmark.consecutive_stable = 0; // reset on instability
if (s_benchmark.consecutive_stable >= ThreadBenchmark::STABLE_WINDOWS_NEEDED)
stable = true;
}
// Shift window
s_benchmark.prev_window_avg = current_avg;
s_benchmark.window_sum = 0.0;
s_benchmark.window_samples = 0;
s_benchmark.window_timer = 0.0f;
}
if (stable || past_max) {
s_benchmark.total_warmup_secs += s_benchmark.phase_timer;
s_benchmark.phase = ThreadBenchmark::Phase::Measuring;
s_benchmark.phase_timer = 0.0f;
s_benchmark.best_sample = 0.0;
s_benchmark.sample_count = 0;
s_benchmark.measure_sum = 0.0;
s_benchmark.measure_samples = 0;
}
break;
}
case ThreadBenchmark::Phase::Measuring:
// Sample hashrate during measurement window
// Sample average hashrate — reflects sustained (thermally throttled) performance
if (state.pool_mining.hashrate_10s > 0.0) {
s_benchmark.sample_count++;
if (state.pool_mining.hashrate_10s > s_benchmark.best_sample)
s_benchmark.best_sample = state.pool_mining.hashrate_10s;
s_benchmark.measure_sum += state.pool_mining.hashrate_10s;
s_benchmark.measure_samples++;
}
if (s_benchmark.phase_timer >= ThreadBenchmark::MEASURE_SECS) {
int t = s_benchmark.candidates[s_benchmark.current_index];
s_benchmark.results.push_back({t, s_benchmark.best_sample});
if (s_benchmark.best_sample > s_benchmark.optimal_hashrate) {
s_benchmark.optimal_hashrate = s_benchmark.best_sample;
double avg = (s_benchmark.measure_samples > 0)
? s_benchmark.measure_sum / s_benchmark.measure_samples
: 0.0;
s_benchmark.results.push_back({t, avg});
if (avg > s_benchmark.optimal_hashrate) {
s_benchmark.optimal_hashrate = avg;
s_benchmark.optimal_threads = t;
}
s_benchmark.phase = ThreadBenchmark::Phase::Advancing;
@@ -378,7 +470,9 @@ static void RenderMiningTabContent(App* app)
app->stopPoolMining();
s_benchmark.current_index++;
if (s_benchmark.current_index < (int)s_benchmark.candidates.size()) {
s_benchmark.phase = ThreadBenchmark::Phase::Starting;
// Cool down before next test to reduce thermal throttling bias
s_benchmark.phase = ThreadBenchmark::Phase::CoolingDown;
s_benchmark.phase_timer = 0.0f;
} else {
// Done — apply optimal thread count
s_benchmark.phase = ThreadBenchmark::Phase::Done;
@@ -394,6 +488,14 @@ static void RenderMiningTabContent(App* app)
}
break;
case ThreadBenchmark::Phase::CoolingDown:
// Idle pause — let CPU temps drop before starting next test
if (s_benchmark.phase_timer >= ThreadBenchmark::COOLDOWN_SECS) {
s_benchmark.phase = ThreadBenchmark::Phase::Starting;
s_benchmark.phase_timer = 0.0f;
}
break;
default:
break;
}
@@ -1297,26 +1399,38 @@ static void RenderMiningTabContent(App* app)
// Status text above bar
int ct = s_benchmark.current_index < (int)s_benchmark.candidates.size()
? s_benchmark.candidates[s_benchmark.current_index] : 0;
// Estimated remaining time
// Estimated remaining time (uses observed warmup for better accuracy)
int remaining_tests = (int)s_benchmark.candidates.size() - s_benchmark.current_index;
float elapsed_in_phase = s_benchmark.phase_timer;
float phase_total = (s_benchmark.phase == ThreadBenchmark::Phase::WarmingUp)
? ThreadBenchmark::WARMUP_SECS
: ThreadBenchmark::MEASURE_SECS;
float phase_total;
if (s_benchmark.phase == ThreadBenchmark::Phase::WarmingUp)
phase_total = s_benchmark.avgWarmupSecs(); // adaptive estimate
else if (s_benchmark.phase == ThreadBenchmark::Phase::CoolingDown)
phase_total = ThreadBenchmark::COOLDOWN_SECS;
else
phase_total = ThreadBenchmark::MEASURE_SECS;
float remaining_in_current = std::max(0.0f, phase_total - elapsed_in_phase);
// Remaining tests after current each need warmup + measure
// Remaining tests after current each need warmup + measure + cooldown
float est_secs = remaining_in_current
+ (remaining_tests - 1) * (ThreadBenchmark::WARMUP_SECS + ThreadBenchmark::MEASURE_SECS);
+ (remaining_tests - 1) * (s_benchmark.avgWarmupSecs() + ThreadBenchmark::MEASURE_SECS + ThreadBenchmark::COOLDOWN_SECS);
int est_min = (int)(est_secs / 60.0f);
int est_sec = (int)est_secs % 60;
const char* phase_label;
if (s_benchmark.phase == ThreadBenchmark::Phase::CoolingDown)
phase_label = TR("mining_benchmark_cooling");
else if (s_benchmark.phase == ThreadBenchmark::Phase::WarmingUp
&& s_benchmark.phase_timer >= ThreadBenchmark::MIN_WARMUP_SECS)
phase_label = TR("mining_benchmark_stabilizing");
else
phase_label = TR("mining_benchmark_testing");
if (est_min > 0)
snprintf(buf, sizeof(buf), "%s %d/%d (%dt) ~%dm%ds",
TR("mining_benchmark_testing"),
phase_label,
s_benchmark.current_index + 1,
(int)s_benchmark.candidates.size(), ct, est_min, est_sec);
else
snprintf(buf, sizeof(buf), "%s %d/%d (%dt) ~%ds",
TR("mining_benchmark_testing"),
phase_label,
s_benchmark.current_index + 1,
(int)s_benchmark.candidates.size(), ct, est_sec);
ImVec2 txtSz = capFont->CalcTextSizeA(capFont->LegacySize, FLT_MAX, 0, buf);

View File

@@ -879,6 +879,8 @@ void I18n::loadBuiltinEnglish()
strings_["mining_reset_defaults"] = "Reset Defaults";
strings_["mining_benchmark_tooltip"] = "Find optimal thread count for this CPU";
strings_["mining_benchmark_testing"] = "Testing";
strings_["mining_benchmark_cooling"] = "Cooling";
strings_["mining_benchmark_stabilizing"] = "Stabilizing";
strings_["mining_benchmark_cancel"] = "Cancel benchmark";
strings_["mining_benchmark_result"] = "Optimal";
strings_["mining_benchmark_dismiss"] = "Dismiss";