Redesign benchmark to measure sustained (thermally throttled) hashrate
instead of initial burst performance.

Previously the benchmark used a fixed 20s warmup + 10s peak measurement, which reported inflated results on thermally constrained hardware (e.g. 179 H/s vs. an actual sustained 117 H/s on a MacBook Pro).

- Adaptive warmup with stability detection: mine for at least 90s, then compare rolling 10s hashrate windows. Require 3 consecutive windows within 5% of each other before declaring thermal equilibrium (capped at 300s).
- Average-based measurement: record the mean hashrate over 30s instead of the peak, reflecting real sustained throughput.
- Start candidates at half the system cores — lower thread counts are rarely optimal and waste time warming up.
- Add a CoolingDown phase: a 5s idle pause between tests so each starts from a similar thermal baseline.
- Adaptive time estimates: use observed warmup durations from completed tests to predict remaining time.
- UI shows "Stabilizing" when waiting for thermal equilibrium past the minimum warmup, and "Cooling" during idle pauses.
2026-04-06 13:51:56 -05:00
parent 3ff62ca248
commit 821c54ba2b
2 changed files with 158 additions and 42 deletions
--- a/src/ui/windows/mining_tab.cpp
+++ b/src/ui/windows/mining_tab.cpp
@@ -45,7 +45,7 @@ static int s_earnings_filter = 0;

 // Thread benchmark state
 struct ThreadBenchmark {
-    enum class Phase { Idle, Starting, WarmingUp, Measuring, Advancing, Done };
+    enum class Phase { Idle, Starting, WarmingUp, Measuring, Advancing, CoolingDown, Done };
    Phase phase = Phase::Idle;

    std::vector<int> candidates;
@@ -58,51 +58,94 @@ struct ThreadBenchmark {
    std::vector<Result> results;

    float phase_timer = 0.0f;
-    static constexpr float WARMUP_SECS  = 20.0f;
-    static constexpr float MEASURE_SECS = 10.0f;
-    double best_sample = 0.0;   // best hashrate_10s during current measurement window
-    int sample_count = 0;       // number of non-zero hashrate samples collected
+
+    // Warmup: wait at least MIN then check for hashrate stability; cap at MAX.
+    // Laptops need 90s+ for thermal throttling to fully manifest.
+    static constexpr float MIN_WARMUP_SECS = 90.0f;
+    static constexpr float MAX_WARMUP_SECS = 300.0f;
+    static constexpr float MEASURE_SECS    = 30.0f;
+    static constexpr float COOLDOWN_SECS   = 5.0f;
+
+    // Stability detection — compare rolling 10s hashrate windows.
+    // Require STABLE_WINDOWS_NEEDED consecutive stable readings.
+    static constexpr float STABILITY_WINDOW_SECS  = 10.0f;
+    static constexpr float STABILITY_THRESHOLD     = 0.05f;  // 5% change → stable
+    static constexpr int   STABLE_WINDOWS_NEEDED   = 3;
+    double prev_window_avg    = 0.0;
+    double window_sum         = 0.0;
+    int    window_samples     = 0;
+    float  window_timer       = 0.0f;
+    int    consecutive_stable = 0;  // count of consecutive stable windows
+
+    // Measurement: average-based (sustained performance, not peak burst)
+    double measure_sum    = 0.0;
+    int    measure_samples = 0;

    int optimal_threads  = 0;
    double optimal_hashrate = 0.0;
    bool was_pool_running = false;
    int  prev_threads     = 0;

+    // Track actual warmup durations for better time estimates
+    float total_warmup_secs = 0.0f;
+
    void reset() {
        phase = Phase::Idle;
        candidates.clear();
        current_index = 0;
        results.clear();
        phase_timer = 0.0f;
-        best_sample = 0.0;
-        sample_count = 0;
+        prev_window_avg = 0.0;
+        window_sum = 0.0;
+        window_samples = 0;
+        window_timer = 0.0f;
+        consecutive_stable = 0;
+        measure_sum = 0.0;
+        measure_samples = 0;
        optimal_threads = 0;
        optimal_hashrate = 0.0;
        was_pool_running = false;
        prev_threads = 0;
+        total_warmup_secs = 0.0f;
    }

    void buildCandidates(int max_threads) {
        candidates.clear();
-        if (max_threads <= 16) {
-            for (int t = 1; t <= max_threads; t++)
-                candidates.push_back(t);
-        } else {
-            // Sample: 1, then every ceil(max/10) step, always including max
-            int step = std::max(1, (max_threads + 9) / 10);
-            for (int t = 1; t <= max_threads; t += step)
-                candidates.push_back(t);
-            if (candidates.back() != max_threads)
-                candidates.push_back(max_threads);
-        }
+        // Start at half the cores — lower counts are rarely optimal and
+        // testing them first would waste time warming up the CPU before
+        // reaching the thread counts that actually matter.
+        int start = std::max(1, max_threads / 2);
+        for (int t = start; t <= max_threads; t++)
+            candidates.push_back(t);
+    }
+
+    /// Average warmup duration based on tests completed so far
+    float avgWarmupSecs() const {
+        if (current_index > 0)
+            return total_warmup_secs / (float)current_index;
+        return (MIN_WARMUP_SECS + MAX_WARMUP_SECS) * 0.5f;  // initial estimate
+    }
+
+    /// Estimated seconds per test (uses observed warmup average)
+    float perTestSecs() const {
+        return avgWarmupSecs() + MEASURE_SECS;
    }

    float totalEstimatedSecs() const {
-        return (float)candidates.size() * (WARMUP_SECS + MEASURE_SECS);
+        int n = (int)candidates.size();
+        if (n <= 0) return 0.0f;
+        // Completed tests use actual time; remaining use estimate
+        float completed_time = total_warmup_secs
+                             + (float)current_index * (MEASURE_SECS + COOLDOWN_SECS);
+        int remaining = n - current_index;
+        float remaining_time = (float)remaining * (avgWarmupSecs() + MEASURE_SECS)
+                             + (float)std::max(0, remaining - 1) * COOLDOWN_SECS;
+        return completed_time + remaining_time;
    }

    float elapsedSecs() const {
-        float completed = (float)current_index * (WARMUP_SECS + MEASURE_SECS);
+        float completed = total_warmup_secs
+                        + (float)current_index * (MEASURE_SECS + COOLDOWN_SECS);
        return completed + phase_timer;
    }

@@ -110,6 +153,14 @@ struct ThreadBenchmark {
        float total = totalEstimatedSecs();
        return (total > 0.0f) ? std::min(1.0f, elapsedSecs() / total) : 0.0f;
    }
+
+    void resetStabilityTracking() {
+        prev_window_avg = 0.0;
+        window_sum = 0.0;
+        window_samples = 0;
+        window_timer = 0.0f;
+        consecutive_stable = 0;
+    }
 };
 static ThreadBenchmark s_benchmark;

@@ -339,34 +390,75 @@ static void RenderMiningTabContent(App* app)
                app->startPoolMining(t);
                s_benchmark.phase = ThreadBenchmark::Phase::WarmingUp;
                s_benchmark.phase_timer = 0.0f;
-                s_benchmark.best_sample = 0.0;
-                s_benchmark.sample_count = 0;
+                s_benchmark.resetStabilityTracking();
+                s_benchmark.measure_sum = 0.0;
+                s_benchmark.measure_samples = 0;
            } else {
                s_benchmark.phase = ThreadBenchmark::Phase::Done;
            }
            break;

-        case ThreadBenchmark::Phase::WarmingUp:
-            if (s_benchmark.phase_timer >= ThreadBenchmark::WARMUP_SECS) {
+        case ThreadBenchmark::Phase::WarmingUp: {
+            // Adaptive warmup: wait for hashrate to stabilize (thermal steady state).
+            // After MIN_WARMUP (90s), compare rolling 10s hashrate windows.
+            // Require 3 consecutive windows within 5% to confirm equilibrium.
+            // Laptops can take 2-3+ minutes for thermal throttling to fully
+            // manifest, so a single stable window isn't sufficient.
+            bool past_min = s_benchmark.phase_timer >= ThreadBenchmark::MIN_WARMUP_SECS;
+            bool past_max = s_benchmark.phase_timer >= ThreadBenchmark::MAX_WARMUP_SECS;
+
+            // Accumulate samples into current window
+            if (state.pool_mining.hashrate_10s > 0.0) {
+                s_benchmark.window_sum += state.pool_mining.hashrate_10s;
+                s_benchmark.window_samples++;
+            }
+            s_benchmark.window_timer += dt;
+
+            bool stable = false;
+            if (past_min && s_benchmark.window_timer >= ThreadBenchmark::STABILITY_WINDOW_SECS
+                && s_benchmark.window_samples > 0) {
+                double current_avg = s_benchmark.window_sum / s_benchmark.window_samples;
+                if (s_benchmark.prev_window_avg > 0.0) {
+                    double change = std::abs(current_avg - s_benchmark.prev_window_avg)
+                                  / s_benchmark.prev_window_avg;
+                    if (change < ThreadBenchmark::STABILITY_THRESHOLD)
+                        s_benchmark.consecutive_stable++;
+                    else
+                        s_benchmark.consecutive_stable = 0;  // reset on instability
+                    if (s_benchmark.consecutive_stable >= ThreadBenchmark::STABLE_WINDOWS_NEEDED)
+                        stable = true;
+                }
+                // Shift window
+                s_benchmark.prev_window_avg = current_avg;
+                s_benchmark.window_sum = 0.0;
+                s_benchmark.window_samples = 0;
+                s_benchmark.window_timer = 0.0f;
+            }
+
+            if (stable || past_max) {
+                s_benchmark.total_warmup_secs += s_benchmark.phase_timer;
                s_benchmark.phase = ThreadBenchmark::Phase::Measuring;
                s_benchmark.phase_timer = 0.0f;
-                s_benchmark.best_sample = 0.0;
-                s_benchmark.sample_count = 0;
+                s_benchmark.measure_sum = 0.0;
+                s_benchmark.measure_samples = 0;
            }
            break;
+        }

        case ThreadBenchmark::Phase::Measuring:
-            // Sample hashrate during measurement window
+            // Sample average hashrate — reflects sustained (thermally throttled) performance
            if (state.pool_mining.hashrate_10s > 0.0) {
-                s_benchmark.sample_count++;
-                if (state.pool_mining.hashrate_10s > s_benchmark.best_sample)
-                    s_benchmark.best_sample = state.pool_mining.hashrate_10s;
+                s_benchmark.measure_sum += state.pool_mining.hashrate_10s;
+                s_benchmark.measure_samples++;
            }
            if (s_benchmark.phase_timer >= ThreadBenchmark::MEASURE_SECS) {
                int t = s_benchmark.candidates[s_benchmark.current_index];
-                s_benchmark.results.push_back({t, s_benchmark.best_sample});
-                if (s_benchmark.best_sample > s_benchmark.optimal_hashrate) {
-                    s_benchmark.optimal_hashrate = s_benchmark.best_sample;
+                double avg = (s_benchmark.measure_samples > 0)
+                           ? s_benchmark.measure_sum / s_benchmark.measure_samples
+                           : 0.0;
+                s_benchmark.results.push_back({t, avg});
+                if (avg > s_benchmark.optimal_hashrate) {
+                    s_benchmark.optimal_hashrate = avg;
                    s_benchmark.optimal_threads = t;
                }
                s_benchmark.phase = ThreadBenchmark::Phase::Advancing;
@@ -378,7 +470,9 @@ static void RenderMiningTabContent(App* app)
            app->stopPoolMining();
            s_benchmark.current_index++;
            if (s_benchmark.current_index < (int)s_benchmark.candidates.size()) {
-                s_benchmark.phase = ThreadBenchmark::Phase::Starting;
+                // Cool down before next test to reduce thermal throttling bias
+                s_benchmark.phase = ThreadBenchmark::Phase::CoolingDown;
+                s_benchmark.phase_timer = 0.0f;
            } else {
                // Done — apply optimal thread count
                s_benchmark.phase = ThreadBenchmark::Phase::Done;
@@ -394,6 +488,14 @@ static void RenderMiningTabContent(App* app)
            }
            break;

+        case ThreadBenchmark::Phase::CoolingDown:
+            // Idle pause — let CPU temps drop before starting next test
+            if (s_benchmark.phase_timer >= ThreadBenchmark::COOLDOWN_SECS) {
+                s_benchmark.phase = ThreadBenchmark::Phase::Starting;
+                s_benchmark.phase_timer = 0.0f;
+            }
+            break;
+
        default:
            break;
        }
@@ -1297,26 +1399,38 @@ static void RenderMiningTabContent(App* app)
                    // Status text above bar
                    int ct = s_benchmark.current_index < (int)s_benchmark.candidates.size()
                           ? s_benchmark.candidates[s_benchmark.current_index] : 0;
-                    // Estimated remaining time
+                    // Estimated remaining time (uses observed warmup for better accuracy)
                    int remaining_tests = (int)s_benchmark.candidates.size() - s_benchmark.current_index;
                    float elapsed_in_phase = s_benchmark.phase_timer;
-                    float phase_total = (s_benchmark.phase == ThreadBenchmark::Phase::WarmingUp)
-                                      ? ThreadBenchmark::WARMUP_SECS
-                                      : ThreadBenchmark::MEASURE_SECS;
+                    float phase_total;
+                    if (s_benchmark.phase == ThreadBenchmark::Phase::WarmingUp)
+                        phase_total = s_benchmark.avgWarmupSecs();  // adaptive estimate
+                    else if (s_benchmark.phase == ThreadBenchmark::Phase::CoolingDown)
+                        phase_total = ThreadBenchmark::COOLDOWN_SECS;
+                    else
+                        phase_total = ThreadBenchmark::MEASURE_SECS;
                    float remaining_in_current = std::max(0.0f, phase_total - elapsed_in_phase);
-                    // Remaining tests after current each need warmup + measure
+                    // Remaining tests after current each need warmup + measure + cooldown
                    float est_secs = remaining_in_current
-                                   + (remaining_tests - 1) * (ThreadBenchmark::WARMUP_SECS + ThreadBenchmark::MEASURE_SECS);
+                                   + (remaining_tests - 1) * (s_benchmark.avgWarmupSecs() + ThreadBenchmark::MEASURE_SECS + ThreadBenchmark::COOLDOWN_SECS);
                    int est_min = (int)(est_secs / 60.0f);
                    int est_sec = (int)est_secs % 60;
+                    const char* phase_label;
+                    if (s_benchmark.phase == ThreadBenchmark::Phase::CoolingDown)
+                        phase_label = TR("mining_benchmark_cooling");
+                    else if (s_benchmark.phase == ThreadBenchmark::Phase::WarmingUp
+                             && s_benchmark.phase_timer >= ThreadBenchmark::MIN_WARMUP_SECS)
+                        phase_label = TR("mining_benchmark_stabilizing");
+                    else
+                        phase_label = TR("mining_benchmark_testing");
                    if (est_min > 0)
                        snprintf(buf, sizeof(buf), "%s %d/%d (%dt) ~%dm%ds",
-                                 TR("mining_benchmark_testing"),
+                                 phase_label,
                                 s_benchmark.current_index + 1,
                                 (int)s_benchmark.candidates.size(), ct, est_min, est_sec);
                    else
                        snprintf(buf, sizeof(buf), "%s %d/%d (%dt) ~%ds",
-                                 TR("mining_benchmark_testing"),
+                                 phase_label,
                                 s_benchmark.current_index + 1,
                                 (int)s_benchmark.candidates.size(), ct, est_sec);
                    ImVec2 txtSz = capFont->CalcTextSizeA(capFont->LegacySize, FLT_MAX, 0, buf);
--- a/src/util/i18n.cpp
+++ b/src/util/i18n.cpp
@@ -879,6 +879,8 @@ void I18n::loadBuiltinEnglish()
        strings_["mining_reset_defaults"] = "Reset Defaults";
        strings_["mining_benchmark_tooltip"] = "Find optimal thread count for this CPU";
        strings_["mining_benchmark_testing"] = "Testing";
+        strings_["mining_benchmark_cooling"] = "Cooling";
+        strings_["mining_benchmark_stabilizing"] = "Stabilizing";
        strings_["mining_benchmark_cancel"] = "Cancel benchmark";
        strings_["mining_benchmark_result"] = "Optimal";
        strings_["mining_benchmark_dismiss"] = "Dismiss";