// Lumiera  0.pre.03 — »edit your freedom«
// scheduler-stress-test.cpp
// (Doxygen source-browser extract; the numbers embedded in the lines
//  below are the original file's line numbers, with gaps where the
//  scraper dropped lines)
1 /*
2  SchedulerStress(Test) - verify scheduler performance characteristics
3 
4  Copyright (C) Lumiera.org
5  2024, Hermann Vosseler <Ichthyostega@web.de>
6 
7  This program is free software; you can redistribute it and/or
8  modify it under the terms of the GNU General Public License as
9  published by the Free Software Foundation; either version 2 of
10  the License, or (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 * *****************************************************/
22 
28 #include "lib/test/run.hpp"
29 #include "test-chain-load.hpp"
30 #include "stress-test-rig.hpp"
31 #include "lib/test/test-helper.hpp"
32 #include "vault/gear/scheduler.hpp"
33 #include "lib/time/timevalue.hpp"
34 #include "lib/format-string.hpp"
35 #include "lib/format-cout.hpp"
36 #include "lib/util.hpp"
37 
38 using test::Test;
39 
40 
41 namespace vault{
42 namespace gear {
43 namespace test {
44 
45  using util::_Fmt;
46  using util::isLimited;
47 
48 
49 
50  /***************************************************************************/
// ═════════════════════════════════════════════════════════════════════════
// SchedulerStress_test : exercises the Lumiera render Scheduler with
// synthetic calculation-graph loads (TestChainLoad), verifying reproducible
// node-hash chains, schedule adaptation, concurrency statistics, and the
// »breaking point« stress factor via the StressRig test bench.
//
// NOTE(review): this text is a Doxygen source-browser extract — the embedded
// original line numbers show gaps (e.g. 70→76, 81→83, 85→87), i.e. several
// source lines are missing here, notably the names of most test functions
// and the leading object of some member-call chains.  The code is annotated
// below but is NOT compilable in this truncated form; consult the original
// file for the missing lines.
// ═════════════════════════════════════════════════════════════════════════
64  class SchedulerStress_test : public Test
65  {
66 
67  virtual void
68  run (Arg)
69  {
// test-suite entry point; further test invocations stood on the missing
// original lines 71-75 — TODO confirm against full source
70  smokeTest();
76  }
77 
78 
// ── test case (orig. ~79-131) ── presumably smokeTest(), the only call
// visible in run(); the name line (orig. 82) is missing — TODO confirm.
// Builds a 512-node load graph, verifies the reproducible node-hash chain
// by single-threaded execution, then re-runs the graph through the
// Scheduler and checks the identical hash is reproduced.
81  void
83  {
85  TestChainLoad testLoad{512};
// NOTE(review): orig. line 86 — the expression this member-call chain
// hangs off (topology configuration on testLoad) — is missing here
87  .buildTopology()
88 // .printTopologyDOT()
89  ;
90 
// print a one-line summary of the generated graph topology
91  auto stats = testLoad.computeGraphStatistics();
92  cout << _Fmt{"Test-Load: Nodes: %d Levels: %d ∅Node/Level: %3.1f Forks: %d Joins: %d"}
93  % stats.nodes
94  % stats.levels
95  % stats.indicators[STAT_NODE].pL
96  % stats.indicators[STAT_FORK].cnt
97  % stats.indicators[STAT_JOIN].cnt
98  << endl;
99 
100  // while building the calculation-plan graph
101  // node hashes were computed, observing dependencies
102  size_t expectedHash = testLoad.getHash();
103 
104  // some jobs/nodes are marked with a weight-step
105  // these can be instructed to spend some CPU time
106  auto LOAD_BASE = 500us;
// single-threaded execution must reproduce the expected hash chain
107  testLoad.performGraphSynchronously(LOAD_BASE);
108  CHECK (testLoad.getHash() == expectedHash);
109 
// single-threaded wall-clock baseline, for comparison with the Scheduler run
110  double referenceTime = testLoad.calcRuntimeReference(LOAD_BASE);
111  cout << "refTime(singleThr): "<<referenceTime/1000<<"ms"<<endl;
112 
113 
114  // Perform through Scheduler----------
115  BlockFlowAlloc bFlow;
116  EngineObserver watch;
117  Scheduler scheduler{bFlow, watch};
118 
119  double performanceTime =
120  testLoad.setupSchedule(scheduler)
121  .withLoadTimeBase(LOAD_BASE)
122  .withJobDeadline(150ms)
123  .withPlanningStep(200us)
124  .withChunkSize(20)
125  .launch_and_wait();
126 
127  cout << "runTime(Scheduler): "<<performanceTime/1000<<"ms"<<endl;
128 
129  // invocation through Scheduler has reproduced all node hashes
130  CHECK (testLoad.getHash() == expectedHash);
131  }
132 
133 
134 
// ── test case (orig. ~135-270) ── name line (orig. 141) missing — TODO
// confirm.  Verifies the level-based schedule sequence and its adaptation
// to a given stress factor and concurrency: first the default 1ms-grid
// schedule, then two adapted schedules checked value-by-value, finally an
// actual run which must follow the expected end time closely.
140  void
142  {
144  TestChainLoad testLoad{64};
// NOTE(review): orig. line 145 (start of this member-call chain) missing
146  .buildTopology()
147 // .printTopologyDOT()
148 // .printTopologyStatistics()
149  ;
150 
// calibrate the artificial CPU load and verify a single invocation
// indeed burns ≈500µs (accepted window 450..550µs)
151  auto LOAD_BASE = 500us;
152  ComputationalLoad cpuLoad;
153  cpuLoad.timeBase = LOAD_BASE;
154  cpuLoad.calibrate();
155 
156  double micros = cpuLoad.invoke();
157  CHECK (micros < 550);
158  CHECK (micros > 450);
159 
160  // build a schedule sequence based on
161  // summing up weight factors, with example concurrency ≔ 4
162  uint concurrency = 4;
163  auto stepFactors = testLoad.levelScheduleSequence(concurrency).effuse();
164  CHECK (stepFactors.size() == 1+testLoad.topLevel());
165  CHECK (stepFactors.size() == 26);
166 
167 
168  // Build-Performance-test-setup--------
169  BlockFlowAlloc bFlow;
170  EngineObserver watch;
171  Scheduler scheduler{bFlow, watch};
172 
173  auto testSetup =
174  testLoad.setupSchedule(scheduler)
175  .withLoadTimeBase(LOAD_BASE)
176  .withJobDeadline(50ms)
177  .withUpfrontPlanning();
178 
// default schedule: one graph level per 1ms grid tick
179  auto schedule = testSetup.getScheduleSeq().effuse();
180  CHECK (schedule.size() == testLoad.topLevel() + 2);
181  CHECK (schedule[ 0] == _uTicks(0ms));
182  CHECK (schedule[ 1] == _uTicks(1ms));
183  CHECK (schedule[ 2] == _uTicks(2ms));
184  // ....
185  CHECK (schedule[24] == _uTicks(24ms));
186  CHECK (schedule[25] == _uTicks(25ms));
187  CHECK (schedule[26] == _uTicks(26ms));
188 
189  // Adapted Schedule----------
190  double stressFac = 1.0;
191  testSetup.withAdaptedSchedule (stressFac, concurrency);
192  schedule = testSetup.getScheduleSeq().effuse();
193  CHECK (schedule.size() == testLoad.topLevel() + 2);
194  CHECK (schedule[ 0] == _uTicks(0ms));
195  CHECK (schedule[ 1] == _uTicks(0ms));
196 
197  // verify the numbers in detail....
// helper rendering »level / step factor / schedule offset (ms)« per level;
// note stepFactors is indexed one behind the schedule sequence
198  _Fmt stepFmt{"lev:%-2d stepFac:%-6.3f schedule:%6.3f"};
199  auto stepStr = [&](uint i){ return string{stepFmt % i % stepFactors[i>0?i-1:0] % (_raw(schedule[i])/1000.0)}; };
200 
201  CHECK (stepStr( 0) == "lev:0 stepFac:0.000 schedule: 0.000"_expect);
202  CHECK (stepStr( 1) == "lev:1 stepFac:0.000 schedule: 0.000"_expect);
203  CHECK (stepStr( 2) == "lev:2 stepFac:0.000 schedule: 0.000"_expect);
204  CHECK (stepStr( 3) == "lev:3 stepFac:2.000 schedule: 1.000"_expect);
205  CHECK (stepStr( 4) == "lev:4 stepFac:2.000 schedule: 1.000"_expect);
206  CHECK (stepStr( 5) == "lev:5 stepFac:2.000 schedule: 1.000"_expect);
207  CHECK (stepStr( 6) == "lev:6 stepFac:2.000 schedule: 1.000"_expect);
208  CHECK (stepStr( 7) == "lev:7 stepFac:3.000 schedule: 1.500"_expect);
209  CHECK (stepStr( 8) == "lev:8 stepFac:5.000 schedule: 2.500"_expect);
210  CHECK (stepStr( 9) == "lev:9 stepFac:7.000 schedule: 3.500"_expect);
211  CHECK (stepStr(10) == "lev:10 stepFac:8.000 schedule: 4.000"_expect);
212  CHECK (stepStr(11) == "lev:11 stepFac:8.000 schedule: 4.000"_expect);
213  CHECK (stepStr(12) == "lev:12 stepFac:8.000 schedule: 4.000"_expect);
214  CHECK (stepStr(13) == "lev:13 stepFac:9.000 schedule: 4.500"_expect);
215  CHECK (stepStr(14) == "lev:14 stepFac:10.000 schedule: 5.000"_expect);
216  CHECK (stepStr(15) == "lev:15 stepFac:12.000 schedule: 6.000"_expect);
217  CHECK (stepStr(16) == "lev:16 stepFac:12.000 schedule: 6.000"_expect);
218  CHECK (stepStr(17) == "lev:17 stepFac:13.000 schedule: 6.500"_expect);
219  CHECK (stepStr(18) == "lev:18 stepFac:16.000 schedule: 8.000"_expect);
220  CHECK (stepStr(19) == "lev:19 stepFac:16.000 schedule: 8.000"_expect);
221  CHECK (stepStr(20) == "lev:20 stepFac:20.000 schedule:10.000"_expect);
222  CHECK (stepStr(21) == "lev:21 stepFac:22.500 schedule:11.250"_expect);
223  CHECK (stepStr(22) == "lev:22 stepFac:24.167 schedule:12.083"_expect);
224  CHECK (stepStr(23) == "lev:23 stepFac:26.167 schedule:13.083"_expect);
225  CHECK (stepStr(24) == "lev:24 stepFac:28.167 schedule:14.083"_expect);
226  CHECK (stepStr(25) == "lev:25 stepFac:30.867 schedule:15.433"_expect);
227  CHECK (stepStr(26) == "lev:26 stepFac:32.200 schedule:16.100"_expect);
228 
229 
230  // Adapted Schedule with lower stress level and higher concurrency....
231  stressFac = 0.3;
232  concurrency = 6;
233  stepFactors = testLoad.levelScheduleSequence(concurrency).effuse();
234 
235  testSetup.withAdaptedSchedule (stressFac, concurrency);
236  schedule = testSetup.getScheduleSeq().effuse();
237 
238  CHECK (stepStr( 0) == "lev:0 stepFac:0.000 schedule: 0.000"_expect);
239  CHECK (stepStr( 1) == "lev:1 stepFac:0.000 schedule: 0.000"_expect);
240  CHECK (stepStr( 2) == "lev:2 stepFac:0.000 schedule: 0.000"_expect);
241  CHECK (stepStr( 3) == "lev:3 stepFac:2.000 schedule: 3.333"_expect);
242  CHECK (stepStr( 4) == "lev:4 stepFac:2.000 schedule: 3.333"_expect);
243  CHECK (stepStr( 5) == "lev:5 stepFac:2.000 schedule: 3.333"_expect);
244  CHECK (stepStr( 6) == "lev:6 stepFac:2.000 schedule: 3.333"_expect);
245  CHECK (stepStr( 7) == "lev:7 stepFac:3.000 schedule: 5.000"_expect);
246  CHECK (stepStr( 8) == "lev:8 stepFac:5.000 schedule: 8.333"_expect);
247  CHECK (stepStr( 9) == "lev:9 stepFac:7.000 schedule:11.666"_expect);
248  CHECK (stepStr(10) == "lev:10 stepFac:8.000 schedule:13.333"_expect);
249  CHECK (stepStr(11) == "lev:11 stepFac:8.000 schedule:13.333"_expect);
250  CHECK (stepStr(12) == "lev:12 stepFac:8.000 schedule:13.333"_expect);
251  CHECK (stepStr(13) == "lev:13 stepFac:9.000 schedule:15.000"_expect);
252  CHECK (stepStr(14) == "lev:14 stepFac:10.000 schedule:16.666"_expect);
253  CHECK (stepStr(15) == "lev:15 stepFac:12.000 schedule:20.000"_expect);
254  CHECK (stepStr(16) == "lev:16 stepFac:12.000 schedule:20.000"_expect);
255  CHECK (stepStr(17) == "lev:17 stepFac:13.000 schedule:21.666"_expect);
256  CHECK (stepStr(18) == "lev:18 stepFac:16.000 schedule:26.666"_expect);
257  CHECK (stepStr(19) == "lev:19 stepFac:16.000 schedule:26.666"_expect);
258  CHECK (stepStr(20) == "lev:20 stepFac:18.000 schedule:30.000"_expect); // note: here the higher concurrency allows to process all 5 concurrent nodes at once
259  CHECK (stepStr(21) == "lev:21 stepFac:20.500 schedule:34.166"_expect);
260  CHECK (stepStr(22) == "lev:22 stepFac:22.167 schedule:36.944"_expect);
261  CHECK (stepStr(23) == "lev:23 stepFac:23.167 schedule:38.611"_expect);
262  CHECK (stepStr(24) == "lev:24 stepFac:24.167 schedule:40.277"_expect);
263  CHECK (stepStr(25) == "lev:25 stepFac:25.967 schedule:43.277"_expect);
264  CHECK (stepStr(26) == "lev:26 stepFac:27.300 schedule:45.500"_expect);
265 
266  // perform a Test with this low stress level (0.3)
267  double runTime = testSetup.launch_and_wait();
268  double expected = testSetup.getExpectedEndTime();
// actual run time must stay within 5ms of the predicted end time
269  CHECK (fabs (runTime-expected) < 5000);
270  } // Scheduler should be able to follow the expected schedule
271 
272 
273 
274 
// ── test case (orig. ~275-311) ── name line (orig. 279) missing — TODO
// confirm.  Runs 20 weighted nodes with the instrumentation bracket
// enabled and checks the observed invocation statistics: per-node active
// time, activation count and achieved concurrency.
278  void
280  {
282  const size_t NODES = 20;
283  const size_t CORES = work::Config::COMPUTATION_CAPACITY;
284  auto LOAD_BASE = 5ms;
285 
286  TestChainLoad testLoad{NODES};
287 
288  BlockFlowAlloc bFlow;
289  EngineObserver watch;
290  Scheduler scheduler{bFlow, watch};
291 
292  auto testSetup =
293  testLoad.setWeight(1)
294  .setupSchedule(scheduler)
295  .withLoadTimeBase(LOAD_BASE)
296  .withJobDeadline(50ms)
297  .withInstrumentation() // activate an instrumentation bracket around each job invocation
298  ;
299  double runTime = testSetup.launch_and_wait();
300 
301  auto stat = testSetup.getInvocationStatistic(); // retrieve observed invocation statistics
302 
// concurrent execution ⇒ summed active time exceeds wall-clock run time
303  CHECK (runTime < stat.activeTime);
304  CHECK (isLimited (4900, stat.activeTime/NODES, 8000)); // should be close to 5000
305  CHECK (stat.coveredTime < runTime);
306  CHECK (NODES == stat.activationCnt); // each node activated once
307  CHECK (isLimited (CORES/2, stat.avgConcurrency, CORES)); // should ideally come close to hardware concurrency
308  CHECK (0 == stat.timeAtConc(0));
309  CHECK (0 == stat.timeAtConc(CORES+1));
310  CHECK (runTime/2 < stat.timeAtConc(CORES-1)+stat.timeAtConc(CORES));
311  } // should ideally spend most of the time at highest concurrency levels
312 
313 
314 
315 
317 
// ── test case (orig. ~316-361) ── name line (orig. 337) missing — TODO
// confirm.  Uses the StressRig bench to search the »breaking point«: the
// stress factor at which the Scheduler can no longer keep up with the
// planned schedule.  Expected to lie close to stress factor ≈ 1.0.
336  void
338  {
340 
// local configuration: 64-node »load bursts« topology, 500µs per node,
// 4 workers; showRuns prints each measurement run
341  struct Setup : StressRig
342  {
343  uint CONCURRENCY = 4;
344  bool showRuns = true;
345 
346  auto testLoad()
347  { return TestLoad{64}.configureShape_chain_loadBursts(); }
348 
349  auto testSetup (TestLoad& testLoad)
350  {
351  return StressRig::testSetup(testLoad)
352  .withLoadTimeBase(500us);
353  }
354 
355  };
356 
357  auto [stress,delta,time] = StressRig::with<Setup>()
358  .perform<bench::BreakingPoint>();
359  CHECK (delta > 2.5);
360  CHECK (1.15 > stress and stress > 0.85);
361  }
362 
363 
364 
// ── test case (orig. ~365-417) ── name line (orig. 376) missing — TODO
// confirm.  Sweeps the node count (33..128) of isolated-node loads and
// fits a linear model to the measured run times, checking gradient,
// offset (»socket«), correlation and average concurrency.
// NOTE(review): this Setup struct shows no base class here, yet it calls
// StressRig::testSetup and is fed to StressRig::with<Setup>() — the base
// specification presumably stood on the missing orig. line 381.
375  void
377  {
379 
380  struct Setup
382  {
383  uint CONCURRENCY = 4;
384  uint REPETITIONS = 50;
385 
386  auto testLoad(Param nodes)
387  {
388  TestLoad testLoad{nodes};
389  return testLoad.configure_isolated_nodes();
390  }
391 
392  auto testSetup (TestLoad& testLoad)
393  {
394  return StressRig::testSetup(testLoad)
395  .withLoadTimeBase(2ms);
396  }
397  };
398 
399  auto results = StressRig::with<Setup>()
400  .perform<bench::ParameterRange> (33,128);
401 
// fit time(p) ≈ gradient·p + socket over the measured series
402  auto [socket,gradient,v1,v2,corr,maxDelta,stdev] = bench::linearRegression (results.param, results.time);
403  double avgConc = Setup::avgConcurrency (results);
404 
405 // cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"<<endl;
406 // cout << Setup::renderGnuplot (results) <<endl;
407  cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"<<endl;
408  cout << _Fmt{"Model: %3.2f·p + %3.2f corr=%4.2f Δmax=%4.2f σ=%4.2f ∅concurrency: %3.1f"}
409  % gradient % socket % corr % maxDelta % stdev % avgConc
410  << endl;
411 
412  CHECK (corr > 0.80); // clearly a linearly correlated behaviour
413  CHECK (isLimited (0.4, gradient, 0.7)); // should be slightly above 0.5 (2ms and 4 threads => 0.5ms / Job)
414  CHECK (isLimited (3, socket, 9 )); // we have a spin-up and a shut-down both ~ 2ms plus some further overhead
415 
416  CHECK (avgConc > 3); // should be able to utilise 4 workers (minus the spin-up/shut-down phase)
417  }
418 
419 
420 
// ── test case (orig. ~421-498) ── name line (orig. 431) missing — TODO
// confirm.  First determines the breaking point for a 256-node pattern of
// interleaved linear chains, then verifies extended stable operation on
// the same pattern scaled to 1024 nodes, this time with planning
// interleaved with execution (chunked planning, short per-job deadline).
430  void
432  {
434  using StressRig = StressTestRig<8>;
435 
436  struct Setup : StressRig
437  {
438  uint CONCURRENCY = 4;
439  bool showRuns = true;
440 
441  auto
442  testLoad()
443  {
444  TestLoad testLoad{256}; // use a pattern of 4-step interleaved linear chains
445  testLoad.seedingRule(testLoad.rule().probability(0.6).maxVal(2))
446  .pruningRule(testLoad.rule().probability(0.44))
447  .weightRule(testLoad.value(1))
448  .setSeed(60);
449  return testLoad;
450  }
451 
452  auto testSetup (TestLoad& testLoad)
453  {
454  return StressRig::testSetup(testLoad)
455  .withLoadTimeBase(5ms);// ◁─────────────── Load 5ms on each Node
456  }
457  };
458  auto [stress,delta,time] = StressRig::with<Setup>()
459  .perform<bench::BreakingPoint>();
460  cout << "Time for 256 Nodes: "<<time<<"ms with stressFactor="<<stress<<endl;
461 
462 
463  /* ========== verify extended stable operation ============== */
464 
465  // Use the same pattern, but extended to 4 times the length;
466  // moreover, this time planning and execution will be interleaved.
467  TestChainLoad<8> testLoad{1024};
468  testLoad.seedingRule(testLoad.rule().probability(0.6).maxVal(2))
469  .pruningRule(testLoad.rule().probability(0.44))
470  .weightRule(testLoad.value(1))
471  .setSeed(60)
472  .buildTopology()
473 // .printTopologyDOT()
474 // .printTopologyStatistics()
475  ;
476  size_t expectedHash = testLoad.getHash();
478 
479  BlockFlowAlloc bFlow;
480  EngineObserver watch;
481  Scheduler scheduler{bFlow, watch};
482 
483  auto testSetup =
484  testLoad.setupSchedule(scheduler)
485  .withLoadTimeBase(5ms)
486  .withJobDeadline(50ms) // ◁───────────────────── deadline is way shorter than overall run time
487  .withChunkSize(32) // ◁───────────────────── planning of the next 32 nodes interleaved with performance
488  .withInstrumentation()
489  .withAdaptedSchedule (1.0, 4); // ◁───────────────────── stress factor 1.0 and 4 workers
490  double runTime = testSetup.launch_and_wait();
491  auto stat = testSetup.getInvocationStatistic();
492  cout << "Extended Scheduler Run: "<<runTime/1e6<<"sec concurrency:"<<stat.avgConcurrency<<endl;
493 
// the long run must remain stable: all 1024 nodes invoked, hash chain
// reproduced, concurrency sustained, total time bounded relative to the
// 256-node breaking-point measurement
494  CHECK (stat.activationCnt == 1024);
495  CHECK (expectedHash == testLoad.getHash());
496  CHECK (3.2 < stat.avgConcurrency);
497  CHECK (stat.coveredTime < 5 * time*1000);
498  }
499  };
500 
501 
// register this test with the test-class runner, as part of the
// »unit engine« test group
503  LAUNCHER (SchedulerStress_test, "unit engine");
504 
505 
506 
507 }}} // namespace vault::gear::test
/* ─── Doxygen cross-reference tooltips (source-browser scrape residue) ───
 * The following lines are hover-text fragments emitted by the Doxygen
 * source browser.  They are not part of the program; they are kept
 * verbatim inside this comment so the file remains coherent C++ text.
 *
const StatKey STAT_NODE
all nodes
Automatically use custom string conversion in C++ stream output.
#define TRANSIENTLY(_OO_)
Macro to simplify capturing assignments.
auto testSetup(TestLoad &testLoad)
(optional) extension point: base configuration of the test ScheduleCtx
Definition: Setup.py:1
Definition: run.hpp:49
Front-end for printf-style string template interpolation.
size_t getHash() const
global hash is the combination of all exit node hashes != 0
Configurable template framework for running Scheduler Stress tests Use to build a custom setup class...
double invoke(uint scaleStep=1)
cause a delay by computational load
TestChainLoad && buildTopology()
Use current configuration and seed to (re)build Node connectivity.
Generate synthetic computation load for Scheduler performance tests.
A test bench to conduct performance measurement series.
A Generator for synthetic Render Jobs for Scheduler load testing.
A front-end for using printf-style formatting.
Abstract Base Class for all testcases.
Definition: run.hpp:62
»Scheduler-Service« : coordinate render activities.
Definition: scheduler.hpp:222
Service for coordination and dispatch of render activities.
#define MARK_TEST_FUN
Macro to mark the current test function in STDOUT.
Simple test class runner.
Tiny helper functions and shortcuts to be used everywhere Consider this header to be effectively incl...
A collection of frequently used helper functions to support unit testing.
TestChainLoad && configureShape_chain_loadBursts()
preconfigured topology: single graph with massive »load bursts«
const StatKey STAT_JOIN
joining node
Statistic computeGraphStatistics()
Operator on TestChainLoad to evaluate current graph connectivity.
Mix-in for setup of a #ParameterRange evaluation to watch the processing of a single load peak...
const StatKey STAT_FORK
forking node
static size_t COMPUTATION_CAPACITY
Nominal »full size« of a pool of concurrent workers.
Definition: work-force.hpp:115
a family of time value like entities and their relationships.
Vault-Layer implementation namespace root.
Collector and aggregator for performance data.
A calibratable CPU load to be invoked from a node job functor.
*/