// Lumiera  0.pre.03 — »edit your freedom«
// scheduler-stress-test.cpp
// (Doxygen source-browser extract; the numbers embedded in the lines
//  below are the original file's line numbers, with gaps where the
//  scraper dropped lines)
1 /*
2  SchedulerStress(Test) - verify scheduler performance characteristics
3 
4  Copyright (C) Lumiera.org
5  2024, Hermann Vosseler <Ichthyostega@web.de>
6 
7  This program is free software; you can redistribute it and/or
8  modify it under the terms of the GNU General Public License as
9  published by the Free Software Foundation; either version 2 of
10  the License, or (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 * *****************************************************/
22 
28 #include "lib/test/run.hpp"
29 #include "test-chain-load.hpp"
30 #include "stress-test-rig.hpp"
31 #include "lib/test/test-helper.hpp"
32 #include "vault/gear/scheduler.hpp"
33 #include "lib/time/timevalue.hpp"
34 #include "lib/format-string.hpp"
35 #include "lib/format-cout.hpp"
36 #include "lib/util.hpp"
37 
38 using test::Test;
39 
40 
41 namespace vault{
42 namespace gear {
43 namespace test {
44 
45  using util::_Fmt;
46  using util::isLimited;
47 
48 
49 
50  /***************************************************************************/
// ═════════════════════════════════════════════════════════════════════════
// SchedulerStress_test : exercises the Lumiera render Scheduler with
// synthetic calculation-graph loads (TestChainLoad), verifying reproducible
// node-hash chains, schedule adaptation, concurrency statistics, and the
// »breaking point« stress factor via the StressRig test bench.
//
// NOTE(review): this text is a Doxygen source-browser extract — the embedded
// original line numbers show gaps (e.g. 70→76, 81→83, 85→87), i.e. several
// source lines are missing here, notably the names of most test functions
// and the leading object of some member-call chains.  The code is annotated
// below but is NOT compilable in this truncated form; consult the original
// file for the missing lines.
// ═════════════════════════════════════════════════════════════════════════
64  class SchedulerStress_test : public Test
65  {
66 
67  virtual void
68  run (Arg)
69  {
// test-suite entry point; further test invocations stood on the missing
// original lines 71-75 — TODO confirm against full source
70  smokeTest();
76  }
77 
78 
// ── test case (orig. ~79-131) ── presumably smokeTest(), the only call
// visible in run(); the name line (orig. 82) is missing — TODO confirm.
// Builds a 512-node load graph, verifies the reproducible node-hash chain
// by single-threaded execution, then re-runs the graph through the
// Scheduler and checks the identical hash is reproduced.
81  void
83  {
85  TestChainLoad testLoad{512};
// NOTE(review): orig. line 86 — the expression this member-call chain
// hangs off (topology configuration on testLoad) — is missing here
87  .buildTopology()
88 // .printTopologyDOT()
89  ;
90 
// print a one-line summary of the generated graph topology
91  auto stats = testLoad.computeGraphStatistics();
92  cout << _Fmt{"Test-Load: Nodes: %d Levels: %d ∅Node/Level: %3.1f Forks: %d Joins: %d"}
93  % stats.nodes
94  % stats.levels
95  % stats.indicators[STAT_NODE].pL
96  % stats.indicators[STAT_FORK].cnt
97  % stats.indicators[STAT_JOIN].cnt
98  << endl;
99 
100  // while building the calculation-plan graph
101  // node hashes were computed, observing dependencies
102  size_t expectedHash = testLoad.getHash();
103 
104  // some jobs/nodes are marked with a weight-step
105  // these can be instructed to spend some CPU time
106  auto LOAD_BASE = 500us;
// single-threaded execution must reproduce the expected hash chain
107  testLoad.performGraphSynchronously(LOAD_BASE);
108  CHECK (testLoad.getHash() == expectedHash);
109 
// single-threaded wall-clock baseline, for comparison with the Scheduler run
110  double referenceTime = testLoad.calcRuntimeReference(LOAD_BASE);
111  cout << "refTime(singleThr): "<<referenceTime/1000<<"ms"<<endl;
112 
113 
114  // Perform through Scheduler----------
115  BlockFlowAlloc bFlow;
116  EngineObserver watch;
117  Scheduler scheduler{bFlow, watch};
118 
119  double performanceTime =
120  testLoad.setupSchedule(scheduler)
121  .withLoadTimeBase(LOAD_BASE)
122  .withJobDeadline(150ms)
123  .withPlanningStep(200us)
124  .withChunkSize(20)
125  .launch_and_wait();
126 
127  cout << "runTime(Scheduler): "<<performanceTime/1000<<"ms"<<endl;
128 
129  // invocation through Scheduler has reproduced all node hashes
130  CHECK (testLoad.getHash() == expectedHash);
131  }
132 
133 
134 
// ── test case (orig. ~135-270) ── name line (orig. 141) missing — TODO
// confirm.  Verifies the level-based schedule sequence and its adaptation
// to a given stress factor and concurrency: first the default 1ms-grid
// schedule, then two adapted schedules checked value-by-value, finally an
// actual run which must follow the expected end time closely.
140  void
142  {
144  TestChainLoad testLoad{64};
// NOTE(review): orig. line 145 (start of this member-call chain) missing
146  .buildTopology()
147 // .printTopologyDOT()
148 // .printTopologyStatistics()
149  ;
150 
// calibrate the artificial CPU load and verify a single invocation
// indeed burns ≈500µs (accepted window 450..550µs)
151  auto LOAD_BASE = 500us;
152  ComputationalLoad cpuLoad;
153  cpuLoad.timeBase = LOAD_BASE;
154  cpuLoad.calibrate();
155 
156  double micros = cpuLoad.invoke();
157  CHECK (micros < 550);
158  CHECK (micros > 450);
159 
160  // build a schedule sequence based on
161  // summing up weight factors, with example concurrency ≔ 4
162  uint concurrency = 4;
163  auto stepFactors = testLoad.levelScheduleSequence(concurrency).effuse();
164  CHECK (stepFactors.size() == 1+testLoad.topLevel());
165  CHECK (stepFactors.size() == 26);
166 
167 
168  // Build-Performance-test-setup--------
169  BlockFlowAlloc bFlow;
170  EngineObserver watch;
171  Scheduler scheduler{bFlow, watch};
172 
173  auto testSetup =
174  testLoad.setupSchedule(scheduler)
175  .withLoadTimeBase(LOAD_BASE)
176  .withJobDeadline(50ms)
177  .withUpfrontPlanning();
178 
// default schedule: one graph level per 1ms grid tick
179  auto schedule = testSetup.getScheduleSeq().effuse();
180  CHECK (schedule.size() == testLoad.topLevel() + 2);
181  CHECK (schedule[ 0] == _uTicks(0ms));
182  CHECK (schedule[ 1] == _uTicks(1ms));
183  CHECK (schedule[ 2] == _uTicks(2ms));
184  // ....
185  CHECK (schedule[24] == _uTicks(24ms));
186  CHECK (schedule[25] == _uTicks(25ms));
187  CHECK (schedule[26] == _uTicks(26ms));
188 
189  // Adapted Schedule----------
190  double stressFac = 1.0;
191  testSetup.withAdaptedSchedule (stressFac, concurrency);
192  schedule = testSetup.getScheduleSeq().effuse();
193  CHECK (schedule.size() == testLoad.topLevel() + 2);
194  CHECK (schedule[ 0] == _uTicks(0ms));
195  CHECK (schedule[ 1] == _uTicks(0ms));
196 
197  // verify the numbers in detail....
// helper rendering »level / step factor / schedule offset (ms)« per level;
// note stepFactors is indexed one behind the schedule sequence
198  _Fmt stepFmt{"lev:%-2d stepFac:%-6.3f schedule:%6.3f"};
199  auto stepStr = [&](uint i){ return string{stepFmt % i % stepFactors[i>0?i-1:0] % (_raw(schedule[i])/1000.0)}; };
200 
201  CHECK (stepStr( 0) == "lev:0 stepFac:0.000 schedule: 0.000"_expect);
202  CHECK (stepStr( 1) == "lev:1 stepFac:0.000 schedule: 0.000"_expect);
203  CHECK (stepStr( 2) == "lev:2 stepFac:0.000 schedule: 0.000"_expect);
204  CHECK (stepStr( 3) == "lev:3 stepFac:2.000 schedule: 1.000"_expect);
205  CHECK (stepStr( 4) == "lev:4 stepFac:2.000 schedule: 1.000"_expect);
206  CHECK (stepStr( 5) == "lev:5 stepFac:2.000 schedule: 1.000"_expect);
207  CHECK (stepStr( 6) == "lev:6 stepFac:2.000 schedule: 1.000"_expect);
208  CHECK (stepStr( 7) == "lev:7 stepFac:3.000 schedule: 1.500"_expect);
209  CHECK (stepStr( 8) == "lev:8 stepFac:5.000 schedule: 2.500"_expect);
210  CHECK (stepStr( 9) == "lev:9 stepFac:7.000 schedule: 3.500"_expect);
211  CHECK (stepStr(10) == "lev:10 stepFac:8.000 schedule: 4.000"_expect);
212  CHECK (stepStr(11) == "lev:11 stepFac:8.000 schedule: 4.000"_expect);
213  CHECK (stepStr(12) == "lev:12 stepFac:8.000 schedule: 4.000"_expect);
214  CHECK (stepStr(13) == "lev:13 stepFac:9.000 schedule: 4.500"_expect);
215  CHECK (stepStr(14) == "lev:14 stepFac:10.000 schedule: 5.000"_expect);
216  CHECK (stepStr(15) == "lev:15 stepFac:12.000 schedule: 6.000"_expect);
217  CHECK (stepStr(16) == "lev:16 stepFac:12.000 schedule: 6.000"_expect);
218  CHECK (stepStr(17) == "lev:17 stepFac:13.000 schedule: 6.500"_expect);
219  CHECK (stepStr(18) == "lev:18 stepFac:16.000 schedule: 8.000"_expect);
220  CHECK (stepStr(19) == "lev:19 stepFac:16.000 schedule: 8.000"_expect);
221  CHECK (stepStr(20) == "lev:20 stepFac:20.000 schedule:10.000"_expect);
222  CHECK (stepStr(21) == "lev:21 stepFac:22.500 schedule:11.250"_expect);
223  CHECK (stepStr(22) == "lev:22 stepFac:24.167 schedule:12.083"_expect);
224  CHECK (stepStr(23) == "lev:23 stepFac:26.167 schedule:13.083"_expect);
225  CHECK (stepStr(24) == "lev:24 stepFac:28.167 schedule:14.083"_expect);
226  CHECK (stepStr(25) == "lev:25 stepFac:30.867 schedule:15.433"_expect);
227  CHECK (stepStr(26) == "lev:26 stepFac:32.200 schedule:16.100"_expect);
228 
229 
230  // Adapted Schedule with lower stress level and higher concurrency....
231  stressFac = 0.3;
232  concurrency = 6;
233  stepFactors = testLoad.levelScheduleSequence(concurrency).effuse();
234 
235  testSetup.withAdaptedSchedule (stressFac, concurrency);
236  schedule = testSetup.getScheduleSeq().effuse();
237 
238  CHECK (stepStr( 0) == "lev:0 stepFac:0.000 schedule: 0.000"_expect);
239  CHECK (stepStr( 1) == "lev:1 stepFac:0.000 schedule: 0.000"_expect);
240  CHECK (stepStr( 2) == "lev:2 stepFac:0.000 schedule: 0.000"_expect);
241  CHECK (stepStr( 3) == "lev:3 stepFac:2.000 schedule: 3.333"_expect);
242  CHECK (stepStr( 4) == "lev:4 stepFac:2.000 schedule: 3.333"_expect);
243  CHECK (stepStr( 5) == "lev:5 stepFac:2.000 schedule: 3.333"_expect);
244  CHECK (stepStr( 6) == "lev:6 stepFac:2.000 schedule: 3.333"_expect);
245  CHECK (stepStr( 7) == "lev:7 stepFac:3.000 schedule: 5.000"_expect);
246  CHECK (stepStr( 8) == "lev:8 stepFac:5.000 schedule: 8.333"_expect);
247  CHECK (stepStr( 9) == "lev:9 stepFac:7.000 schedule:11.666"_expect);
248  CHECK (stepStr(10) == "lev:10 stepFac:8.000 schedule:13.333"_expect);
249  CHECK (stepStr(11) == "lev:11 stepFac:8.000 schedule:13.333"_expect);
250  CHECK (stepStr(12) == "lev:12 stepFac:8.000 schedule:13.333"_expect);
251  CHECK (stepStr(13) == "lev:13 stepFac:9.000 schedule:15.000"_expect);
252  CHECK (stepStr(14) == "lev:14 stepFac:10.000 schedule:16.666"_expect);
253  CHECK (stepStr(15) == "lev:15 stepFac:12.000 schedule:20.000"_expect);
254  CHECK (stepStr(16) == "lev:16 stepFac:12.000 schedule:20.000"_expect);
255  CHECK (stepStr(17) == "lev:17 stepFac:13.000 schedule:21.666"_expect);
256  CHECK (stepStr(18) == "lev:18 stepFac:16.000 schedule:26.666"_expect);
257  CHECK (stepStr(19) == "lev:19 stepFac:16.000 schedule:26.666"_expect);
258  CHECK (stepStr(20) == "lev:20 stepFac:18.000 schedule:30.000"_expect); // note: here the higher concurrency allows to process all 5 concurrent nodes at once
259  CHECK (stepStr(21) == "lev:21 stepFac:20.500 schedule:34.166"_expect);
260  CHECK (stepStr(22) == "lev:22 stepFac:22.167 schedule:36.944"_expect);
261  CHECK (stepStr(23) == "lev:23 stepFac:23.167 schedule:38.611"_expect);
262  CHECK (stepStr(24) == "lev:24 stepFac:24.167 schedule:40.277"_expect);
263  CHECK (stepStr(25) == "lev:25 stepFac:25.967 schedule:43.277"_expect);
264  CHECK (stepStr(26) == "lev:26 stepFac:27.300 schedule:45.500"_expect);
265 
266  // perform a Test with this low stress level (0.3)
267  double runTime = testSetup.launch_and_wait();
268  double expected = testSetup.getExpectedEndTime();
// actual run time must stay within 5ms of the predicted end time
269  CHECK (fabs (runTime-expected) < 5000);
270  } // Scheduler should be able to follow the expected schedule
271 
272 
273 
274 
// ── test case (orig. ~275-311) ── name line (orig. 279) missing — TODO
// confirm.  Runs 20 weighted nodes with the instrumentation bracket
// enabled and checks the observed invocation statistics: per-node active
// time, activation count and achieved concurrency.
278  void
280  {
282  const size_t NODES = 20;
283  const size_t CORES = work::Config::COMPUTATION_CAPACITY;
284  auto LOAD_BASE = 5ms;
285 
286  TestChainLoad testLoad{NODES};
287 
288  BlockFlowAlloc bFlow;
289  EngineObserver watch;
290  Scheduler scheduler{bFlow, watch};
291 
292  auto testSetup =
293  testLoad.setWeight(1)
294  .setupSchedule(scheduler)
295  .withLoadTimeBase(LOAD_BASE)
296  .withJobDeadline(50ms)
297  .withInstrumentation() // activate an instrumentation bracket around each job invocation
298  ;
299  double runTime = testSetup.launch_and_wait();
300 
301  auto stat = testSetup.getInvocationStatistic(); // retrieve observed invocation statistics
302 
// concurrent execution ⇒ summed active time exceeds wall-clock run time
303  CHECK (runTime < stat.activeTime);
304  CHECK (isLimited (4900, stat.activeTime/NODES, 8000)); // should be close to 5000
305  CHECK (stat.coveredTime < runTime);
306  CHECK (NODES == stat.activationCnt); // each node activated once
307  CHECK (isLimited (CORES/2, stat.avgConcurrency, CORES)); // should ideally come close to hardware concurrency
308  CHECK (0 == stat.timeAtConc(0));
309  CHECK (0 == stat.timeAtConc(CORES+1));
310  CHECK (runTime/2 < stat.timeAtConc(CORES-1)+stat.timeAtConc(CORES));
311  } // should ideally spend most of the time at highest concurrency levels
312 
313 
314 
315 
317 
// ── test case (orig. ~316-361) ── name line (orig. 337) missing — TODO
// confirm.  Uses the StressRig bench to search the »breaking point«: the
// stress factor at which the Scheduler can no longer keep up with the
// planned schedule.  Expected to lie close to stress factor ≈ 1.0.
336  void
338  {
340 
// local configuration: 64-node »load bursts« topology, 500µs per node,
// 4 workers; showRuns prints each measurement run
341  struct Setup : StressRig
342  {
343  uint CONCURRENCY = 4;
344  bool showRuns = true;
345 
346  auto testLoad()
347  { return TestLoad{64}.configureShape_chain_loadBursts(); }
348 
349  auto testSetup (TestLoad& testLoad)
350  {
351  return StressRig::testSetup(testLoad)
352  .withLoadTimeBase(500us);
353  }
354 
355  };
356 
357  auto [stress,delta,time] = StressRig::with<Setup>()
358  .perform<bench::BreakingPoint>();
359  CHECK (delta > 2.5);
360  CHECK (1.15 > stress and stress > 0.85);
361  }
362 
363 
364 
// ── test case (orig. ~365-417) ── name line (orig. 376) missing — TODO
// confirm.  Sweeps the node count (33..128) of isolated-node loads and
// fits a linear model to the measured run times, checking gradient,
// offset (»socket«), correlation and average concurrency.
// NOTE(review): this Setup struct shows no base class here, yet it calls
// StressRig::testSetup and is fed to StressRig::with<Setup>() — the base
// specification presumably stood on the missing orig. line 381.
375  void
377  {
379 
380  struct Setup
382  {
383  uint CONCURRENCY = 4;
384  uint REPETITIONS = 50;
385 
386  auto testLoad(Param nodes)
387  {
388  TestLoad testLoad{nodes};
389  return testLoad.configure_isolated_nodes();
390  }
391 
392  auto testSetup (TestLoad& testLoad)
393  {
394  return StressRig::testSetup(testLoad)
395  .withLoadTimeBase(2ms);
396  }
397  };
398 
399  auto results = StressRig::with<Setup>()
400  .perform<bench::ParameterRange> (33,128);
401 
// fit time(p) ≈ gradient·p + socket over the measured series
402  auto [socket,gradient,v1,v2,corr,maxDelta,stdev] = bench::linearRegression (results.param, results.time);
403  double avgConc = Setup::avgConcurrency (results);
404 
405 // cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"<<endl;
406 // cout << Setup::renderGnuplot (results) <<endl;
407  cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"<<endl;
408  cout << _Fmt{"Model: %3.2f·p + %3.2f corr=%4.2f Δmax=%4.2f σ=%4.2f ∅concurrency: %3.1f"}
409  % gradient % socket % corr % maxDelta % stdev % avgConc
410  << endl;
411 
412  CHECK (corr > 0.80); // clearly a linearly correlated behaviour
413  CHECK (isLimited (0.4, gradient, 0.7)); // should be slightly above 0.5 (2ms and 4 threads => 0.5ms / Job)
414  CHECK (isLimited (3, socket, 9 )); // we have a spin-up and a shut-down both ~ 2ms plus some further overhead
415 
416  CHECK (avgConc > 3); // should be able to utilise 4 workers (minus the spin-up/shut-down phase)
417  }
418 
419 
420 
// ── test case (orig. ~421-498) ── name line (orig. 431) missing — TODO
// confirm.  First determines the breaking point for a 256-node pattern of
// interleaved linear chains, then verifies extended stable operation on
// the same pattern scaled to 1024 nodes, this time with planning
// interleaved with execution (chunked planning, short per-job deadline).
430  void
432  {
434  using StressRig = StressTestRig<8>;
435 
436  struct Setup : StressRig
437  {
438  uint CONCURRENCY = 4;
439  bool showRuns = true;
440 
441  auto
442  testLoad()
443  {
444  TestLoad testLoad{256}; // use a pattern of 4-step interleaved linear chains
445  testLoad.seedingRule(testLoad.rule().probability(0.6).maxVal(2))
446  .pruningRule(testLoad.rule().probability(0.44))
447  .weightRule(testLoad.value(1))
448  .setSeed(60);
449  return testLoad;
450  }
451 
452  auto testSetup (TestLoad& testLoad)
453  {
454  return StressRig::testSetup(testLoad)
455  .withLoadTimeBase(5ms);// ◁─────────────── Load 5ms on each Node
456  }
457  };
458  auto [stress,delta,time] = StressRig::with<Setup>()
459  .perform<bench::BreakingPoint>();
460  cout << "Time for 256 Nodes: "<<time<<"ms with stressFactor="<<stress<<endl;
461 
462 
463  /* ========== verify extended stable operation ============== */
464 
465  // Use the same pattern, but extended to 4 times the length;
466  // moreover, this time planning and execution will be interleaved.
467  TestChainLoad<8> testLoad{1024};
468  testLoad.seedingRule(testLoad.rule().probability(0.6).maxVal(2))
469  .pruningRule(testLoad.rule().probability(0.44))
470  .weightRule(testLoad.value(1))
471  .setSeed(60)
472  .buildTopology()
473 // .printTopologyDOT()
474 // .printTopologyStatistics()
475  ;
476  size_t expectedHash = testLoad.getHash();
478 
479  BlockFlowAlloc bFlow;
480  EngineObserver watch;
481  Scheduler scheduler{bFlow, watch};
482 
483  auto testSetup =
484  testLoad.setupSchedule(scheduler)
485  .withLoadTimeBase(5ms)
486  .withJobDeadline(50ms) // ◁───────────────────── deadline is way shorter than overall run time
487  .withChunkSize(32) // ◁───────────────────── planning of the next 32 nodes interleaved with performance
488  .withInstrumentation()
489  .withAdaptedSchedule (1.0, 4); // ◁───────────────────── stress factor 1.0 and 4 workers
490  double runTime = testSetup.launch_and_wait();
491  auto stat = testSetup.getInvocationStatistic();
492  cout << "Extended Scheduler Run: "<<runTime/1e6<<"sec concurrency:"<<stat.avgConcurrency<<endl;
493 
// the long run must remain stable: all 1024 nodes invoked, hash chain
// reproduced, concurrency sustained, total time bounded relative to the
// 256-node breaking-point measurement
494  CHECK (stat.activationCnt == 1024);
495  CHECK (expectedHash == testLoad.getHash());
496  CHECK (3.2 < stat.avgConcurrency);
497  CHECK (stat.coveredTime < 5 * time*1000);
498  }
499  };
500 
501 
// register this test with the test-class runner, as part of the
// »unit engine« test group
503  LAUNCHER (SchedulerStress_test, "unit engine");
504 
505 
506 
507 }}} // namespace vault::gear::test
/* ─── Doxygen cross-reference tooltips (source-browser scrape residue) ───
 * The following lines are hover-text fragments emitted by the Doxygen
 * source browser.  They are not part of the program; they are kept
 * verbatim inside this comment so the file remains coherent C++ text.
 *
const StatKey STAT_NODE
all nodes
Automatically use custom string conversion in C++ stream output.
#define TRANSIENTLY(_OO_)
Macro to simplify capturing assignments.
auto testSetup(TestLoad &testLoad)
(optional) extension point: base configuration of the test ScheduleCtx
Definition: Setup.py:1
Definition: run.hpp:49
Front-end for printf-style string template interpolation.
size_t getHash() const
global hash is the combination of all exit node hashes != 0
Configurable template framework for running Scheduler Stress tests Use to build a custom setup class...
double invoke(uint scaleStep=1)
cause a delay by computational load
TestChainLoad && buildTopology()
Use current configuration and seed to (re)build Node connectivity.
Generate synthetic computation load for Scheduler performance tests.
A test bench to conduct performance measurement series.
A Generator for synthetic Render Jobs for Scheduler load testing.
A front-end for using printf-style formatting.
Abstract Base Class for all testcases.
Definition: run.hpp:62
»Scheduler-Service« : coordinate render activities.
Definition: scheduler.hpp:222
Service for coordination and dispatch of render activities.
#define MARK_TEST_FUN
Macro to mark the current test function in STDOUT.
Simple test class runner.
Tiny helper functions and shortcuts to be used everywhere Consider this header to be effectively incl...
A collection of frequently used helper functions to support unit testing.
TestChainLoad && configureShape_chain_loadBursts()
preconfigured topology: single graph with massive »load bursts«
const StatKey STAT_JOIN
joining node
Statistic computeGraphStatistics()
Operator on TestChainLoad to evaluate current graph connectivity.
Mix-in for setup of a #ParameterRange evaluation to watch the processing of a single load peak...
const StatKey STAT_FORK
forking node
static size_t COMPUTATION_CAPACITY
Nominal »full size« of a pool of concurrent workers.
Definition: work-force.hpp:115
a family of time value like entities and their relationships.
Vault-Layer implementation namespace root.
Collector and aggregator for performance data.
A calibratable CPU load to be invoked from a node job functor.
*/