@@ -79,7 +79,8 @@ type cpuTreeAllocatorOptions struct {
79
79
// topologyBalancing true prefers allocating from branches
80
80
// with most free CPUs (spread allocations), while false is
81
81
// the opposite (packed allocations).
82
- topologyBalancing bool
82
+ topologyBalancing bool
83
+ preferSpreadOnPhysicalCores bool
83
84
}
84
85
85
86
// Strings returns topology level as a string
@@ -131,6 +132,19 @@ func (t *cpuTreeNode) String() string {
131
132
return fmt .Sprintf ("%s%v" , t .name , t .children )
132
133
}
133
134
135
+ func (t * cpuTreeNode ) PrettyPrint () string {
136
+ origDepth := t .Depth ()
137
+ lines := []string {}
138
+ t .DepthFirstWalk (func (tn * cpuTreeNode ) error {
139
+ lines = append (lines ,
140
+ fmt .Sprintf ("%s%s: %q cpus: %s" ,
141
+ strings .Repeat (" " , (tn .Depth ()- origDepth )* 4 ),
142
+ tn .level , tn .name , tn .cpus ))
143
+ return nil
144
+ })
145
+ return strings .Join (lines , "\n " )
146
+ }
147
+
134
148
// String returns cpuTreeNodeAttributes as a string.
135
149
func (tna cpuTreeNodeAttributes ) String () string {
136
150
return fmt .Sprintf ("%s{%d,%v,%d,%d}" , tna .t .name , tna .depth ,
@@ -146,6 +160,34 @@ func NewCpuTree(name string) *cpuTreeNode {
146
160
}
147
161
}
148
162
163
+ func (t * cpuTreeNode ) CopyTree () * cpuTreeNode {
164
+ newNode := t .CopyNode ()
165
+ newNode .children = make ([]* cpuTreeNode , 0 , len (t .children ))
166
+ for _ , child := range t .children {
167
+ newNode .AddChild (child .CopyTree ())
168
+ }
169
+ return newNode
170
+ }
171
+
172
+ func (t * cpuTreeNode ) CopyNode () * cpuTreeNode {
173
+ newNode := cpuTreeNode {
174
+ name : t .name ,
175
+ level : t .level ,
176
+ parent : t .parent ,
177
+ children : t .children ,
178
+ cpus : t .cpus ,
179
+ }
180
+ return & newNode
181
+ }
182
+
183
+ // Depth returns the distance from the root node.
184
+ func (t * cpuTreeNode ) Depth () int {
185
+ if t .parent == nil {
186
+ return 0
187
+ }
188
+ return t .parent .Depth () + 1
189
+ }
190
+
149
191
// AddChild adds new child node to a CPU tree node.
150
192
func (t * cpuTreeNode ) AddChild (child * cpuTreeNode ) {
151
193
child .parent = t
@@ -165,6 +207,38 @@ func (t *cpuTreeNode) Cpus() cpuset.CPUSet {
165
207
return t .cpus
166
208
}
167
209
210
+ // SiblingIndex returns the index of this node among its parents
211
+ // children. Returns -1 for the root node, -2 if this node is not
212
+ // listed among the children of its parent.
213
+ func (t * cpuTreeNode ) SiblingIndex () int {
214
+ if t .parent == nil {
215
+ return - 1
216
+ }
217
+ for idx , child := range t .parent .children {
218
+ if child == t {
219
+ return idx
220
+ }
221
+ }
222
+ return - 2
223
+ }
224
+
225
+ func (t * cpuTreeNode ) FindLeafWithCpu (cpu int ) * cpuTreeNode {
226
+ var found * cpuTreeNode
227
+ t .DepthFirstWalk (func (tn * cpuTreeNode ) error {
228
+ if len (tn .children ) > 0 {
229
+ return nil
230
+ }
231
+ for _ , cpuHere := range tn .cpus .List () {
232
+ if cpu == cpuHere {
233
+ found = tn
234
+ return WalkStop
235
+ }
236
+ }
237
+ return nil // not found here, no more children to search
238
+ })
239
+ return found
240
+ }
241
+
168
242
// WalkSkipChildren error returned from a DepthFirstWalk handler
169
243
// prevents walking deeper in the tree. The caller of the
170
244
// DepthFirstWalk will get no error.
@@ -236,13 +310,18 @@ func NewCpuTreeFromSystem() (*cpuTreeNode, error) {
236
310
nodeTree .level = CPUTopologyLevelNuma
237
311
dieTree .AddChild (nodeTree )
238
312
node := sys .Node (nodeID )
313
+ threadsSeen := map [int ]struct {}{}
239
314
for _ , cpuID := range node .CPUSet ().List () {
315
+ if _ , alreadySeen := threadsSeen [cpuID ]; alreadySeen {
316
+ continue
317
+ }
240
318
cpuTree := NewCpuTree (fmt .Sprintf ("p%dd%dn%dcpu%d" , packageID , dieID , nodeID , cpuID ))
241
319
242
320
cpuTree .level = CPUTopologyLevelCore
243
321
nodeTree .AddChild (cpuTree )
244
322
cpu := sys .CPU (cpuID )
245
323
for _ , threadID := range cpu .ThreadCPUSet ().List () {
324
+ threadsSeen [threadID ] = struct {}{}
246
325
threadTree := NewCpuTree (fmt .Sprintf ("p%dd%dn%dcpu%dt%d" , packageID , dieID , nodeID , cpuID , threadID ))
247
326
threadTree .level = CPUTopologyLevelThread
248
327
cpuTree .AddChild (threadTree )
@@ -312,13 +391,83 @@ func (t *cpuTreeNode) toAttributedSlice(
312
391
}
313
392
}
314
393
394
+ // SplitLevel returns the root node of a new CPU tree where all
395
+ // branches of a topology level have been split into new classes.
396
+ func (t * cpuTreeNode ) SplitLevel (splitLevel CPUTopologyLevel , cpuClassifier func (int ) int ) * cpuTreeNode {
397
+ newRoot := t .CopyTree ()
398
+ newRoot .DepthFirstWalk (func (tn * cpuTreeNode ) error {
399
+ // Dive into the level that will be split.
400
+ if tn .level != splitLevel {
401
+ return nil
402
+ }
403
+ // Classify CPUs to the map: class -> list of cpus
404
+ classCpus := map [int ][]int {}
405
+ for _ , cpu := range t .cpus .List () {
406
+ class := cpuClassifier (cpu )
407
+ classCpus [class ] = append (classCpus [class ], cpu )
408
+ }
409
+ // Clear existing children of this node. New children
410
+ // will be classes whose children are masked versions
411
+ // of original children of this node.
412
+ origChildren := tn .children
413
+ tn .children = make ([]* cpuTreeNode , 0 , len (classCpus ))
414
+ // Add new child corresponding each class.
415
+ for class , cpus := range classCpus {
416
+ cpuMask := cpuset .New (cpus ... )
417
+ newNode := NewCpuTree (fmt .Sprintf ("%sclass%d" , tn .name , class ))
418
+ tn .AddChild (newNode )
419
+ newNode .cpus = tn .cpus .Intersection (cpuMask )
420
+ newNode .level = tn .level
421
+ newNode .parent = tn
422
+ for _ , child := range origChildren {
423
+ newChild := child .CopyTree ()
424
+ newChild .DepthFirstWalk (func (cn * cpuTreeNode ) error {
425
+ cn .cpus = cn .cpus .Intersection (cpuMask )
426
+ if cn .cpus .Size () == 0 && cn .parent != nil {
427
+ // all cpus masked
428
+ // out: cut out this
429
+ // branch
430
+ newSiblings := []* cpuTreeNode {}
431
+ for _ , child := range cn .parent .children {
432
+ if child != cn {
433
+ newSiblings = append (newSiblings , child )
434
+ }
435
+ }
436
+ cn .parent .children = newSiblings
437
+ return WalkSkipChildren
438
+ }
439
+ return nil
440
+ })
441
+ newNode .AddChild (newChild )
442
+ }
443
+ }
444
+ return WalkSkipChildren
445
+ })
446
+ return newRoot
447
+ }
448
+
315
449
// NewAllocator returns new CPU allocator for allocating CPUs from a
316
450
// CPU tree branch.
317
451
func (t * cpuTreeNode ) NewAllocator (options cpuTreeAllocatorOptions ) * cpuTreeAllocator {
318
452
ta := & cpuTreeAllocator {
319
453
root : t ,
320
454
options : options ,
321
455
}
456
+ if options .preferSpreadOnPhysicalCores {
457
+ newTree := t .SplitLevel (CPUTopologyLevelNuma ,
458
+ // CPU classifier: class of the CPU equals to
459
+ // the index in the child list of its parent
460
+ // node in the tree. Expect leaf node is a
461
+ // hyperthread, parent a physical core.
462
+ func (cpu int ) int {
463
+ leaf := t .FindLeafWithCpu (cpu )
464
+ if leaf == nil {
465
+ log .Fatalf ("SplitLevel CPU classifier: cpu %d not in tree:\n %s\n \n " , cpu , t .PrettyPrint ())
466
+ }
467
+ return leaf .SiblingIndex ()
468
+ })
469
+ ta .root = newTree
470
+ }
322
471
return ta
323
472
}
324
473
@@ -409,7 +558,36 @@ func (ta *cpuTreeAllocator) sorterRelease(tnas []cpuTreeNodeAttributes) func(int
409
558
// abs(delta) CPUs can be freed.
410
559
func (ta * cpuTreeAllocator ) ResizeCpus (currentCpus , freeCpus cpuset.CPUSet , delta int ) (cpuset.CPUSet , cpuset.CPUSet , error ) {
411
560
if delta > 0 {
412
- return ta .resizeCpus (currentCpus , freeCpus , delta )
561
+ addFromSuperset , removeFromSuperset , err := ta .resizeCpus (currentCpus , freeCpus , delta )
562
+ if ! ta .options .preferSpreadOnPhysicalCores || addFromSuperset .Size () == delta {
563
+ return addFromSuperset , removeFromSuperset , err
564
+ }
565
+ // addFromSuperset contains more CPUs (equally good
566
+ // choices) than actually needed. In case of
567
+ // preferSpreadOnPhysicalCores, however, selecting any
568
+ // of these does not result in equally good
569
+ // result. Therefore, in this case, construct addFrom
570
+ // set by adding one CPU at a time.
571
+ addFrom := cpuset .New ()
572
+ for n := 0 ; n < delta ; n ++ {
573
+ addSingleFrom , _ , err := ta .resizeCpus (currentCpus , freeCpus , 1 )
574
+ if err != nil {
575
+ return addFromSuperset , removeFromSuperset , err
576
+ }
577
+ if addSingleFrom .Size () != 1 {
578
+ return addFromSuperset , removeFromSuperset , fmt .Errorf ("internal error: failed to find single CPU to allocate, " +
579
+ "currentCpus=%s freeCpus=%s expectedSingle=%s" ,
580
+ currentCpus , freeCpus , addSingleFrom )
581
+ }
582
+ addFrom = addFrom .Union (addSingleFrom )
583
+ if addFrom .Size () != n + 1 {
584
+ return addFromSuperset , removeFromSuperset , fmt .Errorf ("internal error: double add the same CPU (%s) to cpuset %s on round %d" ,
585
+ addSingleFrom , addFrom , n + 1 )
586
+ }
587
+ currentCpus = currentCpus .Union (addSingleFrom )
588
+ freeCpus = freeCpus .Difference (addSingleFrom )
589
+ }
590
+ return addFrom , removeFromSuperset , nil
413
591
}
414
592
// In multi-CPU removal, remove CPUs one by one instead of
415
593
// trying to find a single topology element from which all of
0 commit comments