Skip to content
This repository was archived by the owner on Apr 17, 2025. It is now read-only.

Commit 02247ad

Browse files
authored
Merge pull request #237 from adrianludwin/hrq-refactor
Refactor some HRQ code for readability
2 parents ad5cec6 + eec54ca commit 02247ad

File tree

6 files changed

+77
-230
lines changed

6 files changed

+77
-230
lines changed

internal/forest/namespacehrq.go

Lines changed: 53 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,12 @@ type usage struct {
3636
subtree v1.ResourceList
3737
}
3838

39-
// TryUseResources checks resource limits in the namespace and its ancestors
40-
// when given proposed absolute (not delta) resource usages in the namespace. If
41-
// the proposed resource usages are the same as the current usages, of course
42-
// it's allowed and we do nothing. If there are any changes in the usages, we
43-
// only check to see if the proposed changing usages are allowed. If any of them
44-
// exceed resource limits, it returns an error; otherwise, it returns nil.
45-
// Callers of this method are responsible for updating resource usage status of
46-
// the HierarchicalResourceQuota objects.
47-
//
48-
// If the proposed resource usages changes are allowed, the method also updates
49-
// in-memory local resource usages of the current namespace and the subtree
50-
// resource usages of its ancestors (including itself).
39+
// TryUseResources checks resource limits in the namespace and its ancestors when given proposed
40+
// absolute (not delta) resource usages in the namespace. If there are any changes in the usages, we
41+
// only check to see if any proposed increases take us over any limits. If any of them exceed
42+
// resource limits, it returns an error suitable to display to end users; otherwise, it updates the
43+
// in-memory usages of both this namespace as well as all its ancestors. Callers of this method are
44+
// responsible for updating resource usage status of the HierarchicalResourceQuota objects.
5145
//
5246
// TryUseResources is called by the HRQ admission controller to decide if a ResourceQuota.Status
5347
// update issued by the K8s ResourceQuota admission controller is allowed. Since UseResources()
@@ -62,38 +56,21 @@ type usage struct {
6256
// etcd) but the apiserver runs a cleanup process that occasionally syncs up actual usage with the
6357
// usage recorded in RQs. When the RQs are changed, we'll be updated too.
6458
//
65-
// Based on observations, the K8s ResourceQuota admission controller is called only
66-
// when a resource is consumed, not when a resource is released. Therefore, in most cases,
67-
// the proposed resource usages that the HRQ admission controller received should
68-
// be larger than in-memory resource usages.
69-
//
70-
// The proposed usage can be smaller when in-memory resource usages are out of sync
71-
// with ResourceQuota.Status.Used. In this case, TryUseResources will still update
72-
// in-memory usages.
73-
//
74-
// For example, when a user deletes a pod that consumes
75-
// 100m cpu and immediately creates another pod that consumes 1m cpu in a namespace,
76-
// the HRQ ResourceQuota reconciler will be triggered by ResourceQuota.Status changes twice:
77-
// 1) when pod is deleted.
78-
// 2) when pod is created.
79-
// The HRQ ResourceQuota reconciler will compare `local` with ResourceQuota.Status.Used
80-
// and will update `local` (and `subtree` in namespace and its ancestors) if it
81-
// sees `local` is different from ResourceQuota.Status.Used.
59+
// Based on observations, the K8s ResourceQuota admission controller is called only when a resource
60+
// is consumed, not when a resource is released. Therefore, in most cases, the proposed resource
61+
// usages that the HRQ admission controller received should be larger than in-memory resource
62+
// usages. However, this function is robust to (that is, always allows) decreases as well, mainly
63+
// because it's easier to test - plus, who knows, the K8s behaviour may change in the future.
8264
//
83-
// The HRQ admission coontroller will be triggered once when the pod is created.
84-
// If the HRQ admission coontroller locks the in-memory forest before step 1)
85-
// of the HRQ ResourceQuota reconciler, the HRQ admission controller will see proposed
86-
// usages are smaller than in-memory usages and will update in-memory usages.
87-
// When ResourceQuota reconciler receives the lock later, it will notice the
88-
// ResourceQuota.Status.Used is the same as in-memory usages and will not
89-
// update in-memory usages.
65+
// This may allow one weird case where a user may be allowed to use something they weren't supposed
66+
// to. Let's say you're well over your limit, and then in quick succession, some resources are
67+
// deleted, and some _fewer_ are added, but enough to still go over the limit. In that case, there's
68+
// a race condition between this function being called, and the RQ reconciler updating the baseline
69+
// resource usage. If this function wins, it will look like resource usage is decreasing, and will
70+
// be incorrectly allowed. If the RQ reconciler runs first, we'll see that the usage is incorrectly
71+
// _increasing_ and it will be disallowed. However, I think the simplicity of not trying to prevent
72+
// this (hopefully very unlikely) corner case is more valuable than trying to catch it.
9073
func (n *Namespace) TryUseResources(rl v1.ResourceList) error {
91-
// The proposed resource usages are allowed if they are the same as current
92-
// resource usages in the namespace.
93-
if utils.Equals(rl, n.quotas.used.local) {
94-
return nil
95-
}
96-
9774
if err := n.canUseResources(rl); err != nil {
9875
// At least one of the proposed usage exceeds resource limits.
9976
return err
@@ -114,28 +91,32 @@ func (n *Namespace) TryUseResources(rl v1.ResourceList) error {
11491
func (n *Namespace) canUseResources(u v1.ResourceList) error {
11592
// For each resource, delta = proposed usage - current usage.
11693
delta := utils.Subtract(u, n.quotas.used.local)
117-
delta = utils.OmitZeroQuantity(delta)
94+
// Only consider *increasing* deltas; see comments to TryUseResources for details.
95+
increases := utils.OmitLTEZero(delta)
11896

11997
for _, nsnm := range n.AncestryNames() {
12098
ns := n.forest.Get(nsnm)
121-
r := utils.Copy(ns.quotas.used.subtree)
122-
r = utils.AddIfExists(delta, r)
123-
allowed, nm, exceeded := checkLimits(ns.quotas.limits, r)
124-
if !allowed {
125-
// Construct the error message similar to the RQ exceeded quota error message -
126-
// "exceeded quota: gke-hc-hrq, requested: configmaps=1, used: configmaps=2, limited: configmaps=2"
127-
msg := fmt.Sprintf("exceeded hierarchical quota in namespace %q: %q", ns.name, nm)
128-
for _, er := range exceeded {
129-
rnm := er.String()
130-
// Get the requested, used, limited quantity of the exceeded resource.
131-
rq := delta[er]
132-
uq := ns.quotas.used.subtree[er]
133-
lq := ns.quotas.limits[nm][er]
134-
msg += fmt.Sprintf(", requested: %s=%v, used: %s=%v, limited: %s=%v",
135-
rnm, &rq, rnm, &uq, rnm, &lq)
136-
}
137-
return fmt.Errorf(msg)
99+
// Use AddIfExists (not Add) because we want to ignore any resources that aren't increasing when
100+
// checking against the limits.
101+
proposed := utils.AddIfExists(increases, ns.quotas.used.subtree)
102+
allowed, nm, exceeded := checkLimits(ns.quotas.limits, proposed)
103+
if allowed {
104+
continue
138105
}
106+
107+
// Construct the error message similar to the RQ exceeded quota error message -
108+
// "exceeded quota: gke-hc-hrq, requested: configmaps=1, used: configmaps=2, limited: configmaps=2"
109+
msg := fmt.Sprintf("exceeded hierarchical quota in namespace %q: %q", ns.name, nm)
110+
for _, er := range exceeded {
111+
rnm := er.String()
112+
// Get the requested, used, limited quantity of the exceeded resource.
113+
rq := increases[er]
114+
uq := ns.quotas.used.subtree[er]
115+
lq := ns.quotas.limits[nm][er]
116+
msg += fmt.Sprintf(", requested: %s=%v, used: %s=%v, limited: %s=%v",
117+
rnm, &rq, rnm, &uq, rnm, &lq)
118+
}
119+
return fmt.Errorf(msg)
139120
}
140121

141122
return nil
@@ -159,8 +140,10 @@ func (n *Namespace) canUseResources(u v1.ResourceList) error {
159140
// usages to the new ancestors following a parent update
160141
func (n *Namespace) UseResources(newUsage v1.ResourceList) {
161142
oldUsage := n.quotas.used.local
162-
// We only consider limited usages.
163-
newUsage = utils.CleanupUnneeded(newUsage, n.Limits())
143+
144+
// We only store the usages we care about
145+
newUsage = utils.FilterUnlimited(newUsage, n.Limits())
146+
164147
// Early exit if there's no usages change. It's safe because the forest would
165148
// remain unchanged and the caller would always enqueue all ancestor HRQs.
166149
if utils.Equals(oldUsage, newUsage) {
@@ -181,7 +164,7 @@ func (n *Namespace) UseResources(newUsage v1.ResourceList) {
181164

182165
// Get the new subtree usage and remove no longer limited usages.
183166
newSubUsg := utils.Add(delta, ns.quotas.used.subtree)
184-
ns.UpdateSubtreeUsages(newSubUsg)
167+
ns.quotas.used.subtree = utils.FilterUnlimited(newSubUsg, ns.Limits())
185168
}
186169
}
187170

@@ -236,9 +219,13 @@ func (n *Namespace) GetSubtreeUsages() v1.ResourceList {
236219
return u
237220
}
238221

239-
// UpdateSubtreeUsages sets the subtree resource usages.
240-
func (n *Namespace) UpdateSubtreeUsages(rl v1.ResourceList) {
241-
n.quotas.used.subtree = utils.CleanupUnneeded(rl, n.Limits())
222+
// TestOnlySetSubtreeUsage overwrites the actual, calculated subtree usages and replaces them with
223+
// arbitrary garbage. Needless to say, you should never call this, unless you're testing HNC's
224+
// ability to recover from arbitrary garbage.
225+
//
226+
// The passed-in arg is used as-is, not copied. This is test code, so deal with it 😎
227+
func (n *Namespace) TestOnlySetSubtreeUsage(rl v1.ResourceList) {
228+
n.quotas.used.subtree = rl
242229
}
243230

244231
// RemoveLimits removes limits specified by the HierarchicalResourceQuota object

internal/forest/namespacehrq_test.go

Lines changed: 7 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -151,35 +151,13 @@ func TestTryUseResources(t *testing.T) {
151151
req: testReq{ns: "bar", use: "secrets 1 pods 1"},
152152
expected: usages{"foo": "secrets 2", "bar": "secrets 1 pods 1"},
153153
}, {
154-
// Note: we are expecting an error here because this is a unit test for
155-
// the `TryUseResources` function. In reality, negative resource usage
156-
// changes won't get an error since K8s resource quota admission
157-
// controller is not called, thus neither our webhook nor this function
158-
// will be called to prevent users from deleting exceeded resources.
159-
name: "deny decrease if it still exceeds limits",
154+
name: "allow decrease if it still exceeds limits",
160155
setup: []testNS{
161156
{nm: "foo", limits: "secrets 2", local: "secrets 1"},
162157
{nm: "bar", ancs: "foo", limits: "secrets 100", local: "secrets 4"},
163158
},
164-
req: testReq{ns: "bar", use: "secrets 2"},
165-
// In reality it won't show negative number since neither K8s rq validator
166-
// nor our rq validator will be called to output this message.
167-
error: []wantError{{ns: "foo", nm: "fooHrq", exceeded: []exceeded{{name: "secrets", requested: "-2", used: "5", limited: "2"}}}},
168-
// Usages should not be updated.
169-
expected: usages{"foo": "secrets 5", "bar": "secrets 4"},
170-
}, {
171-
// Note: we are expecting an error here for the same reason in the above test.
172-
name: "deny decrease if it still exceeds limits, while not affecting other resource usages",
173-
setup: []testNS{
174-
{nm: "foo", limits: "secrets 2", local: "secrets 1"},
175-
{nm: "bar", ancs: "foo", limits: "secrets 100 pods 2", local: "secrets 4 pods 1"},
176-
},
177-
req: testReq{ns: "bar", use: "secrets 2 pods 1"},
178-
// In reality it won't show negative number since neither K8s rq validator
179-
// nor our rq validator will be called to output this message.
180-
error: []wantError{{ns: "foo", nm: "fooHrq", exceeded: []exceeded{{name: "secrets", requested: "-2", used: "5", limited: "2"}}}},
181-
// Usages should not be updated.
182-
expected: usages{"foo": "secrets 5", "bar": "secrets 4 pods 1"},
159+
req: testReq{ns: "bar", use: "secrets 2"},
160+
expected: usages{"foo": "secrets 3", "bar": "secrets 2"},
183161
}}
184162
for _, tc := range tests {
185163
t.Run(tc.name, func(t *testing.T) {
@@ -251,53 +229,33 @@ func TestCanUseResource(t *testing.T) {
251229
req: testReq{ns: "bar", use: "secrets 3 pods 1"},
252230
want: []wantError{{ns: "foo", nm: "fooHrq", exceeded: []exceeded{{name: "secrets", requested: "2", used: "2", limited: "2"}}}},
253231
}, {
254-
// Note: we are expecting an error here because this is a unit test for
255-
// the `CanUseResources` function. In reality, negative resource usage
256-
// changes won't get an error since K8s resource quota admission
257-
// controller is not called, thus neither our webhook nor this function
258-
// will be called to prevent users from deleting exceeded resources.
259-
name: "deny decrease exceeding local limit when has parent",
232+
name: "allow decrease exceeding local limit when has parent",
260233
setup: []testNS{
261234
{nm: "foo", limits: "secrets 2", local: "secrets 1"},
262235
{nm: "bar", ancs: "foo", limits: "pods 2", local: "pods 4"},
263236
},
264237
req: testReq{ns: "bar", use: "pods 3"},
265-
// In reality it won't show negative number since neither K8s rq validator
266-
// nor our rq validator will be called to output this message.
267-
want: []wantError{{ns: "bar", nm: "barHrq", exceeded: []exceeded{{name: "pods", requested: "-1", used: "4", limited: "2"}}}},
268238
}, {
269-
// Note: we are expecting an error here for the same reason in the above test.
270-
name: "deny decrease exceeding local limit when has parent, while not affecting other resource usages",
239+
name: "allow decrease exceeding local limit when has parent, while not affecting other resource usages",
271240
setup: []testNS{
272241
{nm: "foo", limits: "secrets 2", local: "secrets 1"},
273242
{nm: "bar", ancs: "foo", limits: "secrets 100 pods 2", local: "secrets 1 pods 4"},
274243
},
275244
req: testReq{ns: "bar", use: "secrets 1 pods 3"},
276-
// In reality it won't show negative number since neither K8s rq validator
277-
// nor our rq validator will be called to output this message.
278-
want: []wantError{{ns: "bar", nm: "barHrq", exceeded: []exceeded{{name: "pods", requested: "-1", used: "4", limited: "2"}}}},
279245
}, {
280-
// Note: we are expecting an error here for the same reason in the above test.
281-
name: "deny decrease exceeding parent limit when has parent",
246+
name: "allow decrease exceeding parent limit when has parent",
282247
setup: []testNS{
283248
{nm: "foo", limits: "secrets 2", local: "secrets 1"},
284249
{nm: "bar", ancs: "foo", limits: "secrets 100", local: "secrets 4"},
285250
},
286251
req: testReq{ns: "bar", use: "secrets 3"},
287-
// In reality it won't show negative number since neither K8s rq validator
288-
// nor our rq validator will be called to output this message.
289-
want: []wantError{{ns: "foo", nm: "fooHrq", exceeded: []exceeded{{name: "secrets", requested: "-1", used: "5", limited: "2"}}}},
290252
}, {
291-
// Note: we are expecting an error here for the same reason in the above test.
292-
name: "deny decrease exceeding parent limit when has parent, while not affecting other resource usages",
253+
name: "allow decrease exceeding parent limit when has parent, while not affecting other resource usages",
293254
setup: []testNS{
294255
{nm: "foo", limits: "secrets 2", local: "secrets 1"},
295256
{nm: "bar", ancs: "foo", limits: "secrets 100 pods 2", local: "secrets 4 pods 2"},
296257
},
297258
req: testReq{ns: "bar", use: "secrets 3 pods 2"},
298-
// In reality it won't show negative number since neither K8s rq validator
299-
// nor our rq validator will be called to output this message.
300-
want: []wantError{{ns: "foo", nm: "fooHrq", exceeded: []exceeded{{name: "secrets", requested: "-1", used: "5", limited: "2"}}}},
301259
}, {
302260
name: "deny multiple resources exceeding limits",
303261
setup: []testNS{

internal/hrq/hrqreconciler.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,9 @@ func (r *HierarchicalResourceQuotaReconciler) syncUsages(inst *api.HierarchicalR
141141
}
142142
ns := r.Forest.Get(inst.GetNamespace())
143143

144-
// Get all usage counts in this namespace
145-
usages := ns.GetSubtreeUsages()
146-
147-
// Get the names of all resource types being limited by this particular HRQ
148-
nms := utils.ResourceNames(inst.Spec.Hard)
149-
150144
// Filter the usages to only include the resource types being limited by this HRQ and write those
151145
// usages back to the HRQ status.
152-
inst.Status.Used = utils.Mask(usages, nms)
146+
inst.Status.Used = utils.FilterUnlimited(ns.GetSubtreeUsages(), inst.Spec.Hard)
153147
}
154148

155149
func isDeleted(inst *api.HierarchicalResourceQuota) bool {

internal/hrq/hrqreconciler_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ var _ = Describe("HRQ reconciler tests", func() {
230230
func forestSetSubtreeUsages(ns string, args ...string) {
231231
TestForest.Lock()
232232
defer TestForest.Unlock()
233-
TestForest.Get(ns).UpdateSubtreeUsages(argsToResourceList(0, args...))
233+
TestForest.Get(ns).TestOnlySetSubtreeUsage(argsToResourceList(0, args...))
234234
}
235235

236236
func getHRQStatus(ctx context.Context, ns, nm string) func() v1.ResourceList {

internal/hrq/utils/resources.go

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -95,25 +95,18 @@ func Subtract(a v1.ResourceList, b v1.ResourceList) v1.ResourceList {
9595
return result
9696
}
9797

98-
// OmitZeroQuantity returns a list omitting zero quantity resources.
99-
func OmitZeroQuantity(a v1.ResourceList) v1.ResourceList {
98+
// OmitLTEZero returns a list omitting zero or negative quantity resources.
99+
func OmitLTEZero(a v1.ResourceList) v1.ResourceList {
100100
for n, q := range a {
101101
if q.IsZero() {
102102
delete(a, n)
103+
} else if q.AsApproximateFloat64() <= 0 {
104+
delete(a, n)
103105
}
104106
}
105107
return a
106108
}
107109

108-
// Copy returns a deep copy of a ResourceList.
109-
func Copy(rl v1.ResourceList) v1.ResourceList {
110-
rs := make(v1.ResourceList)
111-
for k, v := range rl {
112-
rs[k] = v.DeepCopy()
113-
}
114-
return rs
115-
}
116-
117110
// Min returns the result of Min(a, b) (i.e., smallest resource quantity) for
118111
// each named resource. If a resource only exists in one but not all ResourceLists
119112
// inputs, it will be returned.
@@ -136,32 +129,22 @@ func Min(a v1.ResourceList, b v1.ResourceList) v1.ResourceList {
136129
return result
137130
}
138131

139-
// Contains returns true if the specified item is in the list of items
140-
func Contains(items []v1.ResourceName, item v1.ResourceName) bool {
141-
for _, i := range items {
142-
if i == item {
143-
return true
144-
}
145-
}
146-
return false
147-
}
148-
149-
// CleanupUnneeded cleans up the raw usages by omitting not limited usages.
150-
func CleanupUnneeded(rawUsages v1.ResourceList, limits v1.ResourceList) v1.ResourceList {
151-
return Mask(rawUsages, ResourceNames(limits))
132+
// FilterUnlimited cleans up the raw usages by omitting not limited usages.
133+
func FilterUnlimited(rawUsages v1.ResourceList, limits v1.ResourceList) v1.ResourceList {
134+
return mask(rawUsages, resourceNames(limits))
152135
}
153136

154-
// ResourceNames returns a list of all resource names in the ResourceList
155-
func ResourceNames(resources v1.ResourceList) []v1.ResourceName {
137+
// resourceNames returns a list of all resource names in the ResourceList
138+
func resourceNames(resources v1.ResourceList) []v1.ResourceName {
156139
result := []v1.ResourceName{}
157140
for resourceName := range resources {
158141
result = append(result, resourceName)
159142
}
160143
return result
161144
}
162145

163-
// Mask returns a new resource list that only has the values with the specified names
164-
func Mask(resources v1.ResourceList, names []v1.ResourceName) v1.ResourceList {
146+
// mask returns a new resource list that only has the values with the specified names
147+
func mask(resources v1.ResourceList, names []v1.ResourceName) v1.ResourceList {
165148
nameSet := toSet(names)
166149
result := v1.ResourceList{}
167150
for key, value := range resources {

0 commit comments

Comments
 (0)