Skip to content

Commit 3d3500e

Browse files
authored
runners: Revert things related to batch termination (#6868)
This reverts the following PRs: * #6859 * #6858 * #6855 * #6854 * #6852 These were causing issues where scale-down was too aggressively scaling down instances leading to runners not being refreshed by scale-up. I do think the SSM expiration stuff is worth a re-do though but there were merge conflicts so I have to revert the entire thing.
1 parent cb05706 commit 3d3500e

File tree

5 files changed

+222
-341
lines changed

5 files changed

+222
-341
lines changed

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/runners.test.ts

Lines changed: 51 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ import {
22
RunnerInputParameters,
33
createRunner,
44
findAmiID,
5+
getParameterNameForRunner,
56
listRunners,
67
listSSMParameters,
78
resetRunnersCaches,
89
terminateRunner,
910
tryReuseRunner,
10-
terminateRunners,
1111
} from './runners';
1212
import { RunnerInfo } from './utils';
1313
import { ScaleUpMetrics } from './metrics';
@@ -326,11 +326,10 @@ describe('listSSMParameters', () => {
326326
});
327327
});
328328

329-
describe('terminateRunners', () => {
329+
describe('terminateRunner', () => {
330330
beforeEach(() => {
331331
mockSSMdescribeParametersRet.mockClear();
332332
mockEC2.terminateInstances.mockClear();
333-
mockSSM.deleteParameter.mockClear();
334333
const config = {
335334
environment: 'gi-ci',
336335
minimumRunningTimeInMinutes: 45,
@@ -340,143 +339,68 @@ describe('terminateRunners', () => {
340339
resetRunnersCaches();
341340
});
342341

343-
it('terminates multiple runners in same region successfully', async () => {
344-
const runners: RunnerInfo[] = [
345-
{
346-
awsRegion: 'us-east-1',
347-
instanceId: 'i-1234',
348-
environment: 'gi-ci',
349-
},
350-
{
351-
awsRegion: 'us-east-1',
352-
instanceId: 'i-5678',
353-
environment: 'gi-ci',
354-
},
355-
];
356-
357-
await terminateRunners(runners, metrics);
358-
359-
expect(mockEC2.terminateInstances).toBeCalledTimes(1);
360-
expect(mockEC2.terminateInstances).toBeCalledWith({
361-
InstanceIds: ['i-1234', 'i-5678'],
342+
it('calls terminateInstances', async () => {
343+
const runner: RunnerInfo = {
344+
awsRegion: Config.Instance.awsRegion,
345+
instanceId: 'i-1234',
346+
environment: 'gi-ci',
347+
};
348+
mockSSMdescribeParametersRet.mockResolvedValueOnce({
349+
Parameters: [getParameterNameForRunner(runner.environment as string, runner.instanceId)].map((s) => {
350+
return { Name: s };
351+
}),
362352
});
363-
});
364-
365-
it('terminates runners across multiple regions', async () => {
366-
const runners: RunnerInfo[] = [
367-
{
368-
awsRegion: 'us-east-1',
369-
instanceId: 'i-1234',
370-
environment: 'gi-ci',
371-
},
372-
{
373-
awsRegion: 'us-west-2',
374-
instanceId: 'i-5678',
375-
environment: 'gi-ci',
376-
},
377-
];
378-
379-
await terminateRunners(runners, metrics);
353+
await terminateRunner(runner, metrics);
380354

381-
expect(mockEC2.terminateInstances).toBeCalledTimes(2);
382-
expect(mockEC2.terminateInstances).toHaveBeenNthCalledWith(1, {
383-
InstanceIds: ['i-1234'],
355+
expect(mockEC2.terminateInstances).toBeCalledWith({
356+
InstanceIds: [runner.instanceId],
384357
});
385-
expect(mockEC2.terminateInstances).toHaveBeenNthCalledWith(2, {
386-
InstanceIds: ['i-5678'],
358+
expect(mockSSM.describeParameters).toBeCalledTimes(1);
359+
expect(mockSSM.deleteParameter).toBeCalledTimes(1);
360+
expect(mockSSM.deleteParameter).toBeCalledWith({
361+
Name: getParameterNameForRunner(runner.environment as string, runner.instanceId),
387362
});
388363
});
389364

390-
it('handles partial failure - terminates some runners but fails on others', async () => {
391-
const runners: RunnerInfo[] = [
392-
{
393-
awsRegion: 'us-east-1',
394-
instanceId: 'i-1234',
395-
environment: 'gi-ci',
396-
},
397-
{
398-
awsRegion: 'us-east-1',
399-
instanceId: 'i-5678',
400-
environment: 'gi-ci',
401-
},
402-
{
403-
awsRegion: 'us-west-2',
404-
instanceId: 'i-9999',
405-
environment: 'gi-ci',
406-
},
407-
];
408-
409-
// First region succeeds, second region fails
410-
mockEC2.terminateInstances
411-
.mockReturnValueOnce({
412-
promise: jest.fn().mockResolvedValueOnce({}),
413-
})
414-
.mockReturnValueOnce({
415-
promise: jest.fn().mockRejectedValueOnce(new Error('Region failure')),
416-
});
417-
418-
await expect(terminateRunners(runners, metrics)).rejects.toThrow(
419-
'Failed to terminate some runners: Instance i-9999: Region failure',
420-
);
421-
422-
expect(mockEC2.terminateInstances).toBeCalledTimes(2);
365+
it('fails to terminate', async () => {
366+
const errMsg = 'Error message';
367+
const runner: RunnerInfo = {
368+
awsRegion: Config.Instance.awsRegion,
369+
instanceId: '1234',
370+
};
371+
mockEC2.terminateInstances.mockClear().mockReturnValue({
372+
promise: jest.fn().mockRejectedValueOnce(Error(errMsg)),
373+
});
374+
expect(terminateRunner(runner, metrics)).rejects.toThrowError(errMsg);
375+
expect(mockSSM.describeParameters).not.toBeCalled();
376+
expect(mockSSM.deleteParameter).not.toBeCalled();
423377
});
424378

425-
it('handles large batches by splitting into chunks', async () => {
426-
// Create 150 runners to test batching (should split into 2 batches of 100 and 50)
427-
const runners: RunnerInfo[] = Array.from({ length: 150 }, (_, i) => ({
428-
awsRegion: 'us-east-1',
429-
instanceId: `i-${i.toString().padStart(4, '0')}`,
430-
environment: 'gi-ci',
431-
}));
432-
433-
await terminateRunners(runners, metrics);
379+
it('fails to list parameters on terminate, then force delete all next parameters', async () => {
380+
const runner1: RunnerInfo = {
381+
awsRegion: Config.Instance.awsRegion,
382+
instanceId: '1234',
383+
environment: 'environ',
384+
};
385+
const runner2: RunnerInfo = {
386+
awsRegion: Config.Instance.awsRegion,
387+
instanceId: '1235',
388+
environment: 'environ',
389+
};
390+
mockSSMdescribeParametersRet.mockRejectedValueOnce('Some Error');
391+
await terminateRunner(runner1, metrics);
392+
await terminateRunner(runner2, metrics);
434393

435-
// Should make 2 terminate calls (batches of 100 and 50)
436394
expect(mockEC2.terminateInstances).toBeCalledTimes(2);
437-
expect(mockEC2.terminateInstances).toHaveBeenNthCalledWith(1, {
438-
InstanceIds: runners.slice(0, 100).map((r) => r.instanceId),
395+
expect(mockSSM.describeParameters).toBeCalledTimes(1);
396+
expect(mockSSM.deleteParameter).toBeCalledTimes(2);
397+
expect(mockSSM.deleteParameter).toBeCalledWith({
398+
Name: getParameterNameForRunner(runner1.environment as string, runner1.instanceId),
439399
});
440-
expect(mockEC2.terminateInstances).toHaveBeenNthCalledWith(2, {
441-
InstanceIds: runners.slice(100, 150).map((r) => r.instanceId),
400+
expect(mockSSM.deleteParameter).toBeCalledWith({
401+
Name: getParameterNameForRunner(runner2.environment as string, runner2.instanceId),
442402
});
443403
});
444-
445-
it('cleans up SSM parameters for successful batches even when later batch fails', async () => {
446-
// Create runners that will be split into 2 batches
447-
const runners: RunnerInfo[] = Array.from({ length: 150 }, (_, i) => ({
448-
awsRegion: 'us-east-1',
449-
instanceId: `i-${i.toString().padStart(4, '0')}`,
450-
environment: 'gi-ci',
451-
}));
452-
453-
// First batch succeeds, second batch fails
454-
mockEC2.terminateInstances
455-
.mockReturnValueOnce({
456-
promise: jest.fn().mockResolvedValueOnce({}),
457-
})
458-
.mockReturnValueOnce({
459-
promise: jest.fn().mockRejectedValueOnce(new Error('Batch 2 failed')),
460-
});
461-
462-
await expect(terminateRunners(runners, metrics)).rejects.toThrow('Failed to terminate some runners');
463-
464-
expect(mockEC2.terminateInstances).toBeCalledTimes(2);
465-
});
466-
467-
it('handles SSM parameter cleanup failure gracefully', async () => {
468-
const runners: RunnerInfo[] = [
469-
{
470-
awsRegion: 'us-east-1',
471-
instanceId: 'i-1234',
472-
environment: 'gi-ci',
473-
},
474-
];
475-
476-
await terminateRunners(runners, metrics);
477-
478-
expect(mockEC2.terminateInstances).toBeCalledTimes(1);
479-
});
480404
});
481405

482406
describe('findAmiID', () => {
@@ -1339,28 +1263,7 @@ describe('createRunner', () => {
13391263
Name: 'wg113-i-1234',
13401264
Value: 'us-east-1-BLAH',
13411265
Type: 'SecureString',
1342-
Policies: expect.any(String),
13431266
});
1344-
1345-
// Verify the Policies parameter contains the correct expiration policy structure
1346-
const putParameterCall = mockSSM.putParameter.mock.calls[0][0];
1347-
const policies = JSON.parse(putParameterCall.Policies);
1348-
expect(policies).toEqual([
1349-
{
1350-
Type: 'Expiration',
1351-
Version: '1.0',
1352-
Attributes: {
1353-
Timestamp: expect.any(String),
1354-
},
1355-
},
1356-
]);
1357-
1358-
// Verify the timestamp is approximately 30 minutes in the future
1359-
const expirationTime = new Date(policies[0].Attributes.Timestamp);
1360-
const now = Date.now();
1361-
const timeDiff = expirationTime.getTime() - now;
1362-
expect(timeDiff).toBeGreaterThan(25 * 60 * 1000); // at least 25 minutes (allowing for test execution time)
1363-
expect(timeDiff).toBeLessThan(35 * 60 * 1000); // at most 35 minutes (allowing for clock differences)
13641267
});
13651268

13661269
it('creates ssm experiment parameters when joining experiment', async () => {
@@ -1404,7 +1307,6 @@ describe('createRunner', () => {
14041307
Name: 'wg113-i-1234',
14051308
Value: 'us-east-1-BLAH #ON_AMI_EXPERIMENT',
14061309
Type: 'SecureString',
1407-
Policies: expect.any(String),
14081310
});
14091311
expect(mockEC2.runInstances).toBeCalledTimes(1);
14101312
expect(mockEC2.runInstances).toBeCalledWith(
@@ -1723,38 +1625,3 @@ describe('createRunner', () => {
17231625
});
17241626
});
17251627
});
1726-
1727-
describe('terminateRunner', () => {
1728-
beforeEach(() => {
1729-
mockSSMdescribeParametersRet.mockClear();
1730-
mockEC2.terminateInstances.mockClear();
1731-
mockSSM.deleteParameter.mockClear();
1732-
const config = {
1733-
environment: 'gi-ci',
1734-
minimumRunningTimeInMinutes: 45,
1735-
};
1736-
jest.spyOn(Config, 'Instance', 'get').mockImplementation(() => config as unknown as Config);
1737-
1738-
resetRunnersCaches();
1739-
});
1740-
1741-
it('delegates to terminateRunners with single runner array', async () => {
1742-
const runner: RunnerInfo = {
1743-
awsRegion: 'us-east-1',
1744-
instanceId: 'i-1234',
1745-
environment: 'gi-ci',
1746-
};
1747-
1748-
// Mock terminateRunners by mocking the underlying calls
1749-
mockEC2.terminateInstances.mockReturnValueOnce({
1750-
promise: jest.fn().mockResolvedValueOnce({}),
1751-
});
1752-
1753-
await terminateRunner(runner, metrics);
1754-
1755-
// Verify the calls match what terminateRunners would do with a single runner
1756-
expect(mockEC2.terminateInstances).toBeCalledWith({
1757-
InstanceIds: ['i-1234'],
1758-
});
1759-
});
1760-
});

0 commit comments

Comments
 (0)