Skip to content

Commit fad0801

Browse files
authored
[server] Fix broken /ready endpoint and chaned the probe to be a StartupProbe (#20672)
Tool: gitpod/catfood.gitpod.cloud
1 parent 89e0930 commit fad0801

File tree

4 files changed

+65
-25
lines changed

4 files changed

+65
-25
lines changed

components/server/src/liveness/probes.ts

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,41 @@
44
* See License.AGPL.txt in the project root for license information.
55
*/
66

7+
import * as http from "http";
78
import express from "express";
89
import { inject, injectable } from "inversify";
910
import { LivenessController } from "./liveness-controller";
1011
import { ReadinessController } from "./readiness-controller";
12+
import { AddressInfo } from "net";
1113

1214
@injectable()
1315
export class ProbesApp {
16+
private app: express.Application;
17+
private httpServer: http.Server | undefined = undefined;
18+
1419
constructor(
1520
@inject(LivenessController) protected readonly livenessController: LivenessController,
1621
@inject(ReadinessController) protected readonly readinessController: ReadinessController,
17-
) {}
18-
19-
public create(): express.Application {
22+
) {
2023
const probesApp = express();
2124
probesApp.use("/live", this.livenessController.apiRouter);
2225
probesApp.use("/ready", this.readinessController.apiRouter);
23-
return probesApp;
26+
this.app = probesApp;
27+
}
28+
29+
public async start(port: number): Promise<number> {
30+
await this.readinessController.start();
31+
32+
return new Promise((resolve, reject) => {
33+
const probeServer = this.app.listen(port, () => {
34+
resolve((<AddressInfo>probeServer.address()).port);
35+
});
36+
this.httpServer = probeServer;
37+
});
38+
}
39+
40+
public async stop(): Promise<void> {
41+
this.httpServer?.close();
42+
await this.readinessController.stop();
2443
}
2544
}

components/server/src/liveness/readiness-controller.ts

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,36 @@ import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
1212
import { v1 } from "@authzed/authzed-node";
1313
import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server";
1414
import { Redis } from "ioredis";
15+
import { repeat } from "@gitpod/gitpod-protocol/lib/util/repeat";
16+
import { Disposable, DisposableCollection } from "@gitpod/gitpod-protocol";
1517

1618
@injectable()
1719
export class ReadinessController {
1820
@inject(TypeORM) protected readonly typeOrm: TypeORM;
1921
@inject(SpiceDBClientProvider) protected readonly spiceDBClientProvider: SpiceDBClientProvider;
2022
@inject(Redis) protected readonly redis: Redis;
2123

24+
private readinessProbeEnabled: boolean = true;
25+
private disposables: DisposableCollection = new DisposableCollection();
26+
2227
get apiRouter(): express.Router {
2328
const router = express.Router();
2429
this.addReadinessHandler(router);
2530
return router;
2631
}
2732

33+
public async start() {
34+
this.disposables.push(this.startPollingFeatureFlag());
35+
}
36+
37+
public async stop() {
38+
this.disposables.dispose();
39+
}
40+
2841
protected addReadinessHandler(router: express.Router) {
2942
router.get("/", async (_, res) => {
3043
try {
31-
// Check feature flag first
32-
const readinessProbeEnabled = await getExperimentsClientForBackend().getValueAsync(
33-
"server_readiness_probe",
34-
true, // Default to readiness probe, skip if false
35-
{},
36-
);
37-
38-
if (!readinessProbeEnabled) {
44+
if (!this.readinessProbeEnabled) {
3945
log.debug("Readiness check skipped due to feature flag");
4046
res.status(200);
4147
return;
@@ -67,6 +73,7 @@ export class ReadinessController {
6773

6874
// All connections are good
6975
res.status(200).send("Ready");
76+
log.debug("Readiness check successful");
7077
} catch (error) {
7178
log.error("Readiness check failed", error);
7279
res.status(503).send("Readiness check failed");
@@ -79,6 +86,7 @@ export class ReadinessController {
7986
const connection = await this.typeOrm.getConnection();
8087
// Simple query to verify connection is working
8188
await connection.query("SELECT 1");
89+
log.debug("Database connection check successful");
8290
return true;
8391
} catch (error) {
8492
log.error("Database connection check failed", error);
@@ -113,4 +121,21 @@ export class ReadinessController {
113121
return false;
114122
}
115123
}
124+
125+
private startPollingFeatureFlag(): Disposable {
126+
return repeat(async () => {
127+
// Check feature flag first
128+
const readinessProbeEnabled = await getExperimentsClientForBackend().getValueAsync(
129+
"server_readiness_probe",
130+
true, // Default to readiness probe, skip if false
131+
{},
132+
);
133+
134+
log.debug("Feature flag server_readiness_probe updated", {
135+
readinessProbeEnabled,
136+
oldValue: this.readinessProbeEnabled,
137+
});
138+
this.readinessProbeEnabled = readinessProbeEnabled;
139+
}, 10_000);
140+
}
116141
}

components/server/src/server.ts

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ import {
5252
} from "./workspace/headless-log-service";
5353
import { runWithRequestContext } from "./util/request-context";
5454
import { AnalyticsController } from "./analytics-controller";
55-
import { ProbesApp as ProbesAppProvider } from "./liveness/probes";
55+
import { ProbesApp } from "./liveness/probes";
5656

5757
const MONITORING_PORT = 9500;
5858
const IAM_SESSION_PORT = 9876;
@@ -69,8 +69,6 @@ export class Server {
6969
protected privateApiServer?: http.Server;
7070

7171
protected readonly eventEmitter = new EventEmitter();
72-
protected probesApp: express.Application;
73-
protected probesServer?: http.Server;
7472
protected app?: express.Application;
7573
protected httpServer?: http.Server;
7674
protected monitoringApp?: express.Application;
@@ -105,7 +103,7 @@ export class Server {
105103
@inject(API) private readonly api: API,
106104
@inject(RedisSubscriber) private readonly redisSubscriber: RedisSubscriber,
107105
@inject(AnalyticsController) private readonly analyticsController: AnalyticsController,
108-
@inject(ProbesAppProvider) private readonly probesAppProvider: ProbesAppProvider,
106+
@inject(ProbesApp) private readonly probesApp: ProbesApp,
109107
) {}
110108

111109
public async init(app: express.Application) {
@@ -119,9 +117,6 @@ export class Server {
119117
await this.typeOrm.connect();
120118
log.info("connected to DB");
121119

122-
// probes
123-
this.probesApp = this.probesAppProvider.create();
124-
125120
// metrics
126121
app.use((req: express.Request, res: express.Response, next: express.NextFunction) => {
127122
const startTime = Date.now();
@@ -359,10 +354,9 @@ export class Server {
359354
throw new Error("server cannot start, not initialized");
360355
}
361356

362-
const probeServer = this.probesApp.listen(PROBES_PORT, () => {
363-
log.info(`probes server listening on port: ${(<AddressInfo>probeServer.address()).port}`);
357+
this.probesApp.start(PROBES_PORT).then((port) => {
358+
log.info(`probes server listening on port: ${port}`);
364359
});
365-
this.probesServer = probeServer;
366360

367361
const httpServer = this.app.listen(port, () => {
368362
this.eventEmitter.emit(Server.EVENT_ON_START, httpServer);
@@ -419,7 +413,7 @@ export class Server {
419413
race(this.stopServer(this.httpServer), "stop httpserver"),
420414
race(this.stopServer(this.privateApiServer), "stop private api server"),
421415
race(this.stopServer(this.publicApiServer), "stop public api server"),
422-
race(this.stopServer(this.probesServer), "stop probe server"),
416+
race(this.probesApp.stop(), "stop probe server"),
423417
race((async () => this.disposables.dispose())(), "dispose disposables"),
424418
]);
425419

install/installer/pkg/components/server/deployment.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,9 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) {
371371
PeriodSeconds: 10,
372372
FailureThreshold: 6,
373373
},
374-
ReadinessProbe: &corev1.Probe{
374+
// StartupProbe, as we are only interested in controlling the startup of the server pod, and
375+
// not interferring with the readiness afterwards.
376+
StartupProbe: &corev1.Probe{
375377
ProbeHandler: corev1.ProbeHandler{
376378
HTTPGet: &corev1.HTTPGetAction{
377379
Path: "/ready",
@@ -383,7 +385,7 @@ func deployment(ctx *common.RenderContext) ([]runtime.Object, error) {
383385
},
384386
InitialDelaySeconds: 5,
385387
PeriodSeconds: 10,
386-
FailureThreshold: 12, // try for 120 seconds
388+
FailureThreshold: 18, // try for 180 seconds, then the Pod is restarted
387389
},
388390
SecurityContext: &corev1.SecurityContext{
389391
Privileged: pointer.Bool(false),

0 commit comments

Comments
 (0)