Skip to content

Commit 4207e59

Browse files
authored
Ready check inside the runtime (#82)
* X * Improve * Remove log * Better graph * Fix * Fix
1 parent 7fe03a8 commit 4207e59

File tree

6 files changed

+214
-68
lines changed

6 files changed

+214
-68
lines changed

internal/artifacts.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ var opState []byte
5757
//go:embed config.yaml.tmpl
5858
var clConfigContent []byte
5959

60+
//go:embed utils/query.sh
61+
var queryReadyCheck []byte
62+
6063
type ArtifactsBuilder struct {
6164
outputDir string
6265
applyLatestL1Fork bool
@@ -230,6 +233,7 @@ func (b *ArtifactsBuilder) Build() (*Artifacts, error) {
230233
"testnet/genesis_validators_root.txt": hex.EncodeToString(state.GenesisValidatorsRoot()),
231234
"data_validator/": &lighthouseKeystore{privKeys: priv},
232235
"deterministic_p2p_key.txt": defaultDiscoveryPrivKey,
236+
"scripts/query.sh": queryReadyCheck,
233237
})
234238
if err != nil {
235239
return nil, err

internal/components.go

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -178,14 +178,8 @@ func (o *OpGeth) Name() string {
178178

179179
var _ ServiceReady = &OpGeth{}
180180

181-
func (o *OpGeth) Ready(out io.Writer, service *service, ctx context.Context) error {
182-
logs := service.logs
183-
184-
if err := logs.WaitForLog("HTTP server started", 5*time.Second); err != nil {
185-
return err
186-
}
187-
188-
enodeLine, err := logs.FindLog("enode://")
181+
func (o *OpGeth) Ready(service *service) error {
182+
enodeLine, err := service.logs.FindLog("enode://")
189183
if err != nil {
190184
return err
191185
}
@@ -324,7 +318,14 @@ func (l *LighthouseBeaconNode) Run(svc *service, ctx *ExContext) {
324318
"--always-prepare-payload",
325319
"--prepare-payload-lookahead", "8000",
326320
"--suggested-fee-recipient", "0x690B9A9E9aa1C9dB991C7721a92d351Db4FaC990",
327-
)
321+
).
322+
WithReady(ReadyCheck{
323+
QueryURL: "http://localhost:3500/eth/v1/node/syncing",
324+
Interval: 1 * time.Second,
325+
Timeout: 30 * time.Second,
326+
Retries: 3,
327+
StartPeriod: 1 * time.Second,
328+
})
328329

329330
if l.MevBoostNode != "" {
330331
svc.WithArgs(
@@ -339,17 +340,6 @@ func (l *LighthouseBeaconNode) Name() string {
339340
return "lighthouse-beacon-node"
340341
}
341342

342-
var _ ServiceReady = &LighthouseBeaconNode{}
343-
344-
func (l *LighthouseBeaconNode) Ready(logOutput io.Writer, service *service, ctx context.Context) error {
345-
beaconNodeURL := fmt.Sprintf("http://localhost:%d", service.MustGetPort("http").HostPort)
346-
347-
if err := waitForChainAlive(ctx, logOutput, beaconNodeURL, 30*time.Second); err != nil {
348-
return err
349-
}
350-
return nil
351-
}
352-
353343
type LighthouseValidator struct {
354344
BeaconNode string
355345
}
@@ -407,6 +397,7 @@ func (m *MevBoostRelay) Run(service *service, ctx *ExContext) {
407397
WithImage("docker.io/flashbots/playground-utils").
408398
WithTag("latest").
409399
WithEntrypoint("mev-boost-relay").
400+
DependsOnHealthy(m.BeaconClient).
410401
WithArgs(
411402
"--api-listen-addr", "0.0.0.0",
412403
"--api-listen-port", `{{Port "http" 5555}}`,

internal/local_runner.go

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"strings"
1212
"sync"
1313
"text/template"
14+
"time"
1415

1516
"github.com/charmbracelet/bubbles/spinner"
1617
"github.com/charmbracelet/lipgloss"
@@ -62,13 +63,15 @@ type LocalRunner struct {
6263

6364
type task struct {
6465
status string
66+
ready bool
6567
logs *os.File
6668
}
6769

6870
var (
6971
taskStatusPending = "pending"
7072
taskStatusStarted = "started"
7173
taskStatusDie = "die"
74+
taskStatusHealthy = "healthy"
7275
)
7376

7477
type taskUI struct {
@@ -188,10 +191,57 @@ func (d *LocalRunner) printStatus() {
188191
}
189192
}
190193

194+
func (d *LocalRunner) AreReady() bool {
195+
d.tasksMtx.Lock()
196+
defer d.tasksMtx.Unlock()
197+
198+
for name, task := range d.tasks {
199+
// ensure the task is not a host service
200+
if d.isHostService(name) {
201+
continue
202+
}
203+
204+
// first ensure the task has started
205+
if task.status != taskStatusStarted {
206+
return false
207+
}
208+
209+
// then ensure it is ready if it has a ready function
210+
svc := d.getService(name)
211+
if svc.readyCheck != nil {
212+
if !task.ready {
213+
return false
214+
}
215+
}
216+
}
217+
return true
218+
}
219+
220+
func (d *LocalRunner) WaitForReady(ctx context.Context, timeout time.Duration) error {
221+
for {
222+
select {
223+
case <-ctx.Done():
224+
return ctx.Err()
225+
226+
case <-time.After(1 * time.Second):
227+
if d.AreReady() {
228+
return nil
229+
}
230+
231+
case err := <-d.exitErr:
232+
return err
233+
}
234+
}
235+
}
236+
191237
func (d *LocalRunner) updateTaskStatus(name string, status string) {
192238
d.tasksMtx.Lock()
193239
defer d.tasksMtx.Unlock()
194-
d.tasks[name].status = status
240+
if status == taskStatusHealthy {
241+
d.tasks[name].ready = true
242+
} else {
243+
d.tasks[name].status = status
244+
}
195245

196246
if status == taskStatusDie {
197247
d.exitErr <- fmt.Errorf("container %s failed", name)
@@ -387,6 +437,39 @@ func (d *LocalRunner) toDockerComposeService(s *service) (map[string]interface{}
387437
service["environment"] = s.env
388438
}
389439

440+
if s.readyCheck != nil {
441+
var test []string
442+
if s.readyCheck.QueryURL != "" {
443+
// This is pretty much hardcoded for now.
444+
test = []string{"CMD-SHELL", "chmod +x /artifacts/scripts/query.sh && /artifacts/scripts/query.sh " + s.readyCheck.QueryURL}
445+
} else {
446+
test = s.readyCheck.Test
447+
}
448+
449+
service["healthcheck"] = map[string]interface{}{
450+
"test": test,
451+
"interval": s.readyCheck.Interval.String(),
452+
"timeout": s.readyCheck.Timeout.String(),
453+
"retries": s.readyCheck.Retries,
454+
"start_period": s.readyCheck.StartPeriod.String(),
455+
}
456+
}
457+
458+
if s.dependsOn != nil {
459+
depends := map[string]interface{}{}
460+
461+
for _, d := range s.dependsOn {
462+
if d.Condition == "" {
463+
depends[d.Name] = struct{}{}
464+
} else {
465+
depends[d.Name] = map[string]interface{}{
466+
"condition": d.Condition,
467+
}
468+
}
469+
}
470+
service["depends_on"] = depends
471+
}
472+
390473
if runtime.GOOS == "linux" {
391474
// We rely on host.docker.internal as the DNS address for the host inside
392475
// the container. But, this is only available on Macos and Windows.
@@ -541,6 +624,10 @@ func (d *LocalRunner) trackContainerStatusAndLogs() {
541624
case events.ActionDie:
542625
d.updateTaskStatus(name, taskStatusDie)
543626
log.Info("container died", "name", name)
627+
628+
case events.ActionHealthStatusHealthy:
629+
d.updateTaskStatus(name, taskStatusHealthy)
630+
log.Info("container is healthy", "name", name)
544631
}
545632

546633
case err := <-errCh:

internal/manifest.go

Lines changed: 74 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -81,37 +81,15 @@ type Service interface {
8181
}
8282

8383
type ServiceReady interface {
84-
Ready(out io.Writer, service *service, ctx context.Context) error
84+
Ready(ouservice *service) error
8585
}
8686

87-
func WaitForReady(ctx context.Context, manifest *Manifest) error {
88-
var wg sync.WaitGroup
89-
readyErr := make(chan error, len(manifest.Services()))
90-
91-
output, err := manifest.out.LogOutput("ready")
92-
if err != nil {
93-
return fmt.Errorf("failed to create log output: %w", err)
94-
}
95-
96-
for _, s := range manifest.Services() {
87+
func (m *Manifest) CompleteReady() error {
88+
for _, s := range m.services {
9789
if readyFn, ok := s.component.(ServiceReady); ok {
98-
wg.Add(1)
99-
100-
go func() {
101-
defer wg.Done()
102-
103-
if err := readyFn.Ready(output, s, ctx); err != nil {
104-
readyErr <- fmt.Errorf("service %s failed to start: %w", s.Name, err)
105-
}
106-
}()
107-
}
108-
}
109-
wg.Wait()
110-
111-
close(readyErr)
112-
for err := range readyErr {
113-
if err != nil {
114-
return err
90+
if err := readyFn.Ready(s); err != nil {
91+
return err
92+
}
11593
}
11694
}
11795
return nil
@@ -192,6 +170,7 @@ func (s *Manifest) GetService(name string) (*service, bool) {
192170
// - downloads any local release artifacts for the services that require host execution
193171
func (s *Manifest) Validate() error {
194172
for _, ss := range s.services {
173+
// validate node port references
195174
for _, nodeRef := range ss.nodeRefs {
196175
targetService, ok := s.GetService(nodeRef.Service)
197176
if !ok {
@@ -202,6 +181,21 @@ func (s *Manifest) Validate() error {
202181
return fmt.Errorf("service %s depends on service %s, but it does not expose port %s", ss.Name, nodeRef.Service, nodeRef.PortLabel)
203182
}
204183
}
184+
185+
// validate depends_on statements
186+
for _, dep := range ss.dependsOn {
187+
service, ok := s.GetService(dep.Name)
188+
if !ok {
189+
return fmt.Errorf("service %s depends on service %s, but it is not defined", ss.Name, dep.Name)
190+
}
191+
192+
if dep.Condition == DependsOnConditionHealthy {
193+
// if we depedn on the service to be healthy, it must have a ready check
194+
if service.readyCheck == nil {
195+
return fmt.Errorf("service %s depends on service %s, but it does not have a ready check", ss.Name, dep.Name)
196+
}
197+
}
198+
}
205199
}
206200

207201
// download any local release artifacts for the services that require them
@@ -257,24 +251,6 @@ func (s *serviceLogs) readLogs() (string, error) {
257251
return string(content), nil
258252
}
259253

260-
func (s *serviceLogs) WaitForLog(pattern string, timeout time.Duration) error {
261-
timer := time.After(timeout)
262-
for {
263-
select {
264-
case <-timer:
265-
return fmt.Errorf("timeout waiting for log pattern %s", pattern)
266-
case <-time.After(500 * time.Millisecond):
267-
logs, err := s.readLogs()
268-
if err != nil {
269-
return fmt.Errorf("failed to read logs: %w", err)
270-
}
271-
if strings.Contains(logs, pattern) {
272-
return nil
273-
}
274-
}
275-
}
276-
}
277-
278254
func (s *serviceLogs) FindLog(pattern string) (string, error) {
279255
logs, err := s.readLogs()
280256
if err != nil {
@@ -299,6 +275,10 @@ type service struct {
299275
// list of environment variables to set for the service
300276
env map[string]string
301277

278+
readyCheck *ReadyCheck
279+
280+
dependsOn []DependsOn
281+
302282
ports []*Port
303283
nodeRefs []*NodeRef
304284

@@ -310,6 +290,18 @@ type service struct {
310290
component Service
311291
}
312292

293+
type DependsOnCondition string
294+
295+
const (
296+
DependsOnConditionRunning DependsOnCondition = "service_started"
297+
DependsOnConditionHealthy DependsOnCondition = "service_healthy"
298+
)
299+
300+
type DependsOn struct {
301+
Name string
302+
Condition DependsOnCondition
303+
}
304+
313305
func (s *service) Ports() []*Port {
314306
return s.ports
315307
}
@@ -402,6 +394,30 @@ func (s *service) WithArgs(args ...string) *service {
402394
return s
403395
}
404396

397+
func (s *service) WithReady(check ReadyCheck) *service {
398+
s.readyCheck = &check
399+
return s
400+
}
401+
402+
type ReadyCheck struct {
403+
QueryURL string
404+
Test []string
405+
Interval time.Duration
406+
StartPeriod time.Duration
407+
Timeout time.Duration
408+
Retries int
409+
}
410+
411+
func (s *service) DependsOnHealthy(name string) *service {
412+
s.dependsOn = append(s.dependsOn, DependsOn{Name: name, Condition: DependsOnConditionHealthy})
413+
return s
414+
}
415+
416+
func (s *service) DependsOnRunning(name string) *service {
417+
s.dependsOn = append(s.dependsOn, DependsOn{Name: name, Condition: DependsOnConditionRunning})
418+
return s
419+
}
420+
405421
func applyTemplate(templateStr string) (string, []Port, []NodeRef) {
406422
// use template substitution to load constants
407423
// pass-through the Dir template because it has to be resolved at the runtime
@@ -494,6 +510,18 @@ func (s *Manifest) GenerateDotGraph() string {
494510
}
495511
}
496512

513+
// Add edges for dependws_on
514+
for _, ss := range s.services {
515+
for _, dep := range ss.dependsOn {
516+
sourceNode := strings.ReplaceAll(ss.Name, "-", "_")
517+
targetNode := strings.ReplaceAll(dep.Name, "-", "_")
518+
b.WriteString(fmt.Sprintf(" %s -> %s [style=dashed, color=gray, constraint=true, label=\"depends_on\"];\n",
519+
sourceNode,
520+
targetNode,
521+
))
522+
}
523+
}
524+
497525
b.WriteString("}\n")
498526
return b.String()
499527
}

0 commit comments

Comments
 (0)