Skip to content

Commit c386b48

Browse files
committed
cmd/locktrigger: add fix for Cloud Build race
When a project uses “continuous deployment powered by Cloud Build”, the deployment is a little bit too continuous: when multiple commits land in a short time window, Cloud Build will run all the triggered build jobs in parallel. If each job does “gcloud app deploy”, there is no guarantee which will win: perhaps an older commit will complete last, resulting in the newest commit not actually being the final deployed version of the site. This should probably be fixed in “continuous deployment powered by Cloud Build”, but until then, locktrigger works around the problem. Use locktrigger in cmd/golangorg/cloudbuild.yaml to ensure that when multiple commits race, the newest one always runs its go-app-deploy.sh last. Change-Id: I5ca340250d0a3b7853fc478d35caffdd0163bb0f Reviewed-on: https://go-review.googlesource.com/c/website/+/368365 Trust: Russ Cox <[email protected]> Run-TryBot: Russ Cox <[email protected]> TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Dmitri Shuralyov <[email protected]>
1 parent 577a9fd commit c386b48

File tree

6 files changed

+261
-38
lines changed

6 files changed

+261
-38
lines changed

cmd/golangorg/cloudbuild.yaml

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,23 @@
44

55
steps:
66
- name: gcr.io/cloud-builders/git
7-
args: [
8-
"clone", "--branch=${_GO_REF}", "--depth=1",
9-
"https://go.googlesource.com/go", "_gotmp",
10-
]
7+
args: ["clone", "--branch=${_GO_REF}", "--depth=1", "https://go.googlesource.com/go", "_gotmp"]
118
- name: gcr.io/cloud-builders/git
9+
args: ["archive", "--format=zip", "--output=../_goroot.zip", "HEAD"]
1210
dir: _gotmp
13-
args: [
14-
"archive", "--format=zip", "--output=../_goroot.zip", "HEAD",
15-
]
1611
- name: golang
1712
args: ["rm", "-rf", "_gotmp"]
1813
- name: golang
1914
args: ["go", "test", "./..."]
2015
- name: golang
21-
entrypoint: bash
22-
args: ["-c", "go run ./cmd/events/ > ./_content/events.yaml"]
16+
args: ["bash", "-c", "go run ./cmd/events > ./_content/events.yaml"]
17+
- name: golang
18+
args: ["go", "run", "./cmd/locktrigger", "--project=$PROJECT_ID", "--build=$BUILD_ID"]
2319
- name: gcr.io/cloud-builders/gcloud
2420
entrypoint: bash
2521
args: ["./go-app-deploy.sh", "cmd/golangorg/app.yaml"]
2622
- name: golang
27-
args: [
28-
"go", "run", "./cmd/versionprune", "--dry_run=false",
29-
"--project=$PROJECT_ID", "--service=default",
30-
]
23+
args: ["go", "run", "./cmd/versionprune", "--dry_run=false", "--project=$PROJECT_ID", "--service=default"]
3124

3225
options:
3326
machineType: N1_HIGHCPU_8

cmd/locktrigger/main.go

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
// Copyright 2021 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Locktrigger “locks” a given build trigger, making sure that
6+
// the currently running build is the only build running for that trigger.
7+
//
8+
// Usage:
9+
//
10+
// locktrigger -project=$PROJECT_ID -build=$BUILD_ID
11+
//
12+
// The $PROJECT_ID and $BUILD_ID are typically written literally in cloudbuild.yaml
13+
// and then substituted by Cloud Build.
14+
//
15+
// When a project uses “continuous deployment powered by Cloud Build”,
16+
// the deployment is a little bit too continuous: when multiple commits
17+
// land in a short time window, Cloud Build will run all the triggered
18+
// build jobs in parallel. If each job does “gcloud app deploy”, there
19+
// is no guarantee which will win: perhaps an older commit will complete
20+
// last, resulting in the newest commit not actually being the final
21+
// deployed version of the site. This should probably be fixed in
22+
// “continuous deployment powered by Cloud Build”, but until then,
23+
// locktrigger works around the problem.
24+
//
25+
// All triggered builds must run locktrigger to guarantee mutual exclusion.
26+
// When there is contention—that is, when multiple builds are running and
27+
// they all run locktrigger—the build corresponding to the newest commit
28+
// is permitted to continue running, and older builds are canceled.
29+
//
30+
// When locktrigger exits successfully, then, at that moment, the current
31+
// build is (or recently was) the only running build for its trigger.
32+
// Of course, another build may start immediately after locktrigger exits.
33+
// As long as that build also runs locktrigger, then either it will cancel
34+
// itself (if it is older than we are), or it will cancel us before proceeding
35+
// (if we are older than it is).
36+
package main
37+
38+
import (
39+
"bytes"
40+
"context"
41+
"flag"
42+
"fmt"
43+
"log"
44+
"os"
45+
"os/exec"
46+
"strings"
47+
"time"
48+
49+
cloudbuild "cloud.google.com/go/cloudbuild/apiv1/v2"
50+
"google.golang.org/api/iterator"
51+
cloudbuildpb "google.golang.org/genproto/googleapis/devtools/cloudbuild/v1"
52+
)
53+
54+
// Command-line flags. Both are required; main prints the usage message
// and exits if either one is left empty.
var (
	// project is passed as the ProjectId of every Cloud Build request.
	project = flag.String("project", "", "GCP project `name` (required)")

	// build is the ID of the build whose record main looks up as "self".
	build = flag.String("build", "", "GCP build `id` (required)")
)
58+
59+
// usage writes a one-line usage message to standard error
// and exits with status 2.
func usage() {
	const msg = "usage: locktrigger -project=name -build=id\n"
	fmt.Fprint(os.Stderr, msg)
	os.Exit(2)
}
63+
64+
func main() {
65+
flag.Usage = usage
66+
flag.Parse()
67+
log.SetPrefix("locktrigger: ")
68+
log.SetFlags(0)
69+
70+
if *project == "" || *build == "" {
71+
usage()
72+
}
73+
74+
ctx := context.Background()
75+
c, err := cloudbuild.NewClient(ctx)
76+
if err != nil {
77+
log.Fatal(err)
78+
}
79+
defer c.Close()
80+
81+
// Find commit hash of local Git
82+
myHash := run("git", "rev-parse", "HEAD")
83+
log.Printf("my hash: %v", myHash)
84+
85+
// Find build object for current build, check that it matches.
86+
self := getBuild(c, ctx, *build)
87+
if hash := self.Substitutions["COMMIT_SHA"]; hash != myHash {
88+
log.Fatalf("build COMMIT_SHA does not match local hash: %v != %v", hash, myHash)
89+
}
90+
log.Printf("my build: %v", self.Id)
91+
if self.BuildTriggerId == "" {
92+
log.Fatalf("build has no trigger ID")
93+
}
94+
log.Printf("my trigger: %v", self.BuildTriggerId)
95+
96+
// List all builds for our trigger that are still running.
97+
req := &cloudbuildpb.ListBuildsRequest{
98+
ProjectId: *project,
99+
// Note: Really want "status=WORKING buildTriggerId="+self.BuildTriggerId,
100+
// but that fails with an InvalidArgument error for unknown reasons.
101+
// status=WORKING will narrow the list down to something reasonable,
102+
// and we filter the unrelated triggers below.
103+
Filter: "status=WORKING",
104+
}
105+
it := c.ListBuilds(ctx, req)
106+
foundSelf := false
107+
shallow := false
108+
if _, err := os.Stat(run("git", "rev-parse", "--git-dir") + "/shallow"); err == nil {
109+
shallow = true
110+
}
111+
for {
112+
b, err := it.Next()
113+
if err == iterator.Done {
114+
break
115+
}
116+
if err != nil {
117+
log.Fatalf("reading builds: %v (%q)", err, req.Filter)
118+
}
119+
if b.BuildTriggerId != self.BuildTriggerId {
120+
continue
121+
}
122+
123+
// Check whether this build is an older or newer commit.
124+
// If this build is older, cancel it.
125+
// If this build is newer, cancel ourselves.
126+
if b.Id == self.Id {
127+
foundSelf = true
128+
continue
129+
}
130+
hash := b.Substitutions["COMMIT_SHA"]
131+
if hash == "" {
132+
log.Fatalf("cannot find COMMIT_SHA for build %v", b.Id)
133+
}
134+
if hash == myHash {
135+
log.Fatalf("found another build %v at same commit %v", b.Id, hash)
136+
}
137+
138+
// Fetch the full Git repo so we can answer the history questions.
139+
// This is delayed until now to avoid the expense of fetching the full repo
140+
// if we are the only build that is running.
141+
if shallow {
142+
log.Printf("git fetch --unshallow")
143+
run("git", "fetch", "--unshallow")
144+
shallow = false
145+
}
146+
147+
// Contention.
148+
// Find the common ancestor between us and that build,
149+
// to tell whether we're older, it's older, or we're unrelated.
150+
log.Printf("checking %v", hash)
151+
switch run("git", "merge-base", myHash, hash) {
152+
default:
153+
log.Fatalf("unexpected build for unrelated commit %v", hash)
154+
155+
case myHash:
156+
// myHash is older than b's hash. Cancel self.
157+
log.Printf("canceling self, for build %v commit %v", b.Id, hash)
158+
cancel(c, ctx, self.Id)
159+
160+
case hash:
161+
// b's hash is older than myHash. Cancel b.
162+
log.Printf("canceling build %v commit %v", b.Id, hash)
163+
cancel(c, ctx, b.Id)
164+
}
165+
}
166+
167+
// If we listed all the in-progress builds, we should have seen ourselves.
168+
if !foundSelf {
169+
log.Fatalf("reading builds: didn't find self")
170+
}
171+
}
172+
173+
// getBuild returns the build info for the build with the given id.
174+
func getBuild(c *cloudbuild.Client, ctx context.Context, id string) *cloudbuildpb.Build {
175+
req := &cloudbuildpb.GetBuildRequest{
176+
ProjectId: *project,
177+
Id: id,
178+
}
179+
b, err := c.GetBuild(ctx, req)
180+
if err != nil {
181+
log.Fatalf("getbuild %v: %v", id, err)
182+
}
183+
return b
184+
}
185+
186+
// cancel cancels the build with the given id.
187+
func cancel(c *cloudbuild.Client, ctx context.Context, id string) {
188+
req := &cloudbuildpb.CancelBuildRequest{
189+
ProjectId: *project,
190+
Id: id,
191+
}
192+
_, err := c.CancelBuild(ctx, req)
193+
if err != nil {
194+
// Not Fatal: maybe cancel failed because the build exited.
195+
// Waiting for it to stop running below will take care of that case.
196+
log.Printf("cancel %v: %v", id, err)
197+
}
198+
199+
// Wait for build to report being stopped,
200+
// in case cancel only queues the cancellation and doesn't actually wait,
201+
// or in case cancel failed.
202+
// Willing to wait a few minutes.
203+
now := time.Now()
204+
for time.Since(now) < 3*time.Minute {
205+
b := getBuild(c, ctx, id)
206+
if b.Status != cloudbuildpb.Build_WORKING {
207+
log.Printf("canceled %v: now %v", id, b.Status)
208+
return
209+
}
210+
time.Sleep(10 * time.Second)
211+
}
212+
log.Fatalf("cancel %v: did not stop", id)
213+
}
214+
215+
// run executes the command line args (the program name followed by its
// arguments) and returns the command's standard output with leading and
// trailing white space trimmed.
//
// If args is empty, or the command cannot be started, or it exits with a
// non-zero status, run logs the failure and exits the program. For a failed
// command the log message includes everything it wrote to standard output
// and standard error.
func run(args ...string) string {
	if len(args) == 0 {
		// Without this check, args[0] below would panic with an
		// index-out-of-range error instead of a readable message.
		log.Fatalf("exec %v: no command given", args)
	}

	var stdout, stderr bytes.Buffer
	cmd := exec.Command(args[0], args[1:]...)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		log.Fatalf("exec %v: %v\n%s%s", args, err, stdout.String(), stderr.String())
	}
	return strings.TrimSpace(stdout.String())
}

cmd/versionprune/doc.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
// Copyright 2021 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
15
/*
2-
Binary versionprune prunes stale AppEngine versions for a specified service.
6+
Versionprune prunes stale AppEngine versions for a specified service.
37
48
The command by default will:
59
- keep the latest 5 versions

cmd/versionprune/main.go

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1+
// Copyright 2021 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
15
package main
26

37
import (
48
"context"
59
"flag"
610
"fmt"
11+
"log"
712
"os"
813
"sort"
914
"time"
@@ -12,35 +17,33 @@ import (
1217
)
1318

1419
var (
15-
dryRun = flag.Bool("dry_run", true, "When true, just print intended modifications and quit")
16-
keepDuration = flag.Duration("keep_duration", 24*time.Hour, "Versions older than this will be deleted")
17-
keepNumber = flag.Int("keep_number", 5, "Minimum number of versions to keep")
18-
project = flag.String("project", "", "GCP Project (required)")
19-
service = flag.String("service", "", "AppEngine service (required)")
20+
dryRun = flag.Bool("dry_run", true, "print but do not run changes")
21+
keepDuration = flag.Duration("keep_duration", 24*time.Hour, "keep versions with age < `t`")
22+
keepNumber = flag.Int("keep_number", 5, "keep at least `n` versions")
23+
project = flag.String("project", "", "GCP project `name` (required)")
24+
service = flag.String("service", "", "AppEngine service `name` (required)")
2025
)
2126

27+
// usage writes the usage line and the registered flag defaults
// to standard error, then exits with status 2.
func usage() {
	const msg = "usage: versionprune -project=name -service=name [options]\n"
	fmt.Fprint(os.Stderr, msg)
	flag.PrintDefaults()
	os.Exit(2)
}
32+
2233
func main() {
34+
flag.Usage = usage
2335
flag.Parse()
36+
log.SetPrefix("versionprune: ")
37+
log.SetFlags(0)
2438

25-
if *project == "" {
26-
fmt.Println("-project flag is required.")
27-
flag.Usage()
28-
os.Exit(1)
29-
}
30-
if *service == "" {
31-
fmt.Println("-service flag is required.")
32-
flag.Usage()
33-
os.Exit(1)
39+
if *project == "" || *service == "" {
40+
usage()
3441
}
3542
if *keepDuration < 0 {
36-
fmt.Printf("-keep_duration must be greater or equal to 0, got %s\n", *keepDuration)
37-
flag.Usage()
38-
os.Exit(1)
43+
log.Fatalf("-keep_duration=%v must be >= 0", *keepDuration)
3944
}
4045
if *keepNumber < 0 {
41-
fmt.Printf("-keep_number must be greater or equal to 0, got %d\n", *keepNumber)
42-
flag.Usage()
43-
os.Exit(1)
46+
log.Fatalf("-keep_number=%d must be >= 0", *keepNumber)
4447
}
4548

4649
if err := run(context.Background()); err != nil {

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module golang.org/x/website
33
go 1.16
44

55
require (
6+
cloud.google.com/go v0.88.0
67
cloud.google.com/go/datastore v1.2.0
78
github.com/gomodule/redigo v2.0.0+incompatible
89
github.com/google/go-cmp v0.5.6
@@ -11,8 +12,8 @@ require (
1112
golang.org/x/build v0.0.0-20211102155042-c046fca86e58
1213
golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985
1314
golang.org/x/tools v0.1.5
14-
golang.org/x/tour v0.0.0-20210526031051-3891a3eb15c0
1515
google.golang.org/api v0.51.0
16+
google.golang.org/genproto v0.0.0-20210726143408-b02e89920bf0
1617
gopkg.in/yaml.v2 v2.4.0
1718
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
1819
)

go.sum

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -993,13 +993,10 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f
993993
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
994994
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
995995
golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
996-
golang.org/x/tools v0.1.3-0.20210525215409-a3eb095d6aee/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
997996
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
998997
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
999998
golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA=
1000999
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
1001-
golang.org/x/tour v0.0.0-20210526031051-3891a3eb15c0 h1:a8Cl2fISREZQwBT5izVICCIC51QrZXfV087EaJMK7ZY=
1002-
golang.org/x/tour v0.0.0-20210526031051-3891a3eb15c0/go.mod h1:7RMQeqT5ScoysCgwPp55tOo09RuvuVD10CBiMXGyVzQ=
10031000
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
10041001
golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
10051002
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

0 commit comments

Comments
 (0)