Skip to content
This repository was archived by the owner on Sep 30, 2020. It is now read-only.

Commit 53bd54a

Browse files
authored
Merge pull request #748 from HotelsDotCom/feature/journald-feedback
Provide real-time feedback from Journald logs when updating/creating …
2 parents a8ee86e + 9f9dfd2 commit 53bd54a

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

Documentation/kubernetes-on-aws-journald-cloudwatch-logs.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,43 @@ cloudWatchLogging:
1414
retentionInDays: 7
1515
```
1616

17+
1718
The docker image is also configurable:
1819

1920
```
2021
journaldCloudWatchLogsImage:
2122
repo: "jollinshead/journald-cloudwatch-logs"
2223
tag: "0.1"
2324
rktPullDocker: true
24-
```
25+
```
26+
27+
## kube-aws up/update feedback
28+
29+
During kube-aws up/update, filtered Journald logs can be printed to stdout.
30+
This feature is configurable in cluster.yaml under the *cloudWatchLogging* section, and requires *cloudWatchLogging* to be enabled.
31+
( Default values: )
32+
33+
```
34+
cloudWatchLogging:
35+
enabled: false
36+
imageWithTag: jollinshead/journald-cloudwatch-logs:0.1
37+
retentionInDays: 7
38+
localStreaming:
39+
enabled: true,
40+
filter: `{ $.priority = "CRIT" || $.priority = "WARNING" && $.transport = "journal" && $.systemdUnit = "init.scope" }`,
41+
interval: 60
42+
```
43+
44+
NOTE: Due to high initial entropy, *.service* failures may occur during the early stages of booting.
45+
In this context Entropy refers to the disorder of *.service*s (starting, failing, restarting).
46+
47+
### Parameters
48+
49+
#### Filter
50+
By default the filter is configured for *.service* failures and messages flagged as 'critical'.
51+
See [the official AWS documentation](http://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/FilterAndPatternSyntax.html) for more information.
52+
53+
#### Interval
54+
Since some messages are produced frequently, to avoid excessive spam, an 'interval' parameter is provided.
55+
This 'interval' value determines the time between printing two identical messages to stdout.
56+

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ Check out our getting started tutorial on launching your first Kubernetes cluste
4848
* [Backup Kubernetes resources](/Documentation/kubernetes-on-aws-backup.md)
4949
* [Restore Kubernetes resources](/contrib/cluster-backup/README.md)
5050
* [Journald logging to AWS CloudWatch](/Documentation/kubernetes-on-aws-journald-cloudwatch-logs.md)
51+
* [kube-aws up/update feedback](/Documentation/kubernetes-on-aws-journald-cloudwatch-logs.md)
5152

5253
## Examples
5354

core/controlplane/config/config.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ func NewDefaultCluster() *Cluster {
113113
CloudWatchLogging: CloudWatchLogging{
114114
Enabled: false,
115115
RetentionInDays: 7,
116+
LocalStreaming: LocalStreaming{
117+
Enabled: true,
118+
Filter: `{ $.priority = "CRIT" || $.priority = "WARNING" && $.transport = "journal" && $.systemdUnit = "init.scope" }`,
119+
interval: 60,
120+
},
116121
},
117122
HyperkubeImage: model.Image{Repo: "quay.io/coreos/hyperkube", Tag: k8sVer, RktPullDocker: false},
118123
AWSCliImage: model.Image{Repo: "quay.io/coreos/awscli", Tag: "master", RktPullDocker: false},
@@ -747,6 +752,18 @@ type KubeResourcesAutosave struct {
747752
type CloudWatchLogging struct {
748753
Enabled bool `yaml:"enabled"`
749754
RetentionInDays int `yaml:"retentionInDays"`
755+
LocalStreaming `yaml:"localStreaming"`
756+
}
757+
758+
type LocalStreaming struct {
759+
Enabled bool `yaml:"enabled"`
760+
Filter string `yaml:"filter"`
761+
interval int `yaml:"interval"`
762+
}
763+
764+
func (c *LocalStreaming) Interval() int64 {
765+
// Convert from seconds to milliseconds (and return as int64 type)
766+
return int64(c.interval * 1000)
750767
}
751768

752769
func (c *CloudWatchLogging) MergeIfEmpty(other CloudWatchLogging) {

core/controlplane/config/templates/cluster.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,12 @@ worker:
11451145
#cloudWatchLogging:
11461146
# enabled: false
11471147
# retentionInDays: 7
1148+
# # When enabled, feedback from Journald logs (with an applied filter) will be outputted during kube-aws 'apply | up'.
1149+
# # It is enabled by default - provided cloudWatchLogging is enabled.
1150+
# localStreaming:
1151+
# enabled: true,
1152+
# filter: `{ $.priority = "CRIT" || $.priority = "WARNING" && $.transport = "journal" && $.systemdUnit = "init.scope" }`,
1153+
# interval: 60
11481154

11491155
# Addon features
11501156
addons:

core/root/cluster.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package root
22

33
import (
4+
"encoding/json"
45
"fmt"
56
"github.com/aws/aws-sdk-go/aws"
67
"github.com/aws/aws-sdk-go/aws/session"
78
"github.com/aws/aws-sdk-go/service/cloudformation"
9+
"github.com/aws/aws-sdk-go/service/cloudwatchlogs"
810
"github.com/aws/aws-sdk-go/service/s3"
911
"github.com/kubernetes-incubator/kube-aws/cfnstack"
1012
controlplane "github.com/kubernetes-incubator/kube-aws/core/controlplane/cluster"
@@ -14,10 +16,12 @@ import (
1416
"github.com/kubernetes-incubator/kube-aws/core/root/config"
1517
"github.com/kubernetes-incubator/kube-aws/core/root/defaults"
1618
"github.com/kubernetes-incubator/kube-aws/filereader/jsontemplate"
19+
model "github.com/kubernetes-incubator/kube-aws/model"
1720
"io/ioutil"
1821
"os"
1922
"path/filepath"
2023
"strings"
24+
"time"
2125
)
2226

2327
const (
@@ -182,6 +186,13 @@ func (c clusterImpl) Create() error {
182186
return err
183187
}
184188

189+
if c.controlPlane.CloudWatchLogging.Enabled && c.controlPlane.CloudWatchLogging.LocalStreaming.Enabled {
190+
// Return Journald logs in a separate GoRoutine
191+
quit := make(chan bool)
192+
defer func() { quit <- true }()
193+
go streamJournaldLogs(c, quit)
194+
}
195+
185196
return c.stackProvisioner().CreateStackAtURLAndWait(cfSvc, stackTemplateURL)
186197
}
187198

@@ -296,6 +307,13 @@ func (c clusterImpl) Update() (string, error) {
296307
return "", err
297308
}
298309

310+
if c.controlPlane.CloudWatchLogging.Enabled && c.controlPlane.CloudWatchLogging.LocalStreaming.Enabled {
311+
// Return Journald logs in a separate GoRoutine
312+
quit := make(chan bool)
313+
defer func() { quit <- true }()
314+
go streamJournaldLogs(c, quit)
315+
}
316+
299317
return c.stackProvisioner().UpdateStackAtURLAndWait(cfSvc, templateUrl)
300318
}
301319

@@ -348,3 +366,42 @@ func (c clusterImpl) ValidateStack() (string, error) {
348366

349367
return strings.Join(reports, "\n"), nil
350368
}
369+
370+
func streamJournaldLogs(c clusterImpl, quit chan bool) error {
371+
fmt.Printf("Printing filtered Journald logs for log group '%s'...\nNOTE: Due to high initial entropy, failures may occur during the early stages of booting.\n", c.controlPlane.ClusterName)
372+
cwlSvc := cloudwatchlogs.New(c.session)
373+
startTime := time.Now().Unix() * 1E3
374+
fleInput := cloudwatchlogs.FilterLogEventsInput{
375+
LogGroupName: &c.controlPlane.ClusterName,
376+
FilterPattern: &c.controlPlane.CloudWatchLogging.LocalStreaming.Filter,
377+
StartTime: &startTime}
378+
messages := make(map[string]int64)
379+
380+
for {
381+
select {
382+
case <-quit:
383+
return nil
384+
default:
385+
out, err := cwlSvc.FilterLogEvents(&fleInput)
386+
if err != nil {
387+
continue
388+
}
389+
if len(out.Events) > 1 {
390+
startTime = *out.Events[len(out.Events)-1].Timestamp
391+
for _, event := range out.Events {
392+
if *event.Timestamp > messages[*event.Message]+c.controlPlane.CloudWatchLogging.LocalStreaming.Interval() {
393+
messages[*event.Message] = *event.Timestamp
394+
res := model.SystemdMessageResponse{}
395+
json.Unmarshal([]byte(*event.Message), &res)
396+
fmt.Printf("%s: \"%s\"\n", res.Hostname, res.Message)
397+
}
398+
}
399+
}
400+
fleInput = cloudwatchlogs.FilterLogEventsInput{
401+
LogGroupName: &c.controlPlane.ClusterName,
402+
FilterPattern: &c.controlPlane.CloudWatchLogging.LocalStreaming.Filter,
403+
NextToken: out.NextToken,
404+
StartTime: &startTime}
405+
}
406+
}
407+
}

model/cloudwatch_logging.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package model
2+
3+
type SystemdMessageResponse struct {
4+
InstanceId string `json:"instanceId,omitempty"`
5+
Hostname string `json:"hostname,omitempty"`
6+
CmdName string `json:"cmdName,omitempty"`
7+
Exe string `json:"exe,omitempty"`
8+
CmdLine string `json:"cmdLine,omitempty"`
9+
SystemdUnit string `json:"systemdUnit,omitempty"`
10+
Priority string `json:"priority,omitempty"`
11+
Message string `json:"message,omitempty"`
12+
}

0 commit comments

Comments
 (0)