Skip to content

Commit 67744e9

Browse files
author
OpenShift Bot
authored
Merge pull request #556 from nhosoi/file_buffer_1
Merged by openshift-bot
2 parents c0c57d5 + 6252ba1 commit 67744e9

File tree

10 files changed

+232
-74
lines changed

10 files changed

+232
-74
lines changed

fluentd/configs.d/openshift/es-copy-config.conf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
flush_interval "#{ENV['ES_FLUSH_INTERVAL'] || '5s'}"
2121
max_retry_wait "#{ENV['ES_RETRY_WAIT'] || '300'}"
2222
disable_retry_limit true
23+
buffer_type file
24+
buffer_path '/var/lib/fluentd/buffer-es-copy-config'
2325
buffer_queue_limit "#{ENV['BUFFER_QUEUE_LIMIT'] || '1024' }"
2426
buffer_chunk_limit "#{ENV['BUFFER_SIZE_LIMIT'] || '1m' }"
2527
# the systemd journald 0.0.8 input plugin will just throw away records if the buffer

fluentd/configs.d/openshift/es-ops-copy-config.conf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
flush_interval "#{ENV['OPS_FLUSH_INTERVAL'] || ENV['ES_FLUSH_INTERVAL'] || '5s'}"
2121
max_retry_wait "#{ENV['OPS_RETRY_WAIT'] || ENV['ES_RETRY_WAIT'] || '300'}"
2222
disable_retry_limit true
23+
buffer_type file
24+
buffer_path '/var/lib/fluentd/buffer-es-ops-copy-config'
2325
buffer_queue_limit "#{ENV['BUFFER_QUEUE_LIMIT'] || '1024' }"
2426
buffer_chunk_limit "#{ENV['BUFFER_SIZE_LIMIT'] || '1m' }"
2527
# the systemd journald 0.0.8 input plugin will just throw away records if the buffer

fluentd/configs.d/openshift/output-es-config.conf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
flush_interval "#{ENV['ES_FLUSH_INTERVAL'] || '5s'}"
2121
max_retry_wait "#{ENV['ES_RETRY_WAIT'] || '300'}"
2222
disable_retry_limit true
23+
buffer_type file
24+
buffer_path '/var/lib/fluentd/buffer-output-es-config'
2325
buffer_queue_limit "#{ENV['BUFFER_QUEUE_LIMIT'] || '1024' }"
2426
buffer_chunk_limit "#{ENV['BUFFER_SIZE_LIMIT'] || '1m' }"
2527
# the systemd journald 0.0.8 input plugin will just throw away records if the buffer

fluentd/configs.d/openshift/output-es-ops-config.conf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
flush_interval "#{ENV['OPS_FLUSH_INTERVAL'] || ENV['ES_FLUSH_INTERVAL'] || '5s'}"
2121
max_retry_wait "#{ENV['OPS_RETRY_WAIT'] || ENV['ES_RETRY_WAIT'] || '300'}"
2222
disable_retry_limit true
23+
buffer_type file
24+
buffer_path '/var/lib/fluentd/buffer-output-es-ops-config'
2325
buffer_queue_limit "#{ENV['BUFFER_QUEUE_LIMIT'] || '1024' }"
2426
buffer_chunk_limit "#{ENV['BUFFER_SIZE_LIMIT'] || '1m' }"
2527
# the systemd journald 0.0.8 input plugin will just throw away records if the buffer

fluentd/run.sh

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,7 @@ IPADDR4=`/usr/sbin/ip -4 addr show dev eth0 | grep inet | sed -e "s/[ \t]*inet \
6161
IPADDR6=`/usr/sbin/ip -6 addr show dev eth0 | grep inet6 | sed "s/[ \t]*inet6 \([a-f0-9:]*\).*/\1/"`
6262
export IPADDR4 IPADDR6
6363

64-
BUFFER_SIZE_LIMIT=${BUFFER_SIZE_LIMIT:-1048576}
65-
MUX_CPU_LIMIT=${MUX_CPU_LIMIT:-500m}
66-
MUX_MEMORY_LIMIT=${MUX_MEMORY_LIMIT:-2Gi}
67-
FLUENTD_CPU_LIMIT=${FLUENTD_CPU_LIMIT:-100m}
68-
FLUENTD_MEMORY_LIMIT=${FLUENTD_MEMORY_LIMIT:-512Mi}
64+
BUFFER_SIZE_LIMIT=${BUFFER_SIZE_LIMIT:-16777216}
6965

7066
CFG_DIR=/etc/fluent/configs.d
7167
if [ "${USE_MUX:-}" = "true" ] ; then
@@ -129,30 +125,55 @@ else
129125
fi
130126
export K8S_FILTER_REMOVE_KEYS
131127

132-
if [ "${USE_MUX:-}" = "true" ] ; then
133-
TOTAL_MEMORY_LIMIT=`echo $MUX_MEMORY_LIMIT | sed -e "s/[Kk]/*1024/g;s/[Mm]/*1024*1024/g;s/[Gg]/*1024*1024*1024/g;s/i//g" | bc`
128+
# How many outputs?
129+
if [ -n "${MUX_CLIENT_MODE:-}" -o "${USE_MUX_CLIENT:-}" = "true" ] ; then
130+
# A fluentd collector configured as a mux client has just one output: sending to a mux.
131+
NUM_OUTPUTS=1
134132
else
135-
TOTAL_MEMORY_LIMIT=`echo $FLUENTD_MEMORY_LIMIT | sed -e "s/[Kk]/*1024/g;s/[Mm]/*1024*1024/g;s/[Gg]/*1024*1024*1024/g;s/i//g" | bc`
136-
fi
137-
BUFFER_SIZE_LIMIT=`echo $BUFFER_SIZE_LIMIT | sed -e "s/[Kk]/*1024/g;s/[Mm]/*1024*1024/g;s/[Gg]/*1024*1024*1024/g;s/i//g" | bc`
138-
if [ $BUFFER_SIZE_LIMIT -eq 0 ]; then
139-
BUFFER_SIZE_LIMIT=1048576
133+
# fluentd usually has 2 outputs.
134+
NUM_OUTPUTS=2
135+
if [ "$ES_COPY" = "true" ]; then
136+
NUM_OUTPUTS=`expr $NUM_OUTPUTS \* 2`
137+
fi
140138
fi
141139

142-
DIV=1
143-
if [ "$ES_HOST" != "$OPS_HOST" ] || [ "$ES_PORT" != "$OPS_PORT" ] ; then
144-
# using ops cluster
145-
DIV=`expr $DIV \* 2`
140+
# If FILE_BUFFER_PATH already exists but is not a directory, mkdir -p fails with an error.
141+
FILE_BUFFER_PATH=/var/lib/fluentd
142+
mkdir -p $FILE_BUFFER_PATH
143+
144+
# Get the total size in bytes of the filesystem holding FILE_BUFFER_PATH (2nd column of df -B1, not the Available column); use 1/4 of it
145+
DF_LIMIT=$(df -B1 $FILE_BUFFER_PATH | grep -v Filesystem | awk '{print $2}')
146+
DF_LIMIT=${DF_LIMIT:-0}
147+
DF_LIMIT=$(expr $DF_LIMIT / 4) || :
148+
if [ $DF_LIMIT -eq 0 ]; then
149+
echo "ERROR: No disk space is available for file buffer in $FILE_BUFFER_PATH."
150+
exit 1
146151
fi
147-
if [ -n "${MUX_CLIENT_MODE:-}" -o "${USE_MUX_CLIENT:-}" = "true" ] ; then
148-
DIV=`expr $DIV \* 2`
152+
# Determine final total given the number of outputs we have.
153+
TOTAL_LIMIT=$(echo ${FILE_BUFFER_LIMIT:-2Gi} | sed -e "s/[Kk]/*1024/g;s/[Mm]/*1024*1024/g;s/[Gg]/*1024*1024*1024/g;s/i//g" | bc) || :
154+
if [ $TOTAL_LIMIT -le 0 ]; then
155+
echo "ERROR: Invalid file buffer limit ($FILE_BUFFER_LIMIT) is given. Failed to convert to bytes."
156+
exit 1
149157
fi
158+
TOTAL_LIMIT=$(expr $TOTAL_LIMIT \* $NUM_OUTPUTS) || :
159+
if [ $DF_LIMIT -lt $TOTAL_LIMIT ]; then
160+
echo "WARNING: Available disk space ($DF_LIMIT bytes) is less than the user specified file buffer limit ($FILE_BUFFER_LIMIT times $NUM_OUTPUTS)."
161+
TOTAL_LIMIT=$DF_LIMIT
162+
fi
163+
164+
BUFFER_SIZE_LIMIT=$(echo $BUFFER_SIZE_LIMIT | sed -e "s/[Kk]/*1024/g;s/[Mm]/*1024*1024/g;s/[Gg]/*1024*1024*1024/g;s/i//g" | bc)
165+
BUFFER_SIZE_LIMIT=${BUFFER_SIZE_LIMIT:-16777216}
150166

151-
# MEMORY_LIMIT per buffer
152-
MEMORY_LIMIT=`expr $TOTAL_MEMORY_LIMIT / $DIV`
153-
BUFFER_QUEUE_LIMIT=`expr $MEMORY_LIMIT / $BUFFER_SIZE_LIMIT`
154-
if [ $BUFFER_QUEUE_LIMIT -eq 0 ]; then
155-
BUFFER_QUEUE_LIMIT=1024
167+
# TOTAL_BUFFER_SIZE_LIMIT is the size limit per output buffer: TOTAL_LIMIT divided by NUM_OUTPUTS
168+
TOTAL_BUFFER_SIZE_LIMIT=$(expr $TOTAL_LIMIT / $NUM_OUTPUTS) || :
169+
if [ -z $TOTAL_BUFFER_SIZE_LIMIT -o $TOTAL_BUFFER_SIZE_LIMIT -eq 0 ]; then
170+
echo "ERROR: Calculated TOTAL_BUFFER_SIZE_LIMIT is 0. TOTAL_LIMIT $TOTAL_LIMIT is too small compared to NUM_OUTPUTS $NUM_OUTPUTS. Please increase FILE_BUFFER_LIMIT $FILE_BUFFER_LIMIT and/or the volume size of $FILE_BUFFER_PATH."
171+
exit 1
172+
fi
173+
BUFFER_QUEUE_LIMIT=$(expr $TOTAL_BUFFER_SIZE_LIMIT / $BUFFER_SIZE_LIMIT) || :
174+
if [ -z $BUFFER_QUEUE_LIMIT -o $BUFFER_QUEUE_LIMIT -eq 0 ]; then
175+
echo "ERROR: Calculated BUFFER_QUEUE_LIMIT is 0. TOTAL_BUFFER_SIZE_LIMIT $TOTAL_BUFFER_SIZE_LIMIT is too small compared to BUFFER_SIZE_LIMIT $BUFFER_SIZE_LIMIT. Please increase FILE_BUFFER_LIMIT $FILE_BUFFER_LIMIT and/or the volume size of $FILE_BUFFER_PATH."
176+
exit 1
156177
fi
157178
export BUFFER_QUEUE_LIMIT BUFFER_SIZE_LIMIT
158179

@@ -167,12 +188,15 @@ OPS_COPY_PASSWORD="${OPS_COPY_PASSWORD:-$ES_COPY_PASSWORD}"
167188
export OPS_COPY_HOST OPS_COPY_PORT OPS_COPY_SCHEME OPS_COPY_CLIENT_CERT \
168189
OPS_COPY_CLIENT_KEY OPS_COPY_CA OPS_COPY_USERNAME OPS_COPY_PASSWORD
169190

170-
if [ "$ES_COPY" = "true" ] ; then
191+
if [ "$ES_COPY" = "true" -a "${USE_MUX_CLIENT:-}" != "true" ] ; then
171192
# user wants to split the output of fluentd into two different elasticsearch instances
172193
# user will provide the necessary COPY environment variables as above
173194
cp $CFG_DIR/{openshift,dynamic}/es-copy-config.conf
174195
cp $CFG_DIR/{openshift,dynamic}/es-ops-copy-config.conf
175196
else
197+
if [ "$ES_COPY" = "true" ] ; then
198+
echo "WARNING: When USE_MUX_CLIENT is true, logs are forwarded to MUX; COPY won't work with it."
199+
fi
176200
# create empty files for the ES copy config
177201
echo > $CFG_DIR/dynamic/es-copy-config.conf
178202
echo > $CFG_DIR/dynamic/es-ops-copy-config.conf

hack/testing/init-log-stack

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,38 @@ ES_HOSTNAME=${ES_HOST:+openshift_logging_es_hostname=$ES_HOST}
1111
ES_OPS_ALLOW_EXTERNAL=${ES_OPS_HOST:+openshift_logging_es_ops_allow_external=True}
1212
ES_OPS_HOSTNAME=${ES_OPS_HOST:+openshift_logging_es_ops_hostname=$ES_OPS_HOST}
1313

14+
SET_MUX_CPU_LIMIT=""
15+
SET_MUX_MEMORY_LIMIT=""
16+
SET_MUX_BUFFER_QUEUE_LIMIT=""
17+
SET_MUX_BUFFER_SIZE_LIMIT=""
18+
SET_MUX_FILE_BUFFER_STORAGE_TYPE=""
19+
SET_MUX_FILE_BUFFER_PVC_NAME=""
20+
SET_MUX_FILE_BUFFER_PVC_SIZE=""
1421
if [ "$MUX_ALLOW_EXTERNAL" = true ] ; then
1522
SET_MUX_CPU_LIMIT="openshift_logging_mux_cpu_limit=${MUX_CPU_LIMIT:-500m}"
1623
SET_MUX_MEMORY_LIMIT="openshift_logging_mux_memory_limit=${MUX_MEMORY_LIMIT:-2Gi}"
1724
SET_MUX_BUFFER_QUEUE_LIMIT="openshift_logging_mux_buffer_queue_limit=${MUX_BUFFER_QUEUE_LIMIT:-1024}"
1825
SET_MUX_BUFFER_SIZE_LIMIT="openshift_logging_mux_buffer_size_limit=${MUX_BUFFER_SIZE_LIMIT:-1048576}"
19-
else
20-
SET_MUX_CPU_LIMIT=""
21-
SET_MUX_MEMORY_LIMIT=""
22-
SET_MUX_BUFFER_QUEUE_LIMIT=""
23-
SET_MUX_BUFFER_SIZE_LIMIT=""
26+
SET_MUX_FILE_BUFFER_STORAGE_TYPE="openshift_logging_mux_file_buffer_storage_type=${MUX_FILE_BUFFER_STORAGE_TYPE:-emptydir}"
27+
if [ "${MUX_FILE_BUFFER_STORAGE_TYPE:-}" = "pvc" ]; then
28+
SET_MUX_FILE_BUFFER_PVC_NAME="openshift_logging_mux_file_buffer_pvc_name=logging-mux-pvc"
29+
SET_MUX_FILE_BUFFER_PVC_SIZE="openshift_logging_mux_file_buffer_pvc_size=6Gi"
30+
echo "### Prepare pvc storage for testing"
31+
echo "apiVersion: \"v1\"
32+
kind: \"PersistentVolume\"
33+
metadata:
34+
name: logging-muxpv-1
35+
spec:
36+
capacity:
37+
storage: \"6Gi\"
38+
accessModes:
39+
- \"ReadWriteOnce\"
40+
hostPath:
41+
path: ${FILE_BUFFER_PATH:-/var/lib/fluentd}" > /tmp/host-pv.yaml
42+
cat /tmp/host-pv.yaml
43+
cat /tmp/host-pv.yaml | oc create -f -
44+
echo "###################################"
45+
fi
2446
fi
2547

2648
source $OS_O_A_L_DIR/hack/testing/build-images
@@ -64,7 +86,9 @@ $SET_MUX_CPU_LIMIT
6486
$SET_MUX_MEMORY_LIMIT
6587
$SET_MUX_BUFFER_QUEUE_LIMIT
6688
$SET_MUX_BUFFER_SIZE_LIMIT
67-
89+
$SET_MUX_FILE_BUFFER_STORAGE_TYPE
90+
$SET_MUX_FILE_BUFFER_PVC_NAME
91+
$SET_MUX_FILE_BUFFER_PVC_SIZE
6892
EOL
6993

7094
echo "### Created host inventory file ###"

hack/testing/logging.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,6 @@ rm -f $lfds
213213
# when fluentd starts up it may take a while before it catches up with all of the logs
214214
# let's wait until that happens
215215
wait_for_fluentd_ready
216-
wait_for_fluentd_to_catch_up
217216

218217
# add admin user and normal user for kibana and token auth testing
219218
export LOG_ADMIN_USER=admin
@@ -244,6 +243,7 @@ if [ "$ENABLE_OPS_CLUSTER" = "true" ] ; then
244243
curl_es_input $esopspod /$sg_opsindex/rolesmapping/0 -XPUT -d@- | \
245244
python -mjson.tool
246245
fi
246+
wait_for_fluentd_to_catch_up
247247

248248
# verify that $LOG_ADMIN_USER user has access to cluster stats
249249
sleep 5

hack/testing/prep-host

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ OS_ANSIBLE_BRANCH=${OS_ANSIBLE_BRANCH:-master}
1111
OS_ANSIBLE_DIR=$WORKDIR/openhift-ansible
1212

1313
sudo yum makecache fast
14-
sudo yum install python2-pip $RUAMEL_YAML_RPM $ANSIBLE_RPM -y
14+
sudo yum install python2-pip $RUAMEL_YAML_RPM $ANSIBLE_RPM bc -y
1515

1616
git clone $OS_ANSIBLE_REPO $OS_ANSIBLE_DIR -b $OS_ANSIBLE_BRANCH --depth=1
1717

hack/testing/test-es-copy.sh

Lines changed: 54 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ if ! type get_running_pod > /dev/null 2>&1 ; then
1313
. ${OS_O_A_L_DIR:-../..}/deployer/scripts/util.sh
1414
fi
1515

16+
if [ "$USE_MUX_CLIENT" = "true" ] ; then
17+
echo "Skipping -- This test is exclusive to USE_MUX_CLIENT != true."
18+
exit 0
19+
fi
20+
1621
if [[ $# -ne 1 || "$1" = "false" ]]; then
1722
# assuming not using OPS cluster
1823
CLUSTER="false"
@@ -45,15 +50,21 @@ write_and_verify_logs() {
4550
return $rc
4651
}
4752

48-
restart_fluentd() {
49-
# delete daemonset which also stops fluentd
50-
oc delete daemonset logging-fluentd
51-
# wait for fluentd to stop
53+
undeploy_fluentd() {
54+
fpod=`get_running_pod fluentd`
55+
56+
# undeploy fluentd
57+
oc label node --all logging-infra-fluentd-
58+
5259
wait_for_pod_ACTION stop $fpod
53-
# create the daemonset which will also start fluentd
54-
oc process logging-fluentd-template | oc create -f -
55-
# wait for fluentd to start
56-
wait_for_pod_ACTION start fluentd
60+
}
61+
62+
redeploy_fluentd() {
63+
# redeploy fluentd
64+
oc label node --all logging-infra-fluentd=true
65+
66+
# wait for fluentd to start
67+
wait_for_pod_ACTION start fluentd
5768
}
5869

5970
TEST_DIVIDER="------------------------------------------"
@@ -64,33 +75,36 @@ TEST_DIVIDER="------------------------------------------"
6475
# cause messages to be written to the system log - verify that OPS contains
6576
# two copies
6677

67-
fpod=`get_running_pod fluentd`
78+
undeploy_fluentd
6879

69-
# first, make sure copy is off
7080
cfg=`mktemp`
71-
oc get template logging-fluentd-template -o yaml | \
81+
# first, make sure copy is off
82+
oc get daemonset logging-fluentd -o yaml | \
7283
sed '/- name: ES_COPY/,/value:/ s/value: .*$/value: "false"/' | \
7384
oc replace -f -
74-
restart_fluentd
75-
fpod=`get_running_pod fluentd`
7685

77-
# save original template config
78-
origconfig=`mktemp`
79-
oc get template logging-fluentd-template -o yaml > $origconfig
86+
redeploy_fluentd
8087

8188
# run test to make sure fluentd is working normally - no copy
8289
write_and_verify_logs 1 || {
8390
oc get events -o yaml > $ARTIFACT_DIR/all-events.yaml 2>&1
8491
exit 1
8592
}
8693

94+
undeploy_fluentd
95+
96+
# save original daemonset config
97+
origconfig=`mktemp`
98+
oc get daemonset logging-fluentd -o yaml > $origconfig
99+
87100
cleanup() {
88101
# may have already been cleaned up
89102
if [ ! -f $origconfig ] ; then return 0 ; fi
103+
undeploy_fluentd
90104
# put back original configuration
91105
oc replace --force -f $origconfig
92106
rm -f $origconfig
93-
restart_fluentd
107+
redeploy_fluentd
94108
}
95109
trap "cleanup" INT TERM EXIT
96110

@@ -99,36 +113,43 @@ nocopy=`mktemp`
99113
sed '/_COPY/,/value/d' $origconfig > $nocopy
100114
# for every ES_ or OPS_ setting, create a copy called ES_COPY_ or OPS_COPY_
101115
envpatch=`mktemp`
102-
sed -n '/^ - env:/,/^ image:/ {
103-
/^ image:/d
104-
/^ - env:/d
116+
sed -n '/^ *- env:/,/^ *image:/ {
117+
/^ *image:/d
118+
/^ *- env:/d
105119
/name: K8S_HOST_URL/,/value/d
120+
/name: .*JOURNAL.*/,/value/d
121+
/name: .*BUFFER.*/,/value/d
122+
/name: .*MUX.*/,/value/d
123+
/name: FLUENTD_.*_LIMIT/,/valueFrom:/d
124+
/resourceFieldRef:/,/containerName: fluentd-elasticsearch/d
125+
/divisor:/,/resource: limits./d
106126
s/ES_/ES_COPY_/
107127
s/OPS_/OPS_COPY_/
108128
p
109129
}' $nocopy > $envpatch
110130

111131
# add the scheme, and turn on verbose
112132
cat >> $envpatch <<EOF
113-
- name: ES_COPY
114-
value: "true"
115-
- name: ES_COPY_SCHEME
116-
value: https
117-
- name: OPS_COPY_SCHEME
118-
value: https
119-
- name: VERBOSE
120-
value: "true"
133+
- name: ES_COPY
134+
value: "true"
135+
- name: ES_COPY_SCHEME
136+
value: https
137+
- name: OPS_COPY_SCHEME
138+
value: https
139+
- name: VERBOSE
140+
value: "true"
121141
EOF
122142

123143
# add this back to the daemonset config
144+
docopy=`mktemp`
124145
cat $nocopy | \
125-
sed '/^ - env:/r '$envpatch | \
126-
oc replace -f -
146+
sed '/^ *- env:/r '$envpatch > $docopy
127147

128-
rm -f $envpatch $nocopy
148+
cat $docopy | \
149+
oc replace -f -
129150

130-
restart_fluentd
131-
fpod=`get_running_pod fluentd`
151+
redeploy_fluentd
152+
rm -f $envpatch $nocopy $docopy
132153

133154
write_and_verify_logs 2 || {
134155
oc get events -o yaml > $ARTIFACT_DIR/all-events.yaml 2>&1
@@ -139,9 +160,6 @@ write_and_verify_logs 2 || {
139160
oc replace --force -f $origconfig
140161
rm -f $origconfig
141162

142-
restart_fluentd
143-
fpod=`get_running_pod fluentd`
144-
145163
write_and_verify_logs 1 || {
146164
oc get events -o yaml > $ARTIFACT_DIR/all-events.yaml 2>&1
147165
exit 1

0 commit comments

Comments
 (0)