|
4 | 4 | schedule: |
5 | 5 | # Run at 11:00 UTC (3 AM PST / 4 AM PDT) - offset from the existing nightly run |
6 | 6 | - cron: '00 11 * * *' |
7 | | - push: |
8 | | - branches: |
9 | | - - add-nightly-throughput-stress-workflow |
10 | | - - cleanup-nightly-tps-workflow |
11 | 7 | workflow_dispatch: |
12 | 8 | inputs: |
13 | 9 | duration: |
|
25 | 21 | required: false |
26 | 22 | default: 360 |
27 | 23 | type: number |
| 24 | + is_experiment: |
| 25 | + description: 'Mark this run as an experiment (excluded from nightly dashboards)' |
| 26 | + required: false |
| 27 | + default: false |
| 28 | + type: boolean |
28 | 29 |
|
29 | 30 | permissions: |
30 | 31 | contents: read |
| 32 | + id-token: write |
31 | 33 |
|
32 | 34 | env: |
33 | 35 | # Workflow configuration |
34 | 36 | TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }} |
35 | 37 | TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }} |
36 | 38 |
|
| 39 | + # ARN of the AWS IAM role assumed to upload metrics to S3 |
| 40 | + AWS_S3_METRICS_UPLOAD_ROLE_ARN: ${{ vars.AWS_S3_METRICS_UPLOAD_ROLE_ARN }} |
| 41 | + |
37 | 42 | # Logging and artifacts |
38 | 43 | WORKER_LOG_DIR: /tmp/throughput-stress-logs |
39 | 44 |
|
|
42 | 47 | OMES_REF: main |
43 | 48 | RUN_ID: ${{ github.run_id }}-throughput-stress |
44 | 49 |
|
| 50 | + # Prometheus release version downloaded by the "Install Prometheus" step |
| 51 | + PROM_VERSION: "3.8.0" |
| 52 | + |
| 53 | + # SDK language under test (passed to --language and used in the S3 upload path) |
| 54 | + SDK_LANG: "go" |
| 55 | + |
45 | 56 | jobs: |
46 | 57 | throughput-stress: |
47 | 58 | runs-on: ubuntu-latest-4-cores |
48 | 59 | timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || (vars.NIGHTLY_JOB_TIMEOUT_MINUTES || '360')) }} |
49 | | - permissions: |
50 | | - contents: read |
51 | | - actions: write |
52 | 60 |
|
53 | 61 | steps: |
54 | 62 | - name: Print test configuration |
|
85 | 93 | - name: Install Temporal CLI |
86 | 94 | uses: temporalio/setup-temporal@v0 |
87 | 95 |
|
| 96 | + - name: Install Prometheus |
| 97 | + run: | |
| 98 | + wget -q https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz |
| 99 | + tar xzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz |
| 100 | + sudo mv prometheus-${PROM_VERSION}.linux-amd64/prometheus /usr/local/bin/ |
| 101 | + prometheus --version |
| 102 | +
|
88 | 103 | - name: Setup log directory |
89 | 104 | run: mkdir -p $WORKER_LOG_DIR |
90 | 105 |
|
@@ -112,19 +127,47 @@ jobs: |
112 | 127 | # to give CI a bit more time for visibility consistency |
113 | 128 | go run ./cmd run-scenario-with-worker \ |
114 | 129 | --scenario throughput_stress \ |
115 | | - --language go \ |
| 130 | + --language $SDK_LANG \ |
116 | 131 | --version $(pwd)/.. \ |
117 | 132 | --run-id $RUN_ID \ |
118 | 133 | --duration $TEST_DURATION \ |
119 | 134 | --timeout $TEST_TIMEOUT \ |
120 | 135 | --max-concurrent 10 \ |
| 136 | + --prom-listen-address 127.0.0.1:9091 \ |
| 137 | + --worker-prom-listen-address 127.0.0.1:9092 \ |
| 138 | + --prom-instance-addr 127.0.0.1:9090 \ |
| 139 | + --prom-instance-config \ |
| 140 | + --prom-export-worker-metrics $RUN_ID.parquet \ |
121 | 141 | --option internal-iterations=10 \ |
122 | 142 | --option continue-as-new-after-iterations=3 \ |
123 | 143 | --option sleep-time=1s \ |
124 | 144 | --option visibility-count-timeout=5m \ |
125 | 145 | --option min-throughput-per-hour=1000 \ |
126 | 146 | 2>&1 | tee $WORKER_LOG_DIR/scenario.log |
127 | 147 |
|
| 148 | + - name: Configure AWS credentials |
| 149 | + if: always() |
| 150 | + uses: aws-actions/configure-aws-credentials@v4 |
| 151 | + with: |
| 152 | + role-to-assume: ${{ env.AWS_S3_METRICS_UPLOAD_ROLE_ARN }} |
| 153 | + aws-region: us-west-2 |
| 154 | + |
| 155 | + - name: Upload metrics to S3 |
| 156 | + if: always() |
| 157 | + env: |
| 158 | + GH_REF: ${{ github.ref }} |
| 159 | + IS_EXPERIMENT_INPUT: ${{ inputs.is_experiment }} |
| 160 | + run: | |
| 161 | + DATE=$(date +%Y-%m-%d) |
| 162 | + IS_EXPERIMENT="false" |
| 163 | + # Mark the run as an experiment if it is not on the main branch or the is_experiment input is true |
| 164 | + if [[ "$GH_REF" != "refs/heads/main" || "$IS_EXPERIMENT_INPUT" == "true" ]]; then |
| 165 | + IS_EXPERIMENT="true" |
| 166 | + fi |
| 167 | + echo "Uploading metrics: is_experiment=$IS_EXPERIMENT, language=$SDK_LANG, date=$DATE" |
| 168 | + aws s3 cp omes/$RUN_ID.parquet \ |
| 169 | + "s3://cloud-data-ingest-prod/github/sdk_load_test/is_experiment=$IS_EXPERIMENT/language=$SDK_LANG/date=$DATE/$RUN_ID.parquet" |
| 170 | +
|
128 | 171 | - name: Upload logs on failure/cancellation |
129 | 172 | if: failure() || cancelled() |
130 | 173 | uses: actions/upload-artifact@v4 |
|
0 commit comments