-
Notifications
You must be signed in to change notification settings - Fork 57
321 lines (315 loc) · 14.8 KB
/
soaking.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# Soak-test workflow: builds each Lambda layer, deploys a sample app with
# Terraform, validates its telemetry, then runs a soak test against it.
name: Soak tests

# Workflow-wide default region, read by steps through `env.AWS_DEFAULT_REGION`.
env:
  AWS_DEFAULT_REGION: us-east-1

on:
  schedule:
    # GitHub evaluates cron expressions in UTC.
    - cron: '0 14 * * 1,3,5'  # Mon, Wed, Fri morning PST
  workflow_dispatch:
    inputs:
      # Extra arguments appended to the soak-test container command line.
      # NOTE(review): input defaults only apply to manual runs; on scheduled
      # runs `github.event.inputs.soak_config` is empty - confirm that the
      # soak image's own defaults are what is wanted there.
      soak_config:
        description: 'set memory/cpu threshold, soak time (s), emitter interval'
        required: false
        default: '-t 1800'

permissions:
  # `id-token: write` lets the job request an OIDC token for role assumption.
  id-token: write
  contents: read
jobs:
  # Builds, deploys, validates and soak-tests one Lambda layer per matrix
  # entry (language x instrumentation type x architecture).
  soaking-test:
    runs-on: ubuntu-22.04
    name: Soak Test - (${{ matrix.language }}, ${{ matrix.sample-app }}, ${{ matrix.instrumentation-type }}, ${{ matrix.architecture }})
    strategy:
      fail-fast: false
      matrix:
        # FIXME: (enowell) Both .NET and Go Sample Apps want to Soak Test
        # the same collector-only lambda layer. We count on Soaking Tests to
        # test whether a layer is ready for release. However, the current
        # workflow can only test one Sample App per Lambda Layer. We should
        # create a separate workflow to soak-test Layers with multiple
        # soak tests.
        language: [ go, java, nodejs, python ]
        sample-app: [ aws-sdk ]
        instrumentation-type: [ wrapper ]
        architecture: [ amd64, arm64 ]
        include:
          # FIXME: (enowell) Same problem as above, we cannot Soak Test the
          # other java app (okhttp) because it will create its own Lambda Layer
          # instead of soak testing the same one as the `aws-sdk` sample app.
          - language: java
            sample-app: aws-sdk
            instrumentation-type: agent
            architecture: amd64
          - language: java
            sample-app: aws-sdk
            instrumentation-type: agent
            architecture: arm64
    # Output names follow `<language>-<instrumentation-type>-{error,version}`;
    # the steps referenced below write them with exactly that key format.
    outputs:
      go-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.go-wrapper-error }}
      nodejs-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.nodejs-wrapper-error }}
      python-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.python-wrapper-error }}
      java-agent-error: ${{ steps.set-layer-if-error-output.outputs.java-agent-error }}
      java-wrapper-error: ${{ steps.set-layer-if-error-output.outputs.java-wrapper-error }}
      # NOTE: (enowell) When we release a Lambda Layer, we will ALWAYS release
      # all the architectures TOGETHER. So all architectures will be at the same
      # version.
      go-wrapper-version: ${{ steps.set-collector-layer-version-output.outputs.go-wrapper-version }}
      nodejs-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.nodejs-wrapper-version }}
      python-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.python-wrapper-version }}
      java-agent-version: ${{ steps.set-sdk-layer-version-output.outputs.java-agent-version }}
      java-wrapper-version: ${{ steps.set-sdk-layer-version-output.outputs.java-wrapper-version }}
    steps:
      # ---- Toolchain setup -------------------------------------------------
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # Go is installed for every matrix entry, not only for `go`.
      - uses: actions/setup-go@v5
        with:
          go-version: '~1.23.4'
          check-latest: true
      - uses: actions/setup-java@v4
        if: ${{ matrix.language == 'java' }}
        with:
          distribution: corretto
          java-version: '17'
      - name: Cache (Java)
        uses: actions/cache@v4
        if: ${{ matrix.language == 'java' }}
        with:
          path: |
            ~/go/pkg/mod
            ~/.gradle/caches
            ~/.gradle/wrapper
          key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-gradle-
      # Exactly one of the next two steps runs; each exports
      # SOAKING_TEST_CONFIG, which is appended to the soak-test command line.
      - name: Get default soaking test configuration
        # CPU baseline was obtained empirically based on the max value observed for Go in a complete run of soak test
        if: ${{ matrix.language != 'java' }}
        run: |
          echo SOAKING_TEST_CONFIG="-c 120 -m 70" | tee --append $GITHUB_ENV
      - name: Get java soaking test configuration
        # NOTE (enowell): Java's JVM is heavy and needs more memory than others.
        if: ${{ matrix.language == 'java' }}
        run: |
          echo SOAKING_TEST_CONFIG="-c 200 -m 90" | tee --append $GITHUB_ENV
      - uses: actions/setup-node@v4
        if: ${{ matrix.language == 'nodejs' }}
        with:
          node-version: '16'
      - name: Cache (NodeJS)
        uses: actions/cache@v4
        if: ${{ matrix.language == 'nodejs' }}
        with:
          path: |
            ~/go/pkg/mod
            ~/.npm
          key: ${{ runner.os }}-node-${{ hashFiles('**/package.json') }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-node-
      - uses: actions/setup-python@v5
        if: ${{ matrix.language == 'python' }}
        with:
          python-version: '3.x'
      - name: Cache (Python)
        uses: actions/cache@v4
        if: ${{ matrix.language == 'python' }}
        with:
          path: |
            ~/go/pkg/mod
            ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      # `dotnet` is not in the matrix above, so this step (and the other
      # `dotnet` conditions further down) never matches at present.
      - uses: actions/setup-dotnet@v4
        if: ${{ matrix.language == 'dotnet' }}
        with:
          dotnet-version: '6.0.405'
      # NOTE(review): the version tag of this action was unreadable in the
      # source (it appeared as an obfuscated e-mail placeholder). `@v4` is an
      # assumption - confirm or restore the exact pin.
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.INTEG_TEST_LAMBDA_ROLE_ARN }}
          mask-aws-account-id: false
          aws-region: ${{ env.AWS_DEFAULT_REGION }}
          # Default session duration is 1 hour with OIDC.
          role-duration-seconds: 14400 # 4 hours
      # ---- Build -----------------------------------------------------------
      - name: Patch ADOT
        run: ./patch-upstream.sh
      # Login to ECR since may be needed for Python build image.
      - name: Login to Public ECR
        uses: docker/login-action@v3
        with:
          registry: public.ecr.aws
      - name: Build layers / functions
        run: GOARCH=${{ matrix.architecture }} ./build.sh ${{ matrix.architecture }}
        working-directory: ${{ matrix.language }}
      # ---- Terraform inputs ------------------------------------------------
      # Translate the matrix architecture into the value exported as
      # LAMBDA_FUNCTION_ARCH (`amd64` becomes `x86_64`, `arm64` is unchanged).
      - name: Get Lambda Layer `amd64` architecture value
        if: ${{ matrix.architecture == 'amd64' }}
        run: echo LAMBDA_FUNCTION_ARCH=x86_64 | tee --append $GITHUB_ENV
      - name: Get Lambda Layer `arm64` architecture value
        if: ${{ matrix.architecture == 'arm64' }}
        run: echo LAMBDA_FUNCTION_ARCH=arm64 | tee --append $GITHUB_ENV
      - name: Get terraform directory
        run: |
          echo TERRAFORM_DIRECTORY=${{ matrix.language }}/integration-tests/${{ matrix.sample-app }}/${{ matrix.instrumentation-type }} |
          tee --append $GITHUB_ENV
      - uses: hashicorp/setup-terraform@v2
        with:
          terraform_version: 1.3.1
      - name: Initialize terraform
        run: terraform init
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      # The function name embeds the run id, so it differs between runs.
      - name: Get terraform Lambda function name
        run: |
          echo TERRAFORM_LAMBDA_FUNCTION_NAME=lambda-${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}-${{ matrix.architecture }}-${{ github.run_id }} |
          tee --append $GITHUB_ENV
      # NOTE: (enowell) We don't need to include `sample-app` in the Lambda
      # Layer name because different apps should use the same layer, not
      # create their own. However, if we ever Soak Test multiple apps, we need
      # to BE CAREFUL about not creating duplicate layer with the same name.
      - name: Get terraform Lambda layer name
        run: |
          echo TERRAFORM_LAMBDA_LAYER_NAME=aws-otel-${{ matrix.language }}-${{ matrix.instrumentation-type }}-${{ matrix.architecture }}-${{ github.sha }} |
          tee --append $GITHUB_ENV
      # ---- Deploy ----------------------------------------------------------
      # The same layer name is passed for both the SDK and collector variables.
      - name: Apply terraform
        run: terraform apply -auto-approve
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
        env:
          TF_VAR_sdk_layer_name: ${{ env.TERRAFORM_LAMBDA_LAYER_NAME }}
          TF_VAR_collector_layer_name: ${{ env.TERRAFORM_LAMBDA_LAYER_NAME }}
          TF_VAR_function_name: ${{ env.TERRAFORM_LAMBDA_FUNCTION_NAME }}
          TF_VAR_architecture: ${{ env.LAMBDA_FUNCTION_ARCH }}
      - name: Extract endpoint
        id: extract-endpoint
        run: terraform output -raw api-gateway-url
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      - name: Extract AMP endpoint
        if: ${{ matrix.language == 'java' && matrix.sample-app == 'aws-sdk' && matrix.instrumentation-type == 'agent' }}
        id: extract-amp-endpoint
        run: terraform output -raw amp_endpoint
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      - name: Extract SDK layer arn
        id: extract-sdk-layer-arn
        if: ${{ matrix.language != 'dotnet' && matrix.language != 'go' }}
        run: terraform output -raw sdk_layer_arn
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      - name: Extract Collector layer arn
        id: extract-collector-layer-arn
        if: ${{ matrix.language == 'dotnet' || matrix.language == 'go' }}
        run: terraform output -raw collector_layer_arn
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      # NOTE: (enowell) `terraform output` outputs additional text we are
      # not interested in because the `hashicorp/setup-terraform` action has a
      # wrapper. We solve this by using separate steps, because this text
      # doesn't show up when accessed in later steps.
      #
      # See more: https://github.com/hashicorp/setup-terraform/issues/20
      #
      # The layer version is taken as the 8th `:`-separated field of the ARN.
      - name: Set SDK layer version output
        id: set-sdk-layer-version-output
        if: ${{ matrix.language != 'dotnet' && matrix.language != 'go' }}
        run: |
          version=$(echo "${{ steps.extract-sdk-layer-arn.outputs.stdout }}" | cut -d : -f 8)
          echo "Found version number: $version"
          echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-version=$version" >> $GITHUB_OUTPUT
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      - name: Set Collector layer version output
        id: set-collector-layer-version-output
        if: ${{ matrix.language == 'dotnet' || matrix.language == 'go' }}
        run: |
          version=$(echo "${{ steps.extract-collector-layer-arn.outputs.stdout }}" | cut -d : -f 8)
          echo "Found version number: $version"
          echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-version=$version" >> $GITHUB_OUTPUT
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      # ---- Validate and soak -----------------------------------------------
      - name: Send request to endpoint
        run: curl -sS ${{ steps.extract-endpoint.outputs.stdout }}
      - name: Checkout test framework
        uses: actions/checkout@v4
        with:
          repository: aws-observability/aws-otel-test-framework
          path: test-framework
      # Copies this entry's expected-trace template over the validator's
      # template file, then runs the validator against the deployed endpoint.
      - name: validate trace sample
        run: |
          cp adot/utils/expected-templates/${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}.json \
            test-framework/validator/src/main/resources/expected-data-template/lambdaExpectedTrace.mustache
          cd test-framework
          ./gradlew :validator:run --args="-c default-lambda-validation.yml --endpoint ${{ steps.extract-endpoint.outputs.stdout }} --region ${{ env.AWS_DEFAULT_REGION }}"
      - name: validate java agent metric sample
        if: ${{ matrix.language == 'java' && matrix.sample-app == 'aws-sdk' && matrix.instrumentation-type == 'agent' }}
        run: |
          cp adot/utils/expected-templates/${{ matrix.language }}-${{ matrix.sample-app }}-${{ matrix.instrumentation-type }}-metric.json \
            test-framework/validator/src/main/resources/expected-data-template/ampExpectedMetric.mustache
          cd test-framework
          ./gradlew :validator:run --args="-c prometheus-static-metric-validation.yml --cortex-instance-endpoint ${{ steps.extract-amp-endpoint.outputs.stdout }} --region ${{ env.AWS_DEFAULT_REGION }}"
      # Folded scalar: the lines below are joined with spaces into a single
      # `docker run` command. AWS credentials are passed through by name from
      # the step environment. `soak_config` is empty on scheduled runs.
      - name: Run soak test
        run: >-
          docker run
          --rm
          -e AWS_DEFAULT_REGION
          -e AWS_ACCESS_KEY_ID
          -e AWS_SECRET_ACCESS_KEY
          -e AWS_SESSION_TOKEN
          public.ecr.aws/aws-otel-test/lambda-soak:latest
          -n ${{ env.TERRAFORM_LAMBDA_FUNCTION_NAME }}
          -e ${{ steps.extract-endpoint.outputs.stdout }}
          ${{ github.event.inputs.soak_config }}
          ${{ env.SOAKING_TEST_CONFIG }}
          -a ${{ matrix.architecture }}
      # ---- Result reporting and cleanup ------------------------------------
      # Runs only when an earlier step failed; marks this layer as FAILED for
      # the `output-keywords` job.
      - name: Set output if layer Soak Tests has error
        id: set-layer-if-error-output
        if: ${{ failure() }}
        run: echo "${{ matrix.language }}-${{ matrix.instrumentation-type }}-error=FAILED" >> $GITHUB_OUTPUT
      # The two `state rm` steps carry no status function, so they run only if
      # every earlier step succeeded. After a failure the layer therefore stays
      # in Terraform state and the destroy step below removes it - presumably
      # intentional, so that only layers that passed the soak test are kept.
      - name: Remove sdk layers from terraform management to prevent deletion.
        if: ${{ matrix.language != 'go' }}
        run: terraform state rm aws_lambda_layer_version.sdk_layer
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      - name: Remove collector layers from terraform management to prevent deletion.
        if: ${{ matrix.language == 'go' }}
        run: terraform state rm aws_lambda_layer_version.collector_layer
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
      # Always attempted, including after failures.
      - name: Destroy terraform
        if: always()
        run: terraform destroy -auto-approve
        working-directory: ${{ env.TERRAFORM_DIRECTORY }}
        env:
          TF_VAR_architecture: ${{ env.LAMBDA_FUNCTION_ARCH }}

  # For each layer (language x instrumentation type), fails if the soak job
  # reported FAILED for it; otherwise emits the layer ARN pattern as a warning
  # annotation.
  output-keywords:
    if: ${{ always() }}
    name: Output (${{ matrix.language }}, ${{ matrix.instrumentation-type }}) Layer Keyword
    runs-on: ubuntu-22.04
    needs:
      - soaking-test
    strategy:
      fail-fast: false
      matrix:
        language: [ go, java, nodejs, python ]
        instrumentation-type: [ wrapper ]
        include:
          - language: java
            instrumentation-type: agent
    steps:
      # jq prints `true` when the layer's `-error` output equals "FAILED" and
      # `false` otherwise; `|| echo false` covers a jq failure. The final test
      # makes the step fail unless the captured value is exactly `false`.
      - name: Confirm none of the architecture soak tests for the layer failed
        run: |
          AT_LEAST_ONE_LAYER_SOAK_TEST_FAILED=$(
            echo '${{ toJSON(needs.soaking-test.outputs) }}' |
            jq '
              ."${{ matrix.language }}-${{ matrix.instrumentation-type }}-error" == "FAILED"
            ' || echo false
          )
          [[ $AT_LEAST_ONE_LAYER_SOAK_TEST_FAILED == false ]]
      # `<ARCHITECTURE>` is emitted literally as a placeholder in the ARN.
      - name: Output keyword for (${{ matrix.language }}, ${{ matrix.instrumentation-type }}) layer
        run: |
          VERSION=$(
            echo '${{ toJSON(needs.soaking-test.outputs) }}' |
            jq -r '."${{ matrix.language }}-${{ matrix.instrumentation-type }}-version"'
          )
          echo "::warning::Layer ARN Keyword: arn:aws:lambda:${{ env.AWS_DEFAULT_REGION }}:611364707713:layer:aws-otel-${{ matrix.language }}-${{ matrix.instrumentation-type }}-<ARCHITECTURE>-${{ github.sha }}:$VERSION"

  # Calls the reusable publish-status workflow with the soak job's overall
  # result; runs regardless of how `soaking-test` finished.
  publish-soaking-status:
    needs: [soaking-test]
    if: ${{ always() }}
    uses: ./.github/workflows/publish-status.yml
    with:
      namespace: 'ADOT/GitHubActions'
      repository: ${{ github.repository }}
      branch: ${{ github.ref_name }}
      workflow: soaking
      success: ${{ needs.soaking-test.result == 'success' }}
      region: us-west-2
    secrets:
      roleArn: ${{ secrets.METRICS_ROLE_ARN }}