diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index e16a4bee57f..7f168d6ab91 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -144,6 +144,9 @@ jobs: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - uses: ./tools/github-actions/setup-deps + - name: Setup Graphviz + uses: ts-graphviz/setup-graphviz@v2 + # Benchmark - name: Run Benchmark tests env: diff --git a/.github/workflows/experimental_conformance.yaml b/.github/workflows/experimental_conformance.yaml index 4c2d2e60f06..a3e2fd894d3 100644 --- a/.github/workflows/experimental_conformance.yaml +++ b/.github/workflows/experimental_conformance.yaml @@ -3,10 +3,11 @@ on: push: paths: - 'charts/gateway-helm/crds/gatewayapi-crds.yaml' + - 'test/conformance/experimental_conformance_test.go' pull_request: paths: - 'charts/gateway-helm/crds/gatewayapi-crds.yaml' - - 'test/conformance/*.go' + - 'test/conformance/experimental_conformance_test.go' # Add workflow_dispatch to trigger this workflow manually by maintainers. 
workflow_dispatch: diff --git a/.github/workflows/latest_release.yaml b/.github/workflows/latest_release.yaml index c3f23909e37..be8af0ae824 100644 --- a/.github/workflows/latest_release.yaml +++ b/.github/workflows/latest_release.yaml @@ -25,6 +25,9 @@ jobs: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - uses: ./tools/github-actions/setup-deps + - name: Setup Graphviz + uses: ts-graphviz/setup-graphviz@v2 + # Benchmark - name: Run Benchmark tests env: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 841b9aabf2e..7da0055d2b0 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -18,6 +18,9 @@ jobs: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - uses: ./tools/github-actions/setup-deps + - name: Setup Graphviz + uses: ts-graphviz/setup-graphviz@v2 + # Benchmark - name: Run Benchmark tests env: diff --git a/internal/troubleshoot/collect/config_dump.go b/internal/troubleshoot/collect/config_dump.go index fe9ff9558ad..7a5c9b38343 100644 --- a/internal/troubleshoot/collect/config_dump.go +++ b/internal/troubleshoot/collect/config_dump.go @@ -96,5 +96,5 @@ func configDump(cli kube.CLIClient, nn types.NamespacedName, includeEds bool) ([ if includeEds { reqPath = fmt.Sprintf("%s?include_eds", reqPath) } - return requestWithPortForwarder(cli, nn, 19000, reqPath) + return RequestWithPortForwarder(cli, nn, 19000, reqPath) } diff --git a/internal/troubleshoot/collect/prometheus_metrics.go b/internal/troubleshoot/collect/prometheus_metrics.go index 785b99719af..9f659a54a22 100644 --- a/internal/troubleshoot/collect/prometheus_metrics.go +++ b/internal/troubleshoot/collect/prometheus_metrics.go @@ -94,7 +94,7 @@ func (p PrometheusMetric) Collect(_ chan<- interface{}) (tbcollect.CollectorResu reqPath = v } - data, err := requestWithPortForwarder(cliClient, nn, port, reqPath) + data, err := RequestWithPortForwarder(cliClient, nn, port, reqPath) if err != nil { 
logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because of err: %v", pod.Namespace, pod.Name, err)) continue @@ -121,7 +121,7 @@ func listPods(ctx context.Context, client kubernetes.Interface, namespace string return pods.Items, nil } -func requestWithPortForwarder(cli kube.CLIClient, nn types.NamespacedName, port int, reqPath string) ([]byte, error) { +func RequestWithPortForwarder(cli kube.CLIClient, nn types.NamespacedName, port int, reqPath string) ([]byte, error) { fw, err := kube.NewLocalPortForwarder(cli, nn, 0, port) if err != nil { return nil, err diff --git a/test/benchmark/suite/render.go b/test/benchmark/suite/render.go index dfe3f130ca4..199476f3e99 100644 --- a/test/benchmark/suite/render.go +++ b/test/benchmark/suite/render.go @@ -76,6 +76,10 @@ func RenderReport(writer io.Writer, name, description string, titleLevel int, re writeSection(writer, "Metrics", titleLevel+1, "") renderMetricsTable(writer, reports) + + writeSection(writer, "Profiles", titleLevel+1, "") + renderProfilesTable(writer, "Memory", "heap", titleLevel+2, reports) + return nil } @@ -145,6 +149,17 @@ func renderMetricsTable(writer io.Writer, reports []*BenchmarkReport) { _ = table.Flush() } +func renderProfilesTable(writer io.Writer, target, key string, titleLevel int, reports []*BenchmarkReport) { + writeSection(writer, target, titleLevel, "") + + for _, report := range reports { + // The image has not been rendered yet, so this is a placeholder for its path. + // The image will be rendered after the test has finished. + writeSection(writer, report.Name, titleLevel+1, + fmt.Sprintf("![%s-%s](%s.png)", key, report.Name, report.ProfilesPath[key])) + } +} + // writeSection writes one section in Markdown style, content is optional. 
func writeSection(writer io.Writer, title string, level int, content string) { md := fmt.Sprintf("\n%s %s\n", strings.Repeat("#", level), title) diff --git a/test/benchmark/suite/report.go b/test/benchmark/suite/report.go index d0176331ee4..b9ecbea9701 100644 --- a/test/benchmark/suite/report.go +++ b/test/benchmark/suite/report.go @@ -13,35 +13,51 @@ import ( "context" "fmt" "io" + "os" + "path" "strconv" + "strings" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" kube "github.com/envoyproxy/gateway/internal/kubernetes" + "github.com/envoyproxy/gateway/internal/troubleshoot/collect" prom "github.com/envoyproxy/gateway/test/utils/prometheus" ) type BenchmarkReport struct { - Name string - Result []byte - Metrics map[string]float64 // metricTableHeaderName:metricValue + Name string + Result []byte + Metrics map[string]float64 // metricTableHeaderName:metricValue + ProfilesPath map[string]string // profileKey:profileFilepath + ProfilesOutputDir string kubeClient kube.CLIClient promClient *prom.Client } -func NewBenchmarkReport(name string, kubeClient kube.CLIClient, promClient *prom.Client) *BenchmarkReport { - return &BenchmarkReport{ - Name: name, - Metrics: make(map[string]float64), - kubeClient: kubeClient, - promClient: promClient, +func NewBenchmarkReport(name, profilesOutputDir string, kubeClient kube.CLIClient, promClient *prom.Client) (*BenchmarkReport, error) { + if err := createDirIfNotExist(profilesOutputDir); err != nil { + return nil, err } + + return &BenchmarkReport{ + Name: name, + Metrics: make(map[string]float64), + ProfilesPath: make(map[string]string), + ProfilesOutputDir: profilesOutputDir, + kubeClient: kubeClient, + promClient: promClient, + }, nil } func (r *BenchmarkReport) Collect(ctx context.Context, job *types.NamespacedName) error { + if err := r.GetProfiles(ctx); err != nil { + return err + } + if err := r.GetMetrics(ctx); err != nil { return err } @@ -109,6 +125,33 @@ func (r 
*BenchmarkReport) GetMetrics(ctx context.Context) error { return nil } +func (r *BenchmarkReport) GetProfiles(ctx context.Context) error { + egPod, err := r.fetchEnvoyGatewayPod(ctx) + if err != nil { + return err + } + + // Memory heap profiles. + heapProf, err := collect.RequestWithPortForwarder( + r.kubeClient, types.NamespacedName{Name: egPod.Name, Namespace: egPod.Namespace}, 19000, "/debug/pprof/heap", + ) + if err != nil { + return err + } + + heapProfPath := path.Join(r.ProfilesOutputDir, fmt.Sprintf("heap.%s.pprof", r.Name)) + if err = os.WriteFile(heapProfPath, heapProf, 0o600); err != nil { + return fmt.Errorf("failed to write profiles %s: %w", heapProfPath, err) + } + + // Remove parent output report dir. + splits := strings.SplitN(heapProfPath, "/", 2)[0] + heapProfPath = strings.TrimPrefix(heapProfPath, splits+"/") + r.ProfilesPath["heap"] = heapProfPath + + return nil +} + // getLogsFromPod scrapes the logs directly from the pod (default container). func (r *BenchmarkReport) getLogsFromPod(ctx context.Context, pod *types.NamespacedName) ([]byte, error) { podLogOpts := corev1.PodLogOptions{} @@ -129,3 +172,19 @@ func (r *BenchmarkReport) getLogsFromPod(ctx context.Context, pod *types.Namespa return buf.Bytes(), nil } + +func (r *BenchmarkReport) fetchEnvoyGatewayPod(ctx context.Context) (*corev1.Pod, error) { + egPods, err := r.kubeClient.Kube().CoreV1(). + Pods("envoy-gateway-system"). 
+ List(ctx, metav1.ListOptions{LabelSelector: "control-plane=envoy-gateway"}) + if err != nil { + return nil, err + } + + if len(egPods.Items) < 1 { + return nil, fmt.Errorf("failed to get any pods for envoy-gateway") + } + + // Use the first pod as the default envoy-gateway pod. + return &egPods.Items[0], nil +} diff --git a/test/benchmark/suite/suite.go b/test/benchmark/suite/suite.go index 035ac68d5a8..58bbcce4cd2 100644 --- a/test/benchmark/suite/suite.go +++ b/test/benchmark/suite/suite.go @@ -104,14 +104,8 @@ func NewBenchmarkTestSuite(client client.Client, options BenchmarkOptions, // Ensure the report directory exist. if len(reportDir) > 0 { - if _, err = os.Stat(reportDir); err != nil { - if os.IsNotExist(err) { - if err = os.MkdirAll(reportDir, os.ModePerm); err != nil { - return nil, err - } - } else { - return nil, err - } + if err = createDirIfNotExist(reportDir); err != nil { + return nil, err } } @@ -232,7 +226,11 @@ func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, name, t.Logf("Running benchmark test: %s successfully", name) - report := NewBenchmarkReport(name, b.kubeClient, b.promClient) + report, err := NewBenchmarkReport(name, path.Join(b.ReportSaveDir, "profiles"), b.kubeClient, b.promClient) + if err != nil { + return nil, fmt.Errorf("failed to create benchmark report: %w", err) + } + // Get all the reports from this benchmark test run. 
if err = report.Collect(ctx, jobNN); err != nil { return nil, err @@ -392,3 +390,15 @@ func (b *BenchmarkTestSuite) RegisterCleanup(t *testing.T, ctx context.Context, t.Logf("Clean up complete!") }) } + +func createDirIfNotExist(dir string) (err error) { + if _, err = os.Stat(dir); err != nil { + if os.IsNotExist(err) { + if err = os.MkdirAll(dir, os.ModePerm); err == nil { + return nil + } + } + return err + } + return nil +} diff --git a/test/conformance/experimental_conformance_test.go b/test/conformance/experimental_conformance_test.go index 770abbc1c8a..d337dbfe526 100644 --- a/test/conformance/experimental_conformance_test.go +++ b/test/conformance/experimental_conformance_test.go @@ -35,6 +35,7 @@ func TestExperimentalConformance(t *testing.T) { opts.ConformanceProfiles = sets.New( suite.GatewayHTTPConformanceProfileName, suite.GatewayTLSConformanceProfileName, + suite.GatewayGRPCConformanceProfileName, ) t.Logf("Running experimental conformance tests with %s GatewayClass\n cleanup: %t\n debug: %t\n enable all features: %t \n conformance profiles: [%v]", diff --git a/tools/make/kube.mk b/tools/make/kube.mk index 354781bd9fd..38a3d5b977c 100644 --- a/tools/make/kube.mk +++ b/tools/make/kube.mk @@ -79,6 +79,7 @@ kube-deploy-for-benchmark-test: manifests helm-generate ## Install Envoy Gateway helm install eg charts/gateway-helm --set deployment.envoyGateway.imagePullPolicy=$(IMAGE_PULL_POLICY) \ --set deployment.envoyGateway.resources.limits.cpu=$(BENCHMARK_CPU_LIMITS) \ --set deployment.envoyGateway.resources.limits.memory=$(BENCHMARK_MEMORY_LIMITS) \ + --set config.envoyGateway.admin.enablePprof=true \ -n envoy-gateway-system --create-namespace --debug --timeout='$(WAIT_TIMEOUT)' --wait --wait-for-jobs # Install Prometheus-server only helm install eg-addons charts/gateway-addons-helm --set loki.enabled=false \ @@ -169,6 +170,12 @@ run-benchmark: install-benchmark-server ## Run benchmark tests kubectl wait --timeout=$(WAIT_TIMEOUT) -n envoy-gateway-system 
deployment/envoy-gateway --for=condition=Available kubectl apply -f test/benchmark/config/gatewayclass.yaml go test -v -tags benchmark -timeout $(BENCHMARK_TIMEOUT) ./test/benchmark --rps=$(BENCHMARK_RPS) --connections=$(BENCHMARK_CONNECTIONS) --duration=$(BENCHMARK_DURATION) --report-save-dir=$(BENCHMARK_REPORT_DIR) + # render benchmark profiles into image + dot -V + @for profile in $(wildcard test/benchmark/$(BENCHMARK_REPORT_DIR)/profiles/*.pprof); do \ + $(call log, "Rendering profile image for: $${profile}"); \ + go tool pprof -png $${profile} > $${profile}.png; \ + done .PHONY: install-benchmark-server install-benchmark-server: ## Install nighthawk server for benchmark test