diff --git a/README.md b/README.md index 8473ac2..a610464 100644 --- a/README.md +++ b/README.md @@ -50,8 +50,14 @@ The `ghe-reposec` tool can be configured using environment variables. Below are - `REPOSEC_LAVA_CONCURRENCY`: The number of concurrent Lava scans (default: `10`). - `REPOSEC_LAVA_BINARY_PATH`: The path to the Lava binary (default: `/usr/bin/lava`). - `REPOSEC_LAVA_CHECK_IMAGE`: The Lava check image (default: `vulcansec/vulcan-repository-sctrl:a20516f-4aae88d`). -- `LAVA_RESULTS_PATH`: The path where Lava results (stdout and stderr) will be stored if specified. +- `REPOSEC_LAVA_RESULTS_PATH`: The path where Lava results (stdout and stderr) will be stored if specified. +### Metrics Configuration + +- `REPOSEC_METRICS_ENABLED`: Enable metrics (default: `false`). +- `REPOSEC_METRICS_ADDRESS`: The statsd listener address (default: `localhost:8125`). +- `REPOSEC_METRICS_NAMESPACE`: The metrics namespace (default: `ghereposec`). +- `REPOSEC_METRICS_TAGS`: The metrics tags (default: `ghereposec:metrics`). Multiple tags can be specified separated by commas. 
## Contributing diff --git a/go.mod b/go.mod index c7dae1a..23591f3 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,15 @@ module github.com/adevinta/ghe-reposec go 1.23 require ( + github.com/DataDog/datadog-go v4.8.3+incompatible github.com/adevinta/vulcan-report v1.0.0 github.com/caarlos0/env/v11 v11.3.1 github.com/google/go-github/v67 v67.0.0 ) -require github.com/google/go-querystring v1.1.0 // indirect +require ( + github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/google/go-querystring v1.1.0 // indirect + github.com/stretchr/testify v1.10.0 // indirect + golang.org/x/sys v0.10.0 // indirect +) diff --git a/go.sum b/go.sum index 3ecd9b5..3191083 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,13 @@ +github.com/DataDog/datadog-go v4.8.3+incompatible h1:fNGaYSuObuQb5nzeTQqowRAd9bpDIRRV4/gUtIBjh8Q= +github.com/DataDog/datadog-go v4.8.3+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/adevinta/vulcan-report v1.0.0 h1:44aICPZ+4svucgCSA5KmjlT3ZGzrvZXiSnkbnj6AC2k= github.com/adevinta/vulcan-report v1.0.0/go.mod h1:k34KaeoXc3H77WNMwI9F4F1G28hBjB95PeMUp9oHbEE= github.com/caarlos0/env/v11 v11.3.1 h1:cArPWC15hWmEt+gWk7YBi7lEXTXCvpaSdCiZE2X5mCA= github.com/caarlos0/env/v11 v11.3.1/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -9,4 +15,14 @@ github.com/google/go-github/v67 v67.0.0 h1:g11NDAmfaBaCO8qYdI9fsmbaRipHNWRIU/2YG 
github.com/google/go-github/v67 v67.0.0/go.mod h1:zH3K7BxjFndr9QSeFibx4lTKkYS3K9nDanoI1NjaOtY= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/config/config.go b/internal/config/config.go index b57f1c5..1d56a75 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -43,6 +43,14 @@ type LavaConfig struct { ResultsPath string `env:"LAVA_RESULTS_PATH"` } +// MetricsConfig represents the metrics configuration. +type MetricsConfig struct { + Enabled bool `env:"METRICS_ENABLED" envDefault:"false"` + Address string `env:"METRICS_ADDRESS" envDefault:"localhost:8125"` + Namespace string `env:"METRICS_NAMESPACE" envDefault:"ghereposec"` + Tags []string `env:"METRICS_TAGS" envSeparator:"," envDefault:"ghereposec:metrics"` +} + // Config represents the ghe-reposec configuration. 
type Config struct { LogLevel string `env:"LOG_LEVEL" envDefault:"info"` @@ -52,8 +60,9 @@ type Config struct { OutputFilePath string `env:"OUTPUT_FILE" envDefault:"/tmp/reposec.csv"` OutputFormat string `env:"OUTPUT_FORMAT" envDefault:"csv"` - GHECfg GHEConfig - LavaCfg LavaConfig + GHECfg GHEConfig + LavaCfg LavaConfig + MetricsCfg MetricsConfig } // Redacted returns a secret redacted version of the configuration. diff --git a/internal/github/github.go b/internal/github/github.go index 9bbafb9..8e8276c 100644 --- a/internal/github/github.go +++ b/internal/github/github.go @@ -14,6 +14,7 @@ import ( gh "github.com/google/go-github/v67/github" "github.com/adevinta/ghe-reposec/internal/config" + "github.com/adevinta/ghe-reposec/internal/metrics" ) var ( @@ -25,14 +26,15 @@ var ( // Client is a GitHub client wrapper. type Client struct { - cfg config.GHEConfig - client *gh.Client - logger *slog.Logger - ctx context.Context + cfg config.GHEConfig + client *gh.Client + logger *slog.Logger + metrics *metrics.Client + ctx context.Context } // NewClient creates a new GitHub Enterprise client. 
-func NewClient(ctx context.Context, logger *slog.Logger, cfg config.GHEConfig) (*Client, error) { +func NewClient(ctx context.Context, logger *slog.Logger, m *metrics.Client, cfg config.GHEConfig) (*Client, error) { if cfg.Token == "" { return nil, ErrTokenRequired } @@ -59,10 +61,11 @@ func NewClient(ctx context.Context, logger *slog.Logger, cfg config.GHEConfig) ( logger.Debug("GitHub Enterprise token", "owner", user.GetLogin()) return &Client{ - cfg: cfg, - logger: logger, - client: client, - ctx: ctx, + cfg: cfg, + logger: logger, + client: client, + metrics: m, + ctx: ctx, }, nil } @@ -108,6 +111,7 @@ func (c *Client) Repositories(targetOrg string) ([]string, error) { return []string{}, fmt.Errorf("failed to list organizations: %w", err) } } + c.metrics.Gauge("organizations", len(orgs), []string{}) c.logger.Debug("listing repositories") sem := make(chan struct{}, c.cfg.Concurrency) @@ -140,6 +144,16 @@ func orgRepositories(c *Client, org string, wg *sync.WaitGroup, sem chan struct{ c.logger.Debug("obtaining repositories for organization", "organization", org) + repoMetrics := map[string]int{ + "too_big": 0, + "empty": 0, + "archived": 0, + "disabled": 0, + "fork": 0, + "template": 0, + "inactive": 0, + "selected": 0, + } allRepos := []string{} listOpts := &gh.RepositoryListByOrgOptions{ListOptions: gh.ListOptions{PerPage: 100}} for { @@ -161,31 +175,37 @@ func orgRepositories(c *Client, org string, wg *sync.WaitGroup, sem chan struct{ // If repository is too big, skip it. if repo.Size != nil && *repo.Size > c.cfg.RepositorySizeLimit { c.logger.Warn("repository is too big, skipping", "size_kb", *repo.Size, "repository", repo.GetFullName()) + repoMetrics["too_big"]++ continue } // If repository is empty, skip it. if (repo.Size != nil && *repo.Size == 0) && !c.cfg.IncludeEmpty { c.logger.Warn("repository is empty, skipping", "repository", repo.GetFullName()) + repoMetrics["empty"]++ continue } // If repository is archived, skip it. 
if (repo.Archived != nil && *repo.Archived) && !c.cfg.IncludeArchived { c.logger.Warn("repository is archived, skipping", "repository", repo.GetFullName()) + repoMetrics["archived"]++ continue } // If repository is disabled, skip it. if (repo.Disabled != nil && *repo.Disabled) && !c.cfg.IncludeDisabled { c.logger.Warn("repository is disabled, skipping", "repository", repo.GetFullName()) + repoMetrics["disabled"]++ continue } // If repository is a fork, skip it. if (repo.Fork != nil && *repo.Fork) && !c.cfg.IncludeForks { c.logger.Warn("repository is a fork, skipping", "repository", repo.GetFullName()) + repoMetrics["fork"]++ continue } // If repository is a template, skip it. if (repo.IsTemplate != nil && *repo.IsTemplate) && !c.cfg.IncludeTemplates { c.logger.Warn("repository is a template, skipping", "repository", repo.GetFullName()) + repoMetrics["template"]++ continue } // If repository hadn't been active for a while, skip it. @@ -196,10 +216,12 @@ func orgRepositories(c *Client, org string, wg *sync.WaitGroup, sem chan struct{ if isUpdatedInactive && isPushedInactive { c.logger.Warn("repository has not been active for a while, skipping", "repository", repo.GetFullName()) + repoMetrics["inactive"]++ continue } } allRepos = append(allRepos, *repo.CloneURL) + repoMetrics["selected"]++ } if resp.NextPage == 0 { break @@ -208,6 +230,9 @@ func orgRepositories(c *Client, org string, wg *sync.WaitGroup, sem chan struct{ } c.logger.Debug("organization repository listing completed", "organization", org, "repositories", len(allRepos)) + for k, v := range repoMetrics { + c.metrics.Gauge("repositories", v, []string{"status:" + k}) + } resultChan <- allRepos } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 0000000..5f71c4f --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,135 @@ +// Copyright 2025 Adevinta + +// Package metrics provides a wrapper to interact with StatsD. 
+package metrics + +import ( + "context" + "fmt" + "log/slog" + + "github.com/DataDog/datadog-go/statsd" + + "github.com/adevinta/ghe-reposec/internal/config" +) + +var ( + // ClientNotInitializedMsg is logged when the metrics client is not + // initialized and metrics are enabled. + ClientNotInitializedMsg = "metrics client not initialized" +) + +const ( + // DefaultMetricsClientAddr is the default metrics client address. + DefaultMetricsClientAddr = "localhost:8125" +) + +// Client represents a metrics service client. +type Client struct { + cfg config.MetricsConfig + client *statsd.Client + logger *slog.Logger + ctx context.Context +} + +// NewClient creates a new metrics client based on environment variables config. +func NewClient(ctx context.Context, logger *slog.Logger, cfg config.MetricsConfig) (*Client, error) { + if !cfg.Enabled { + logger.Info("metrics reporting disabled") + return &Client{}, nil + } + address := cfg.Address + if address == "" { + logger.Warn("metrics address not provided, using default", "address", DefaultMetricsClientAddr) + address = DefaultMetricsClientAddr + } + + statsd, err := statsd.New(address) + if err != nil { + return nil, err + } + + return &Client{ + cfg: cfg, + client: statsd, + logger: logger, + ctx: ctx, + }, nil +} + +// Gauge sends a gauge metric to the metrics service. +func (c *Client) Gauge(name string, value int, tags []string) { + if !c.cfg.Enabled { + return + } + if c.client == nil { + c.logger.Warn(ClientNotInitializedMsg) + return + } + tags = append(tags, c.cfg.Tags...) + name = fmt.Sprintf("%s.%s", c.cfg.Namespace, name) + err := c.client.Gauge(name, float64(value), tags, 1) + if err != nil { + c.logger.Error("gauge metric push error", "error", err) + return + } + c.logger.Debug("gauge metric pushed", "name", name, "value", value, "tags", tags) +} + +// ServiceCheck sends a service status signal to the metrics service. 
+func (c *Client) ServiceCheck(status byte, message string, tags []string) { + if !c.cfg.Enabled { + return + } + if c.client == nil { + c.logger.Warn(ClientNotInitializedMsg) + return + } + tags = append(tags, c.cfg.Tags...) + name := fmt.Sprintf("%s.service_check", c.cfg.Namespace) + err := c.client.ServiceCheck(&statsd.ServiceCheck{ + Name: name, + Status: statsd.ServiceCheckStatus(status), + Tags: tags, + Message: message, + }) + if err != nil { + c.logger.Error("service check push error", "error", err) + return + } + c.logger.Debug("service check pushed", "status", status, "message", message) +} + +// Close closes the metrics client. +func (c *Client) Close() { + if !c.cfg.Enabled { + return + } + if c.client == nil { + c.logger.Warn(ClientNotInitializedMsg) + return + } + err := c.client.Close() + if err != nil { + c.logger.Error("metrics client close error", "error", err) + return + } + c.logger.Debug("metrics client closed") +} + +// Flush flushes the metrics client. +func (c *Client) Flush() { + if !c.cfg.Enabled { + return + } + if c.client == nil { + c.logger.Warn(ClientNotInitializedMsg) + return + } + err := c.client.Flush() + if err != nil { + c.logger.Error("metrics client flush error", "error", err) + return + } + c.logger.Debug("metrics client flushed") +} diff --git a/main.go b/main.go index 54522ab..88c6d0a 100644 --- a/main.go +++ b/main.go @@ -13,6 +13,7 @@ import ( "github.com/adevinta/ghe-reposec/internal/config" "github.com/adevinta/ghe-reposec/internal/github" "github.com/adevinta/ghe-reposec/internal/lava" + "github.com/adevinta/ghe-reposec/internal/metrics" "github.com/adevinta/ghe-reposec/internal/output" ) @@ -31,33 +32,82 @@ func main() { ctx := context.Background() - cli, err := github.NewClient(ctx, &logger, cfg.GHECfg) + metrics, err := metrics.NewClient(ctx, &logger, cfg.MetricsCfg) + if err != nil { + logger.Error("failed to create metrics client", "error", err) + os.Exit(1) + } + defer func() { + metrics.Flush() + metrics.Close() 
+ }() + + cli, err := github.NewClient(ctx, &logger, metrics, cfg.GHECfg) if err != nil { logger.Error("failed to create GitHub client", "error", err) + metrics.ServiceCheck(2, err.Error(), []string{""}) os.Exit(1) } lava, err := lava.NewClient(ctx, &logger, cfg.LavaCfg) if err != nil { logger.Error("failed to create Lava client", "error", err) + metrics.ServiceCheck(2, err.Error(), []string{""}) os.Exit(1) } repos, err := cli.Repositories(cfg.TargetOrg) if err != nil { logger.Error("failed to fetch repositories", "error", err) + metrics.ServiceCheck(2, err.Error(), []string{""}) os.Exit(1) } logger.Info("repositories selected", "count", len(repos), "duration", time.Since(st).Seconds()) summary := lava.Scan(repos) + pushSummaryMetrics(metrics, summary) err = output.Write(cfg.OutputFormat, cfg.OutputFilePath, summary) if err != nil { logger.Error("failed to write output", "error", err) + metrics.ServiceCheck(2, err.Error(), []string{""}) os.Exit(1) } logger.Info("output written", "file", cfg.OutputFilePath) + metrics.Gauge("took", int(time.Since(st).Seconds()), []string{}) + metrics.ServiceCheck(0, "OK", []string{""}) + logger.Info("GitHub Enterprise reposec completed", "duration", time.Since(st).Seconds()) } + +func pushSummaryMetrics(m *metrics.Client, s []lava.Summary) { + sm := map[string]int{ + "with_controls": 0, + "without_controls": 0, + "error": 0, + } + cm := map[string]int{} + for _, s := range s { + if s.Error != "" { + sm["error"]++ + continue + } + if s.ControlInPlace { + sm["with_controls"]++ + } else { + sm["without_controls"]++ + } + for _, c := range s.Controls { + cm[c]++ + } + } + for k, v := range sm { + tags := []string{fmt.Sprintf("target:%s", k)} + m.Gauge("summary.status", v, tags) + } + for k, v := range cm { + tags := []string{fmt.Sprintf("control:%s", k)} + m.Gauge("summary.controls", v, tags) + } +}