diff --git a/Dockerfile b/Dockerfile index 8786c22..a18c8a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,4 +24,5 @@ LABEL org.opencontainers.image.title="OPNsense Prometheus Exporter" LABEL org.opencontainers.image.description="Prometheus exporter for OPNsense" COPY --from=build /usr/bin/opnsense-exporter / +EXPOSE 8080 ENTRYPOINT ["/opnsense-exporter"] diff --git a/README.md b/README.md index d9796c5..0bd0c9a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ The missing OPNsense exporter for Prometheus - **[Docker](#docker)** - **[Docker Compose](#docker-compose)** - **[Systemd](#systemd)** - - **[K8s](#k8s)** + - **[K8s](./deploy/k8s/readme.md)** - **[Configuration](#configuration)** - **[OPNsense API](#opnsense-api)** - **[SSL/TLS](#ssltls)** @@ -76,9 +76,10 @@ services: - --web.listen-address=:8080 #- --exporter.disable-arp-table #- --exporter.disable-cron-table + #- .... environment: - OPS_API_KEY: - OPS_API_SECRET: + OPNSENSE_EXPORTER_OPS_API_KEY: "" + OPNSENSE_EXPORTER_OPS_API_SECRET: "" ports: - "8080:8080" ``` @@ -88,8 +89,8 @@ services: Create the secrets ```bash -echo "" | docker secret create opnsense-api-key - -echo "" | docker secret create opnsense-api-secret - +echo "" | docker secret create opnsense-api-key - +echo "" | docker secret create opnsense-api-secret - ``` Run the compose @@ -108,6 +109,7 @@ services: - --web.listen-address=:8080 #- --exporter.disable-arp-table #- --exporter.disable-cron-table + #- .... environment: OPS_API_KEY_FILE: /run/secrets/opnsense-api-key OPS_API_SECRET_FILE: /run/secrets/opnsense-api-secret @@ -122,9 +124,6 @@ services: **TODO** -### K8s - -Is covered in the [deploy/k8s](./deploy/k8s/readme.md) directory. 
## Configuration diff --git a/VERSION b/VERSION index 6812f81..05b19b1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.3 \ No newline at end of file +0.0.4 \ No newline at end of file diff --git a/deploy/k8s/deployment.yaml b/deploy/k8s/deployment.yaml index e5fb75b..00398bf 100644 --- a/deploy/k8s/deployment.yaml +++ b/deploy/k8s/deployment.yaml @@ -34,7 +34,6 @@ spec: # Default value for --web.listen-address= is 8080 - name: metrics-http containerPort: 8080 - # If /metrics returns 200/OK, we can assume pod is OK to take traffic # Note: this will NOT catch issues with exporter/config itself. # e.g.: exporter comes up with _some_ metrics even if invalid API key... @@ -73,14 +72,16 @@ spec: # - name: OPNSENSE_EXPORTER_OPS_INSECURE # value: "true" - # In basic testing, I see memory around 15MB and CPU is negligible + # In basic testing against a home-lab OPNsense, 100m CPU and 64Mi memory are sufficient. + # However, if your OPNsense instance has a large number of rules, interfaces, etc., + # you may need to adjust these values. resources: requests: - memory: 32Mi - cpu: 10m - limits: memory: 64Mi - cpu: 20m + cpu: 100m + limits: + memory: 128Mi + cpu: 500m volumes: - name: api-key-vol secret: diff --git a/deploy/k8s/readme.md b/deploy/k8s/readme.md index 8fd2660..66b36a7 100644 --- a/deploy/k8s/readme.md +++ b/deploy/k8s/readme.md @@ -1,7 +1,5 @@ # Simple Kubernetes Deployment -Included are two files: - - `deployment.yaml`: sets up a bare-bones deployment for just the exporter and a service to expose it to the rest of the cluster.
- `scrape.yaml`: a [`ScrapeConfig`](https://prometheus-operator.dev/docs/user-guides/scrapeconfig/) CRD which will configure Prometheus to scrape metrics from the exporter diff --git a/deploy/k8s/scrape.yaml b/deploy/k8s/scrape.yaml index b5e3556..e5d8beb 100644 --- a/deploy/k8s/scrape.yaml +++ b/deploy/k8s/scrape.yaml @@ -5,17 +5,17 @@ apiVersion: monitoring.coreos.com/v1alpha1 kind: ScrapeConfig metadata: - name: opnsense-exporter - labels: - # If Prometheus Operator is installed with Helm chart, default config requires ScrapeConfig to have this label - release: "kube-prom" + name: opnsense-exporter + labels: + # If Prometheus Operator is installed with Helm chart, default config requires ScrapeConfig to have this label + release: "kube-prom" spec: - scrapeInterval: 60s - scrapeTimeout: 3s - metricsPath: /metrics - staticConfigs: - - labels: - # if the job label is not set, one will be automatically generated from the scrape-config name and namespace - job: opnsense-exporter - targets: - - opnsense-exporter.svc:8080 + scrapeInterval: 60s + scrapeTimeout: 3s + metricsPath: /metrics + staticConfigs: + - labels: + # if the job label is not set, one will be automatically generated from the scrape-config name and namespace + job: opnsense-exporter + targets: + - opnsense-exporter.svc:8080 diff --git a/docs/metrics.md b/docs/metrics.md index 5c8be39..9838cd7 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -1,10 +1,11 @@ ## OPNsense Exporter Metrics List -This table represent each metric and it's labels, the subsystem that it belongs, its description and how to disable it. +This table represents each metric and its labels, the subsystem it belongs to, its description and how to disable it. The opnsense_instance label is applied to all metrics.
| Metric Name | Type | Labels | Subsystem | Description | Disable Flag | | --- | --- | --- | --- | --- | --- | +opnsense_up | Gauge | opnsense_instance | n/a | The current status of OPNsense (1 = up, 0 = down) | n/a | opnsense_exporter_scrapes_total | Counter | n/a | n/a | Total number of scrapes by the OPNsense exporter | n/a | opnsense_exporter_endpoint_errors_total | Counter | endpoint | n/a | Total number of errors by endpoint returned by the OPNsense API during data fetching | n/a | opnsense_arp_table_entries | Gauge | expired, hostname, interface_description, ip, mac, permanent, type | ARP Table | Arp entries by ip, mac, hostname, interface description, type, expired and permanent | --exporter.disable-arp-table | @@ -14,11 +15,18 @@ opnsense_gateways_loss_percentage | Gauge | address, name | Gateways | The curre opnsense_gateways_rtt_milliseconds | Gauge | address, name | Gateways | RTT is the average (mean) of the round trip time in milliseconds by name and address | n/a | opnsense_gateways_rttd_milliseconds | Gauge | address, name | Gateways | RTTd is the standard deviation of the round trip time in milliseconds by name and address | n/a | opnsense_openvpn_instances | Gauge | description, device_type, role, uuid | OpenVPN | OpenVPN instances (1 = enabled, 0 = disabled) by role (server, client) | n/a | -opnsense_protocol_arp_sent_requests_total | Counter | n/a | Protocol Statistics | Total Number of sent ARP requests by the system | n/a | -opnsense_protocol_arp_received_requests_total | Counter | n/a | Protocol Statistics | Total Number of received ARP requests by the system | n/a | -opnsense_protocol_tcp_sent_packets_total | Counter | n/a | Protocol Statistics | Total Number of sent TCP packets by the system | n/a | -opnsense_protocol_tcp_received_packets_total | Counter | n/a | Protocol Statistics | Total Number of received TCP packets by the system | n/a | +opnsense_protocol_arp_sent_requests_total | Counter | n/a | Protocol Statistics | Total Number of 
sent ARP requests | n/a | +opnsense_protocol_arp_received_requests_total | Counter | n/a | Protocol Statistics | Total Number of received ARP requests | n/a | +opnsense_protocol_tcp_sent_packets_total | Counter | n/a | Protocol Statistics | Total Number of sent TCP packets | n/a | +opnsense_protocol_tcp_received_packets_total | Counter | n/a | Protocol Statistics | Total Number of received TCP packets | n/a | opnsense_protocol_tcp_connection_count_by_state | Gauge | state | Protocol Statistics | Number of TCP connections by state | n/a | +opnsense_protocol_udp_delivered_packets_total | Counter | n/a | Protocol Statistics | Total Number of delivered UDP packets | n/a | +opnsense_protocol_udp_output_packets_total | Counter | n/a | Protocol Statistics | Total Number of output UDP packets | n/a | +opnsense_protocol_udp_received_datagrams_total | Counter | n/a | Protocol Statistics | Total Number of received UDP Datagrams | n/a | +opnsense_protocol_udp_dropped_by_reason_total | CounterVector | reason | Protocol Statistics | Total Number of dropped UDP packets by reason | n/a | +opnsense_protocol_icmp_calls_total | Counter | n/a | Protocol Statistics | Total Number of ICMP calls | n/a | +opnsense_protocol_icmp_sent_packets_total | Counter | n/a | Protocol Statistics | Total Number of sent ICMP packets | n/a | +opnsense_protocol_icmp_dropped_by_reason_total | CounterVector | reason | Protocol Statistics | Total Number of dropped ICMP packets by reason | n/a | opnsense_services_running_total | Gauge | n/a | Services | Total number of running services | n/a | opnsense_services_stopped_total | Gauge | n/a | Services | Total number of stopped services | n/a | opnsense_services_status | Gauge | name, description | Services | Service status by name and description (1 = running, 0 = stopped) | n/a | diff --git a/internal/collector/collector.go b/internal/collector/collector.go index fee4b09..e640fcf 100644 --- a/internal/collector/collector.go +++ 
b/internal/collector/collector.go @@ -49,6 +49,7 @@ type Collector struct { log log.Logger collectors []CollectorInstance + isUp prometheus.Gauge scrapes prometheus.CounterVec endpointErrors prometheus.CounterVec } @@ -113,6 +114,15 @@ func New(client *opnsense.Client, log log.Logger, instanceName string, options . collector.Register(namespace, instanceName, c.log) } + c.isUp = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "up", + Help: "Was the last scrape of OPNsense successful. (1 = yes, 0 = no)", + ConstLabels: prometheus.Labels{ + instanceLabelName: instanceName, + }, + }) + c.scrapes = *prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: namespace, Name: "exporter_scrapes_total", @@ -125,8 +135,9 @@ func New(client *opnsense.Client, log log.Logger, instanceName string, options . Help: "Total number of errors by endpoint returned by the OPNsense API during data fetching", }, []string{"endpoint", "opnsense_instance"}) - prometheus.MustRegister(c.scrapes) - prometheus.MustRegister(c.endpointErrors) + for _, metric := range []prometheus.Collector{c.isUp, c.scrapes, c.endpointErrors} { + prometheus.MustRegister(metric) + } c.scrapes.WithLabelValues(c.instanceLabel).Add(0) @@ -140,6 +151,7 @@ func New(client *opnsense.Client, log log.Logger, instanceName string, options . 
func (c *Collector) Describe(ch chan<- *prometheus.Desc) { c.scrapes.Describe(ch) c.endpointErrors.Describe(ch) + c.isUp.Describe(ch) for _, collector := range c.collectors { collector.Describe(ch) @@ -151,6 +163,26 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { c.mutex.Lock() defer c.mutex.Unlock() + systemStatus, err := c.Client.HealthCheck() + + if err != nil { + level.Error(c.log).Log( + "msg", "failed to fetch system status", + "err", err, + ) + c.isUp.Set(0) + c.isUp.Collect(ch) + return + } + + if systemStatus.System.Status != opnsense.HealthCheckStatusOK { + c.isUp.Set(0) + c.isUp.Collect(ch) + return + } + + c.isUp.Set(1) + var wg sync.WaitGroup wg.Add(len(c.collectors)) @@ -173,4 +205,5 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { c.scrapes.WithLabelValues(c.instanceLabel).Inc() c.scrapes.Collect(ch) c.endpointErrors.Collect(ch) + c.isUp.Collect(ch) } diff --git a/internal/collector/protocol.go b/internal/collector/protocol.go index a6e8b4c..ed7e406 100644 --- a/internal/collector/protocol.go +++ b/internal/collector/protocol.go @@ -8,14 +8,25 @@ import ( ) type protocolCollector struct { - log log.Logger - subsystem string - instance string + log log.Logger + subsystem string + instance string + tcpConnectionCountByState *prometheus.Desc tcpSentPackets *prometheus.Desc tcpReceivedPackets *prometheus.Desc - arpSentRequests *prometheus.Desc - arpReceivedRequests *prometheus.Desc + + arpSentRequests *prometheus.Desc + arpReceivedRequests *prometheus.Desc + + icmpCalls *prometheus.Desc + icmpSentPackets *prometheus.Desc + icmpDroppedByReason *prometheus.Desc + + udpDeliveredPackets *prometheus.Desc + udpOutputPackets *prometheus.Desc + udpReceivedDatagrams *prometheus.Desc + udpDroppedByReason *prometheus.Desc } func init() { @@ -58,6 +69,37 @@ func (c *protocolCollector) Register(namespace, instanceLabel string, log log.Lo "Number of received ARP requests", nil, ) + c.icmpCalls = buildPrometheusDesc(c.subsystem, 
"icmp_calls_total", + "Number of ICMP calls", + nil, + ) + c.icmpSentPackets = buildPrometheusDesc(c.subsystem, "icmp_sent_packets_total", + "Number of sent ICMP packets", + nil, + ) + c.icmpDroppedByReason = buildPrometheusDesc(c.subsystem, "icmp_dropped_by_reason_total", + "Number of dropped ICMP packets by reason", + []string{"reason"}, + ) + c.udpDeliveredPackets = buildPrometheusDesc(c.subsystem, "udp_delivered_packets_total", + "Number of delivered UDP packets", + nil, + ) + + c.udpOutputPackets = buildPrometheusDesc(c.subsystem, "udp_output_packets_total", + "Number of output UDP packets", + nil, + ) + + c.udpReceivedDatagrams = buildPrometheusDesc(c.subsystem, "udp_received_datagrams_total", + "Number of received UDP datagrams", + nil, + ) + + c.udpDroppedByReason = buildPrometheusDesc(c.subsystem, "udp_dropped_by_reason_total", + "Number of dropped UDP packets by reason", + []string{"reason"}, + ) } func (c *protocolCollector) Describe(ch chan<- *prometheus.Desc) { @@ -66,6 +108,13 @@ func (c *protocolCollector) Describe(ch chan<- *prometheus.Desc) { ch <- c.tcpReceivedPackets ch <- c.arpSentRequests ch <- c.arpReceivedRequests + ch <- c.icmpCalls + ch <- c.icmpSentPackets + ch <- c.icmpDroppedByReason + ch <- c.udpDeliveredPackets + ch <- c.udpOutputPackets + ch <- c.udpReceivedDatagrams + ch <- c.udpDroppedByReason } func (c *protocolCollector) Update(client *opnsense.Client, ch chan<- prometheus.Metric) *opnsense.APICallError { @@ -73,13 +122,11 @@ func (c *protocolCollector) Update(client *opnsense.Client, ch chan<- prometheus if err != nil { return err } - for state, count := range data.TCPConnectionCountByState { ch <- prometheus.MustNewConstMetric( c.tcpConnectionCountByState, prometheus.GaugeValue, float64(count), state, c.instance, ) } - ch <- prometheus.MustNewConstMetric( c.tcpSentPackets, prometheus.CounterValue, float64(data.TCPSentPackets), c.instance, ) @@ -96,5 +143,31 @@ func (c *protocolCollector) Update(client *opnsense.Client, ch chan<- 
prometheus c.arpReceivedRequests, prometheus.CounterValue, float64(data.ARPReceivedRequests), c.instance, ) + ch <- prometheus.MustNewConstMetric( + c.icmpCalls, prometheus.CounterValue, float64(data.ICMPCalls), c.instance, + ) + + ch <- prometheus.MustNewConstMetric( + c.icmpSentPackets, prometheus.CounterValue, float64(data.ICMPSentPackets), c.instance, + ) + for reason, count := range data.ICMPDroppedByReason { + ch <- prometheus.MustNewConstMetric( + c.icmpDroppedByReason, prometheus.GaugeValue, float64(count), reason, c.instance, + ) + } + ch <- prometheus.MustNewConstMetric( + c.udpDeliveredPackets, prometheus.CounterValue, float64(data.UDPDeliveredPackets), c.instance, + ) + ch <- prometheus.MustNewConstMetric( + c.udpOutputPackets, prometheus.CounterValue, float64(data.UDPOutputPackets), c.instance, + ) + ch <- prometheus.MustNewConstMetric( + c.udpReceivedDatagrams, prometheus.CounterValue, float64(data.UDPReceivedDatagrams), c.instance, + ) + for reason, count := range data.UDPDroppedByReason { + ch <- prometheus.MustNewConstMetric( + c.udpDroppedByReason, prometheus.GaugeValue, float64(count), reason, c.instance, + ) + } return nil } diff --git a/opnsense/client.go b/opnsense/client.go index f0c9749..12e6987 100644 --- a/opnsense/client.go +++ b/opnsense/client.go @@ -80,6 +80,7 @@ func NewClient(cfg options.OPNSenseConfig, userAgentVersion string, log log.Logg "unboundDNSStatus": "api/unbound/diagnostics/stats", "cronJobs": "api/cron/settings/searchJobs", "wireguardClients": "api/wireguard/service/show", + "healthCheck": "api/core/system/status", }, headers: map[string]string{ "Accept": "application/json", diff --git a/opnsense/health_check.go b/opnsense/health_check.go new file mode 100644 index 0000000..4287e9d --- /dev/null +++ b/opnsense/health_check.go @@ -0,0 +1,44 @@ +package opnsense + +type HealthCheckResponse struct { + CrashReporter struct { + StatusCode int `json:"statusCode"` + Message string `json:"message"` + LogLocation string 
`json:"logLocation"` + Timestamp string `json:"timestamp"` + Status string `json:"status"` + } `json:"CrashReporter"` + Firewall struct { + StatusCode int `json:"statusCode"` + Message string `json:"message"` + LogLocation string `json:"logLocation"` + Timestamp string `json:"timestamp"` + Status string `json:"status"` + } `json:"Firewall"` + System struct { + Status string `json:"status"` + } `json:"System"` +} + +const HealthCheckStatusOK = "OK" + +// HealthCheck checks if the OPNsense is up and running. +func (c *Client) HealthCheck() (HealthCheckResponse, error) { + var resp HealthCheckResponse + + path, ok := c.endpoints["healthCheck"] + + if !ok { + return HealthCheckResponse{}, &APICallError{ + Endpoint: "healthCheck", + Message: "endpoint not found", + StatusCode: 0, + } + } + + if err := c.do("GET", path, nil, &resp); err != nil { + return HealthCheckResponse{}, err + } + + return resp, nil +} diff --git a/opnsense/protocol_statistics.go b/opnsense/protocol_statistics.go index dea8dd9..8b9110c 100644 --- a/opnsense/protocol_statistics.go +++ b/opnsense/protocol_statistics.go @@ -181,22 +181,14 @@ type protocolStatisticsResponse struct { DiscardBadAddress int `json:"discard-bad-address"` } `json:"ip"` Icmp struct { - IcmpCalls int `json:"icmp-calls"` - ErrorsNotFromMessage int `json:"errors-not-from-message"` - OutputHistogram []struct { - Name string `json:"name"` - Count int `json:"count"` - } `json:"output-histogram"` - DroppedBadCode int `json:"dropped-bad-code"` - DroppedTooShort int `json:"dropped-too-short"` - DroppedBadChecksum int `json:"dropped-bad-checksum"` - DroppedBadLength int `json:"dropped-bad-length"` - DroppedMulticastEcho int `json:"dropped-multicast-echo"` - DroppedMulticastTimestamp int `json:"dropped-multicast-timestamp"` - InputHistogram []struct { - Name string `json:"name"` - Count int `json:"count"` - } `json:"input-histogram"` + IcmpCalls int `json:"icmp-calls"` + ErrorsNotFromMessage int `json:"errors-not-from-message"` + 
DroppedBadCode int `json:"dropped-bad-code"` + DroppedTooShort int `json:"dropped-too-short"` + DroppedBadChecksum int `json:"dropped-bad-checksum"` + DroppedBadLength int `json:"dropped-bad-length"` + DroppedMulticastEcho int `json:"dropped-multicast-echo"` + DroppedMulticastTimestamp int `json:"dropped-multicast-timestamp"` SentPackets int `json:"sent-packets"` DiscardInvalidReturnAddress int `json:"discard-invalid-return-address"` DiscardNoRoute int `json:"discard-no-route"` @@ -263,6 +255,13 @@ type ProtocolStatistics struct { ARPSentRequests int ARPReceivedRequests int TCPConnectionCountByState map[string]int + ICMPCalls int + ICMPSentPackets int + ICMPDroppedByReason map[string]int + UDPDeliveredPackets int + UDPOutputPackets int + UDPReceivedDatagrams int + UDPDroppedByReason map[string]int } func (c *Client) FetchProtocolStatistics() (ProtocolStatistics, *APICallError) { @@ -299,6 +298,28 @@ func (c *Client) FetchProtocolStatistics() (ProtocolStatistics, *APICallError) { "FIN_WAIT_2": resp.Statistics.TCP.TCPConnectionCountByState.FinWait2, "TIME_WAIT": resp.Statistics.TCP.TCPConnectionCountByState.TimeWait, }, + ICMPCalls: resp.Statistics.Icmp.IcmpCalls, + ICMPSentPackets: resp.Statistics.Icmp.SentPackets, + ICMPDroppedByReason: map[string]int{ + "BAD_CODE": resp.Statistics.Icmp.DroppedBadCode, + "TOO_SHORT": resp.Statistics.Icmp.DroppedTooShort, + "BAD_CHECKSUM": resp.Statistics.Icmp.DroppedBadChecksum, + "BAD_LENGTH": resp.Statistics.Icmp.DroppedBadLength, + "MULTICAST_ECHO": resp.Statistics.Icmp.DroppedMulticastEcho, + "MULTICAST_TIMESTAMP": resp.Statistics.Icmp.DroppedMulticastTimestamp, + }, + UDPDeliveredPackets: resp.Statistics.UDP.DeliveredPackets, + UDPOutputPackets: resp.Statistics.UDP.OutputPackets, + UDPReceivedDatagrams: resp.Statistics.UDP.ReceivedDatagrams, + UDPDroppedByReason: map[string]int{ + "INCOMPLETE_HEADERS": resp.Statistics.UDP.DroppedIncompleteHeaders, + "BAD_DATA_LENGTH": resp.Statistics.UDP.DroppedBadDataLength, + "BAD_CHECKSUM": 
resp.Statistics.UDP.DroppedBadChecksum, + "NO_CHECKSUM": resp.Statistics.UDP.DroppedNoChecksum, + "NO_SOCKET": resp.Statistics.UDP.DroppedNoSocket, + "BROADCAST_MULTICAST": resp.Statistics.UDP.DroppedBroadcastMulticast, + "FULL_SOCKET_BUFFER": resp.Statistics.UDP.DroppedFullSocketBuffer, + }, } return out, nil