Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pingdom_min_request_limit internal metric #15

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,18 @@ on how to build your own image and push it to your private registry.

## Exported Metrics

| Metric Name | Description |
| --------------------------------------------------- | ------------------------------------------------------------------------------- |
| `pingdom_up` | Was the last query on Pingdom API successful |
| `pingdom_uptime_status` | The current status of the check (1: up, 0: down) |
| `pingdom_uptime_response_time_seconds` | The response time of last test, in seconds |
| `pingdom_slo_period_seconds` | Outage check period, in seconds (see `-outage-check-period` flag) |
| `pingdom_outages_total` | Number of outages within the outage check period |
| `pingdom_down_seconds` | Total down time within the outage check period, in seconds |
| `pingdom_up_seconds` | Total up time within the outage check period, in seconds |
| `pingdom_uptime_slo_error_budget_total_seconds` | Maximum number of allowed downtime, in seconds, according to the uptime SLO |
| `pingdom_uptime_slo_error_budget_available_seconds` | Number of seconds of downtime we can still have without breaking the uptime SLO |
| Metric Name | Description |
| --------------------------------------------------- |----------------------------------------------------------------------------------------------------------|
| `pingdom_up` | Was the last query on Pingdom API successful |
| `pingdom_rate_limit_remaining_requests` | The remaining requests allowed before hitting the short-term or long-term rate limit in the Pingdom API. |
| `pingdom_uptime_status` | The current status of the check (1: up, 0: down) |
| `pingdom_uptime_response_time_seconds` | The response time of last test, in seconds |
| `pingdom_slo_period_seconds` | Outage check period, in seconds (see `-outage-check-period` flag) |
| `pingdom_outages_total` | Number of outages within the outage check period |
| `pingdom_down_seconds` | Total down time within the outage check period, in seconds |
| `pingdom_up_seconds` | Total up time within the outage check period, in seconds |
| `pingdom_uptime_slo_error_budget_total_seconds` | Maximum amount of downtime allowed, in seconds, according to the uptime SLO |
| `pingdom_uptime_slo_error_budget_available_seconds` | Number of seconds of downtime we can still have without breaking the uptime SLO |

## Development

Expand Down
15 changes: 14 additions & 1 deletion cmd/pingdom-exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ var (
nil, nil,
)

pingdomRateLimitRemainingRequestsDesc = prometheus.NewDesc(
"pingdom_rate_limit_remaining_requests",
"Tracks the remaining requests allowed before hitting the short-term or long-term rate limit in the Pingdom API.",
nil, nil,
)

danielfm marked this conversation as resolved.
Show resolved Hide resolved
pingdomOutageCheckPeriodDesc = prometheus.NewDesc(
"pingdom_slo_period_seconds",
"Outage check period, in seconds",
Expand Down Expand Up @@ -96,6 +102,7 @@ type pingdomCollector struct {

func (pc pingdomCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- pingdomUpDesc
ch <- pingdomRateLimitRemainingRequestsDesc
ch <- pingdomOutageCheckPeriodDesc
ch <- pingdomCheckStatusDesc
ch <- pingdomCheckResponseTimeDesc
Expand All @@ -110,11 +117,17 @@ func (pc pingdomCollector) Collect(ch chan<- prometheus.Metric) {
outageCheckPeriodDuration := time.Hour * time.Duration(24*outageCheckPeriod)
outageCheckPeriodSecs := float64(outageCheckPeriodDuration / time.Second)

checks, err := pc.client.Checks.List(map[string]string{
checks, minReqLimit, err := pc.client.Checks.List(map[string]string{
"include_tags": "true",
"tags": pc.client.Tags,
})

ch <- prometheus.MustNewConstMetric(
pingdomRateLimitRemainingRequestsDesc,
prometheus.GaugeValue,
minReqLimit,
)

if err != nil {
fmt.Fprintf(os.Stderr, "Error getting checks: %v", err)
ch <- prometheus.MustNewConstMetric(
Expand Down
40 changes: 35 additions & 5 deletions pkg/pingdom/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@ package pingdom
import (
"encoding/json"
"io/ioutil"
"math"
"net/http"
"regexp"
"strconv"
)

var (
// reqLimitHeaderKeys names the response headers that are inspected for
// rate-limit information. Each one is read from the response and matched
// against reqLimitRe.
reqLimitHeaderKeys = []string{
"req-limit-short",
"req-limit-long",
}
// reqLimitRe matches a header value of the form
// "Remaining: <digits> Time until reset: <digits>" and captures both
// numbers; the first capture group is the remaining-request count.
reqLimitRe = regexp.MustCompile(`Remaining: (\d+) Time until reset: (\d+)`)
)

// CheckService provides an interface to Pingdom checks.
Expand All @@ -13,30 +25,48 @@ type CheckService struct {
// List returns a list of checks from Pingdom.
// This returns type CheckResponse rather than Check since the
// Pingdom API does not return a complete representation of a check.
//
// params is optional: its first element is used as the request parameters
// only when exactly one map is passed; otherwise an empty map is sent.
//
// In the three-value form, the float64 result is whatever
// minRequestLimitFromHeader derives from the response headers. It is 0 when
// the request could not be built or sent, and it is still returned alongside
// the error when validateResponse rejects the response.
func (cs *CheckService) List(params ...map[string]string) ([]CheckResponse, error) {
func (cs *CheckService) List(params ...map[string]string) ([]CheckResponse, float64, error) {
param := map[string]string{}
if len(params) == 1 {
param = params[0]
}
req, err := cs.client.NewRequest("GET", "/checks", param)
if err != nil {
return nil, err
return nil, 0, err
}

resp, err := cs.client.client.Do(req)
if err != nil {
return nil, err
return nil, 0, err
}
defer resp.Body.Close()

// Read the rate-limit headers before validating the response so the value
// can be returned even when validation fails.
minRequestLimit := minRequestLimitFromHeader(resp.Header)

if err := validateResponse(resp); err != nil {
return nil, err
return nil, minRequestLimit, err
}

// NOTE(review): the error from ReadAll is discarded here, and the body is
// converted to a string and back to []byte before decoding.
bodyBytes, _ := ioutil.ReadAll(resp.Body)
bodyString := string(bodyBytes)
m := &listChecksJSONResponse{}
err = json.Unmarshal([]byte(bodyString), &m)

// The decoded checks are returned together with any JSON decoding error.
return m.Checks, err
return m.Checks, minRequestLimit, err
}

// minRequestLimitFromHeader returns the smallest "Remaining" count found in
// the rate-limit headers listed in reqLimitHeaderKeys.
//
// A header that is absent, does not match reqLimitRe, or whose count cannot
// be parsed as a float is ignored. When no header yields a usable value the
// result is math.MaxFloat64.
func minRequestLimitFromHeader(header http.Header) float64 {
	lowest := math.MaxFloat64

	for i := range reqLimitHeaderKeys {
		raw := header.Get(reqLimitHeaderKeys[i])

		groups := reqLimitRe.FindStringSubmatch(raw)
		if len(groups) == 0 {
			// Header missing or not in the expected format.
			continue
		}

		// groups[1] is the first capture: the remaining-request count.
		remaining, parseErr := strconv.ParseFloat(groups[1], 64)
		if parseErr != nil {
			continue
		}

		if remaining < lowest {
			lowest = remaining
		}
	}

	return lowest
}
49 changes: 48 additions & 1 deletion pkg/pingdom/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package pingdom

import (
"fmt"
"math"
"net/http"
"testing"

Expand All @@ -14,6 +15,7 @@ func TestCheckServiceList(t *testing.T) {

mux.HandleFunc("/checks", func(w http.ResponseWriter, r *http.Request) {
testMethod(t, r, "GET")
w.Header().Set("req-limit-long", "Remaining: 12 Time until reset: 34")
fmt.Fprint(w, `{
"checks": [
{
Expand Down Expand Up @@ -141,7 +143,52 @@ func TestCheckServiceList(t *testing.T) {
},
}

checks, err := client.Checks.List()
checks, minRequestLimit, err := client.Checks.List()
assert.NoError(t, err)
assert.Equal(t, want, checks)
assert.EqualValues(t, 12, minRequestLimit)
}

// TestMinRequestLimitFromResp checks minRequestLimitFromHeader against a
// table of header sets: no headers, each rate-limit header alone, both
// together (the smaller count wins), and a value that is not in the
// expected format.
func TestMinRequestLimitFromResp(t *testing.T) {
	type scenario struct {
		header   http.Header
		expected float64
	}

	scenarios := []scenario{
		// No rate-limit headers at all.
		{header: http.Header{}, expected: math.MaxFloat64},
		// Only the short-term header.
		{
			header: http.Header{
				"Req-Limit-Short": []string{"Remaining: 12 Time until reset: 34"},
			},
			expected: 12,
		},
		// Only the long-term header.
		{
			header: http.Header{
				"Req-Limit-Long": []string{"Remaining: 56 Time until reset: 78"},
			},
			expected: 56,
		},
		// Both headers present; the lower remaining count is expected.
		{
			header: http.Header{
				"Req-Limit-Long":  []string{"Remaining: 0 Time until reset: 78"},
				"Req-Limit-Short": []string{"Remaining: 12 Time until reset: 34"},
			},
			expected: 0,
		},
		// Header value that does not follow the expected format.
		{
			header: http.Header{
				"Req-Limit-Long": []string{"invalid"},
			},
			expected: math.MaxFloat64,
		},
	}

	for i := range scenarios {
		sc := scenarios[i]
		name := fmt.Sprintf("%v", sc.header)
		t.Run(name, func(t *testing.T) {
			got := minRequestLimitFromHeader(sc.header)
			assert.Equal(t, sc.expected, got)
		})
	}
}
Loading