Skip to content

Commit 32f43de

Browse files
feat: add hard-limited presets metric
1 parent 4cb35c4 commit 32f43de

File tree

4 files changed

+82
-4
lines changed

4 files changed

+82
-4
lines changed

enterprise/coderd/prebuilds/metricscollector.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const (
2727
MetricDesiredGauge = namespace + "desired"
2828
MetricRunningGauge = namespace + "running"
2929
MetricEligibleGauge = namespace + "eligible"
30+
MetricHardLimitedPresetsGauge = namespace + "hard_limited_presets"
3031
MetricLastUpdatedGauge = namespace + "metrics_last_updated"
3132
)
3233

@@ -82,6 +83,12 @@ var (
8283
labels,
8384
nil,
8485
)
86+
// hardLimitedPresetsDesc describes the per-preset hard-limit gauge. Collect
// emits one sample per tracked preset whose value is 1 when the preset is
// hard-limited and 0 otherwise, so the help text describes a flag rather
// than a count of presets.
hardLimitedPresetsDesc = prometheus.NewDesc(
	MetricHardLimitedPresetsGauge,
	"Indicates whether a given preset has reached the hard failure limit and will no longer have prebuilds created (1 = hard-limited, 0 = not hard-limited).",
	labels,
	nil,
)
8592
lastUpdateDesc = prometheus.NewDesc(
8693
MetricLastUpdatedGauge,
8794
"The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached.",
@@ -104,17 +111,22 @@ type MetricsCollector struct {
104111

105112
replacementsCounter map[replacementKey]float64
106113
replacementsCounterMu sync.Mutex
114+
115+
isPresetHardLimited map[hardLimitedPresetKey]bool
116+
isPresetHardLimitedMu sync.Mutex
107117
}
108118

109119
// Compile-time assertion that *MetricsCollector satisfies prometheus.Collector.
var _ prometheus.Collector = new(MetricsCollector)
110120

111121
func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
112122
log := logger.Named("prebuilds_metrics_collector")
123+
113124
return &MetricsCollector{
114125
database: db,
115126
logger: log,
116127
snapshotter: snapshotter,
117128
replacementsCounter: make(map[replacementKey]float64),
129+
isPresetHardLimited: make(map[hardLimitedPresetKey]bool),
118130
}
119131
}
120132

@@ -126,6 +138,7 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
126138
descCh <- desiredPrebuildsDesc
127139
descCh <- runningPrebuildsDesc
128140
descCh <- eligiblePrebuildsDesc
141+
descCh <- hardLimitedPresetsDesc
129142
descCh <- lastUpdateDesc
130143
}
131144

@@ -173,6 +186,17 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
173186
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
174187
}
175188

189+
mc.isPresetHardLimitedMu.Lock()
190+
for key, isHardLimited := range mc.isPresetHardLimited {
191+
var val float64
192+
if isHardLimited {
193+
val = 1
194+
}
195+
196+
metricsCh <- prometheus.MustNewConstMetric(hardLimitedPresetsDesc, prometheus.GaugeValue, val, key.templateName, key.presetName, key.orgName)
197+
}
198+
mc.isPresetHardLimitedMu.Unlock()
199+
176200
metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, float64(currentState.createdAt.Unix()))
177201
}
178202

@@ -247,3 +271,20 @@ func (mc *MetricsCollector) trackResourceReplacement(orgName, templateName, pres
247271
// cause an issue (or indeed if either would), so we just track the replacement.
248272
mc.replacementsCounter[key]++
249273
}
274+
275+
// hardLimitedPresetKey identifies a preset by organization, template, and
// preset name. All fields are strings, so the type is comparable and can be
// used as a map key.
type hardLimitedPresetKey struct {
	orgName      string
	templateName string
	presetName   string
}

// String renders the key as "<org>:<template>:<preset>".
func (k hardLimitedPresetKey) String() string {
	const layout = "%s:%s:%s"
	return fmt.Sprintf(layout, k.orgName, k.templateName, k.presetName)
}
282+
283+
func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) {
284+
mc.isPresetHardLimitedMu.Lock()
285+
defer mc.isPresetHardLimitedMu.Unlock()
286+
287+
key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName}
288+
289+
mc.isPresetHardLimited[key] = isHardLimited
290+
}

enterprise/coderd/prebuilds/metricscollector_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@ package prebuilds_test
22

33
import (
44
"fmt"
5-
"slices"
6-
"testing"
7-
85
"github.com/google/uuid"
96
"github.com/stretchr/testify/require"
7+
"slices"
108
"tailscale.com/types/ptr"
9+
"testing"
1110

1211
"github.com/prometheus/client_golang/prometheus"
1312
prometheus_client "github.com/prometheus/client_model/go"

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,8 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
361361
slog.F("preset_name", ps.Preset.Name),
362362
)
363363

364+
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
365+
364366
// If the preset was previously hard-limited, log it and exit early.
365367
if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited {
366368
logger.Warn(ctx, "skipping hard limited preset")

enterprise/coderd/prebuilds/reconcile_test.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,8 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
696696
).Leveled(slog.LevelDebug)
697697
db, pubSub := dbtestutil.NewDB(t)
698698
fakeEnqueuer := newFakeEnqueuer()
699-
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry(), fakeEnqueuer)
699+
registry := prometheus.NewRegistry()
700+
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, registry, fakeEnqueuer)
700701

701702
// Template admin to receive a notification.
702703
templateAdmin := dbgen.User(t, db, database.User{
@@ -732,6 +733,17 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
732733
workspaceCount := len(workspaces)
733734
require.Equal(t, 1, workspaceCount)
734735

736+
// Verify initial state: metric is not set - meaning preset is not hard limited.
737+
require.NoError(t, controller.ForceMetricsUpdate(ctx))
738+
mf, err := registry.Gather()
739+
require.NoError(t, err)
740+
metric := findMetric(mf, prebuilds.MetricHardLimitedPresetsGauge, map[string]string{
741+
"template_name": template.Name,
742+
"preset_name": preset.Name,
743+
"org_name": org.Name,
744+
})
745+
require.Nil(t, metric)
746+
735747
// We simulate a failed prebuild in the test; Consequently, the backoff mechanism is triggered when ReconcileAll is called.
736748
// Even though ReconciliationBackoffInterval is set to zero, we still need to advance the clock by at least one nanosecond.
737749
clock.Advance(time.Nanosecond).MustWait(ctx)
@@ -755,6 +767,18 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
755767
// When hard limit is not reached, a new workspace should be created.
756768
require.Equal(t, 2, len(workspaces))
757769
require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus)
770+
771+
// When hard limit is not reached, metric is set to 0.
772+
mf, err = registry.Gather()
773+
require.NoError(t, err)
774+
metric = findMetric(mf, prebuilds.MetricHardLimitedPresetsGauge, map[string]string{
775+
"template_name": template.Name,
776+
"preset_name": preset.Name,
777+
"org_name": org.Name,
778+
})
779+
require.NotNil(t, metric)
780+
require.NotNil(t, metric.GetGauge())
781+
require.EqualValues(t, 0, metric.GetGauge().GetValue())
758782
return
759783
}
760784

@@ -775,6 +799,18 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
775799
return true
776800
})
777801
require.Len(t, matching, 1)
802+
803+
// When hard limit is reached, metric is set to 1.
804+
mf, err = registry.Gather()
805+
require.NoError(t, err)
806+
metric = findMetric(mf, prebuilds.MetricHardLimitedPresetsGauge, map[string]string{
807+
"template_name": template.Name,
808+
"preset_name": preset.Name,
809+
"org_name": org.Name,
810+
})
811+
require.NotNil(t, metric)
812+
require.NotNil(t, metric.GetGauge())
813+
require.EqualValues(t, 1, metric.GetGauge().GetValue())
778814
})
779815
}
780816
}

0 commit comments

Comments
 (0)