Skip to content

fix(agent/agentcontainers): avoid logspam in API updaterLoop #18710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion agent/agentcontainers/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ func (api *API) updaterLoop() {
// We utilize a TickerFunc here instead of a regular Ticker so that
// we can guarantee execution of the updateContainers method after
// advancing the clock.
var prevErr error
ticker := api.clock.TickerFunc(api.ctx, api.updateInterval, func() error {
done := make(chan error, 1)
var sent bool
Expand All @@ -466,9 +467,15 @@ func (api *API) updaterLoop() {
if err != nil {
if errors.Is(err, context.Canceled) {
api.logger.Warn(api.ctx, "updater loop ticker canceled", slog.Error(err))
} else {
return nil
}
// Avoid excessive logging of the same error.
if prevErr == nil || prevErr.Error() != err.Error() {
api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err))
}
prevErr = err
} else {
prevErr = nil
}
default:
api.logger.Debug(api.ctx, "updater loop ticker skipped, update in progress")
Expand Down
99 changes: 99 additions & 0 deletions agent/agentcontainers/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"golang.org/x/xerrors"

"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/agent/agentcontainers"
"github.com/coder/coder/v2/agent/agentcontainers/acmock"
Expand Down Expand Up @@ -342,6 +343,104 @@ func (f *fakeExecer) getLastCommand() *exec.Cmd {
func TestAPI(t *testing.T) {
t.Parallel()

t.Run("NoUpdaterLoopLogspam", func(t *testing.T) {
t.Parallel()

var (
ctx = testutil.Context(t, testutil.WaitShort)
logbuf strings.Builder
logger = slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug).AppendSinks(sloghuman.Sink(&logbuf))
mClock = quartz.NewMock(t)
tickerTrap = mClock.Trap().TickerFunc("updaterLoop")
firstErr = xerrors.New("first error")
secondErr = xerrors.New("second error")
fakeCLI = &fakeContainerCLI{
listErr: firstErr,
}
)

api := agentcontainers.NewAPI(logger,
agentcontainers.WithClock(mClock),
agentcontainers.WithContainerCLI(fakeCLI),
)
api.Start()
defer api.Close()

// Make sure the ticker function has been registered
// before advancing the clock.
tickerTrap.MustWait(ctx).MustRelease(ctx)
tickerTrap.Close()

logbuf.Reset()

// First tick should handle the error.
_, aw := mClock.AdvanceNext()
aw.MustWait(ctx)

// Verify first error is logged.
got := logbuf.String()
t.Logf("got log: %q", got)
require.Contains(t, got, "updater loop ticker failed", "first error should be logged")
require.Contains(t, got, "first error", "should contain first error message")
logbuf.Reset()

// Second tick should handle the same error without logging it again.
_, aw = mClock.AdvanceNext()
aw.MustWait(ctx)

// Verify same error is not logged again.
got = logbuf.String()
t.Logf("got log: %q", got)
require.Empty(t, got, "same error should not be logged again")

// Change to a different error.
fakeCLI.listErr = secondErr

// Third tick should handle the different error and log it.
_, aw = mClock.AdvanceNext()
aw.MustWait(ctx)

// Verify different error is logged.
got = logbuf.String()
t.Logf("got log: %q", got)
require.Contains(t, got, "updater loop ticker failed", "different error should be logged")
require.Contains(t, got, "second error", "should contain second error message")
logbuf.Reset()

// Clear the error to simulate success.
fakeCLI.listErr = nil

// Fourth tick should succeed.
_, aw = mClock.AdvanceNext()
aw.MustWait(ctx)

// Fifth tick should continue to succeed.
_, aw = mClock.AdvanceNext()
aw.MustWait(ctx)

// Verify successful operations are logged properly.
got = logbuf.String()
t.Logf("got log: %q", got)
gotSuccessCount := strings.Count(got, "containers updated successfully")
require.GreaterOrEqual(t, gotSuccessCount, 2, "should have successful update got")
require.NotContains(t, got, "updater loop ticker failed", "no errors should be logged during success")
logbuf.Reset()

// Reintroduce the original error.
fakeCLI.listErr = firstErr

// Sixth tick should handle the error after success and log it.
_, aw = mClock.AdvanceNext()
aw.MustWait(ctx)

// Verify error after success is logged.
got = logbuf.String()
t.Logf("got log: %q", got)
require.Contains(t, got, "updater loop ticker failed", "error after success should be logged")
require.Contains(t, got, "first error", "should contain first error message")
logbuf.Reset()
})

// List tests the API.getContainers method using a mock
// implementation. It specifically tests caching behavior.
t.Run("List", func(t *testing.T) {
Expand Down
Loading