Skip to content

feat: add database purge for old provisioner job logs and timings #18744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions coderd/database/dbauthz/dbauthz.go
Original file line number Diff line number Diff line change
Expand Up @@ -1510,6 +1510,20 @@ func (q *querier) DeleteOldWorkspaceAgentLogs(ctx context.Context, threshold tim
return q.db.DeleteOldWorkspaceAgentLogs(ctx, threshold)
}

// DeleteOldProvisionerJobLogs checks that the caller in ctx is authorized for
// policy.ActionDelete on rbac.ResourceSystem and, only if that check passes,
// forwards the call to the wrapped store. An authorization failure is
// returned unchanged and the store is not touched.
func (q *querier) DeleteOldProvisionerJobLogs(ctx context.Context, oldBuildThreshold time.Time) error {
	authErr := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem)
	if authErr != nil {
		return authErr
	}

	return q.db.DeleteOldProvisionerJobLogs(ctx, oldBuildThreshold)
}

// DeleteOldProvisionerJobTimings checks that the caller in ctx is authorized
// for policy.ActionDelete on rbac.ResourceSystem and, only if that check
// passes, forwards the call to the wrapped store. An authorization failure is
// returned unchanged and the store is not touched.
func (q *querier) DeleteOldProvisionerJobTimings(ctx context.Context, oldBuildThreshold time.Time) error {
	authErr := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem)
	if authErr != nil {
		return authErr
	}

	return q.db.DeleteOldProvisionerJobTimings(ctx, oldBuildThreshold)
}

func (q *querier) DeleteOldWorkspaceAgentStats(ctx context.Context) error {
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
return err
Expand Down
114 changes: 111 additions & 3 deletions coderd/database/dbmem/dbmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -2304,6 +2304,116 @@ func (q *FakeQuerier) DeleteOldWorkspaceAgentLogs(_ context.Context, threshold t
return nil
}

// provisionerJobIDsToPurgeNoLock returns the set of workspace-build job IDs
// whose logs and timings are eligible for purging:
//   - every build job belonging to a workspace marked Deleted, and
//   - for non-deleted workspaces, build jobs created before oldBuildThreshold,
//     except the build(s) carrying the highest BuildNumber for that workspace.
//
// Builds whose workspace is not present in q.workspaces are never selected.
// The caller must hold q.mutex.
func (q *FakeQuerier) provisionerJobIDsToPurgeNoLock(oldBuildThreshold time.Time) map[uuid.UUID]struct{} {
	// Index workspaces by ID so each build needs a single lookup instead of a
	// full scan. The first entry seen for an ID wins, which matches the result
	// of a first-match linear search.
	workspaceDeleted := make(map[uuid.UUID]bool, len(q.workspaces))
	for _, ws := range q.workspaces {
		if _, seen := workspaceDeleted[ws.ID]; !seen {
			workspaceDeleted[ws.ID] = ws.Deleted
		}
	}

	// Record, per workspace, the index of a build with the highest
	// BuildNumber. Only the number is compared later, so ties are harmless.
	latestBuildIdx := make(map[uuid.UUID]int, len(workspaceDeleted))
	for i, build := range q.workspaceBuilds {
		j, seen := latestBuildIdx[build.WorkspaceID]
		if !seen || build.BuildNumber > q.workspaceBuilds[j].BuildNumber {
			latestBuildIdx[build.WorkspaceID] = i
		}
	}

	jobIDs := make(map[uuid.UUID]struct{})
	for _, build := range q.workspaceBuilds {
		deleted, found := workspaceDeleted[build.WorkspaceID]
		if !found {
			continue
		}

		// Deleted workspaces lose the data for all of their builds.
		if deleted {
			jobIDs[build.JobID] = struct{}{}
			continue
		}

		// Live workspaces keep everything that is not older than the threshold.
		if !build.CreatedAt.Before(oldBuildThreshold) {
			continue
		}

		// An old build is purged only if a newer build (strictly greater
		// BuildNumber) exists for the same workspace.
		newest := q.workspaceBuilds[latestBuildIdx[build.WorkspaceID]]
		if newest.BuildNumber > build.BuildNumber {
			jobIDs[build.JobID] = struct{}{}
		}
	}
	return jobIDs
}

// DeleteOldProvisionerJobLogs removes provisioner job logs for every build of
// a deleted workspace and, for non-deleted workspaces, for builds created
// before oldBuildThreshold other than the workspace's latest build. It always
// returns nil.
func (q *FakeQuerier) DeleteOldProvisionerJobLogs(_ context.Context, oldBuildThreshold time.Time) error {
	q.mutex.Lock()
	defer q.mutex.Unlock()

	purge := q.provisionerJobIDsToPurgeNoLock(oldBuildThreshold)

	// Build a fresh slice rather than filtering in place so that any slice
	// previously handed out over the old backing array is left untouched.
	var remainingLogs []database.ProvisionerJobLog
	for _, log := range q.provisionerJobLogs {
		if _, drop := purge[log.JobID]; !drop {
			remainingLogs = append(remainingLogs, log)
		}
	}
	q.provisionerJobLogs = remainingLogs

	return nil
}

// DeleteOldProvisionerJobTimings removes provisioner job timings for every
// build of a deleted workspace and, for non-deleted workspaces, for builds
// created before oldBuildThreshold other than the workspace's latest build.
// It always returns nil.
func (q *FakeQuerier) DeleteOldProvisionerJobTimings(_ context.Context, oldBuildThreshold time.Time) error {
	q.mutex.Lock()
	defer q.mutex.Unlock()

	purge := q.provisionerJobIDsToPurgeNoLock(oldBuildThreshold)

	// Build a fresh slice rather than filtering in place so that any slice
	// previously handed out over the old backing array is left untouched.
	var remainingTimings []database.ProvisionerJobTiming
	for _, timing := range q.provisionerJobTimings {
		if _, drop := purge[timing.JobID]; !drop {
			remainingTimings = append(remainingTimings, timing)
		}
	}
	q.provisionerJobTimings = remainingTimings

	return nil
}

func (q *FakeQuerier) DeleteOldWorkspaceAgentStats(_ context.Context) error {
q.mutex.Lock()
defer q.mutex.Unlock()
Expand Down Expand Up @@ -4680,9 +4790,7 @@ func (q *FakeQuerier) GetProvisionerJobTimingsByJobID(_ context.Context, jobID u
timings = append(timings, timing)
}
}
if len(timings) == 0 {
return nil, sql.ErrNoRows
}
// Return empty slice if no timings found (don't return error)
sort.Slice(timings, func(i, j int) bool {
return timings[i].StartedAt.Before(timings[j].StartedAt)
})
Expand Down
14 changes: 14 additions & 0 deletions coderd/database/dbmetrics/querymetrics.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions coderd/database/dbmock/dbmock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions coderd/database/dbpurge/dbpurge.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz.
return xerrors.Errorf("failed to delete old notification messages: %w", err)
}

// Purge old provisioner job logs and timings
// - logs & timings for all deleted workspaces
// - all logs & timings (except the latest build) for non-deleted workspaces where builds are older than 90 days
oldBuildThreshold := start.Add(-90 * 24 * time.Hour) // 90 days
if err := tx.DeleteOldProvisionerJobLogs(ctx, oldBuildThreshold); err != nil {
return xerrors.Errorf("failed to delete old provisioner job logs: %w", err)
}
if err := tx.DeleteOldProvisionerJobTimings(ctx, oldBuildThreshold); err != nil {
return xerrors.Errorf("failed to delete old provisioner job timings: %w", err)
}

logger.Debug(ctx, "purged old database entries", slog.F("duration", clk.Since(start)))

return nil
Expand Down
Loading
Loading