Skip to content

feat: log resource drift warnings in all workspace builds #18355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1479,9 +1479,10 @@ func newProvisionerDaemon(

err := terraform.Serve(ctx, &terraform.ServeOptions{
ServeOptions: &provisionersdk.ServeOptions{
Listener: terraformServer,
Logger: provisionerLogger,
WorkDirectory: workDir,
Listener: terraformServer,
Logger: provisionerLogger,
WorkDirectory: workDir,
LogTerraformPlanOutput: coderAPI.DeploymentValues.EnableTerraformPlanLogging.Value(),
},
CachePath: tfDir,
Tracer: tracer,
Expand Down
3 changes: 3 additions & 0 deletions cli/testdata/coder_server_--help.golden
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ INTROSPECTION / LOGGING OPTIONS:
--enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false)
Allow administrators to enable Terraform debug output.

--enable-terraform-plan-logging bool, $CODER_ENABLE_TERRAFORM_PLAN_LOGGING (default: true)
See the output of `terraform show <plan>` on every workspace build.

--log-human string, $CODER_LOGGING_HUMAN (default: /dev/stderr)
Output human-readable logs to a given file.

Expand Down
3 changes: 3 additions & 0 deletions cli/testdata/server-config.yaml.golden
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ introspection:
# Allow administrators to enable Terraform debug output.
# (default: false, type: bool)
enableTerraformDebugMode: false
# See the output of `terraform show <plan>` on every workspace build.
# (default: true, type: bool)
enableTerraformPlanLogging: true
healthcheck:
# Refresh interval for healthchecks.
# (default: 10m0s, type: duration)
Expand Down
3 changes: 3 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions codersdk/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ type DeploymentValues struct {
DisableOwnerWorkspaceExec serpent.Bool `json:"disable_owner_workspace_exec,omitempty" typescript:",notnull"`
ProxyHealthStatusInterval serpent.Duration `json:"proxy_health_status_interval,omitempty" typescript:",notnull"`
EnableTerraformDebugMode serpent.Bool `json:"enable_terraform_debug_mode,omitempty" typescript:",notnull"`
EnableTerraformPlanLogging serpent.Bool `json:"enable_terraform_plan_logging,omitempty" typescript:",notnull"`
UserQuietHoursSchedule UserQuietHoursScheduleConfig `json:"user_quiet_hours_schedule,omitempty" typescript:",notnull"`
WebTerminalRenderer serpent.String `json:"web_terminal_renderer,omitempty" typescript:",notnull"`
AllowWorkspaceRenames serpent.Bool `json:"allow_workspace_renames,omitempty" typescript:",notnull"`
Expand Down Expand Up @@ -2266,6 +2267,16 @@ func (c *DeploymentValues) Options() serpent.OptionSet {
Group: &deploymentGroupIntrospectionLogging,
YAML: "enableTerraformDebugMode",
},
{
Name: "Enable Terraform plan logging",
Description: "See the output of `terraform show <plan>` on every workspace build.",
Flag: "enable-terraform-plan-logging",
Env: "CODER_ENABLE_TERRAFORM_PLAN_LOGGING",
Default: "true",
Value: &c.EnableTerraformPlanLogging,
Group: &deploymentGroupIntrospectionLogging,
YAML: "enableTerraformPlanLogging",
},
{
Name: "Additional CSP Policy",
Description: "Coder configures a Content Security Policy (CSP) to protect against XSS attacks. " +
Expand Down
1 change: 1 addition & 0 deletions docs/reference/api/general.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions docs/reference/api/schemas.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions docs/reference/cli/server.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 15 additions & 4 deletions enterprise/cli/provisionerdaemonstart.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (

"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
"github.com/coder/serpent"

agpl "github.com/coder/coder/v2/cli"
"github.com/coder/coder/v2/cli/clilog"
"github.com/coder/coder/v2/cli/cliui"
Expand All @@ -31,7 +33,6 @@ import (
provisionerdproto "github.com/coder/coder/v2/provisionerd/proto"
"github.com/coder/coder/v2/provisionersdk"
"github.com/coder/coder/v2/provisionersdk/proto"
"github.com/coder/serpent"
)

func (r *RootCmd) provisionerDaemonStart() *serpent.Command {
Expand All @@ -51,6 +52,8 @@ func (r *RootCmd) provisionerDaemonStart() *serpent.Command {

prometheusEnable bool
prometheusAddress string

logTerraformPlan bool
)
orgContext := agpl.NewOrganizationContext()
client := new(codersdk.Client)
Expand Down Expand Up @@ -186,9 +189,10 @@ func (r *RootCmd) provisionerDaemonStart() *serpent.Command {

err := terraform.Serve(ctx, &terraform.ServeOptions{
ServeOptions: &provisionersdk.ServeOptions{
Listener: terraformServer,
Logger: logger.Named("terraform"),
WorkDirectory: tempDir,
Listener: terraformServer,
Logger: logger.Named("terraform"),
WorkDirectory: tempDir,
LogTerraformPlanOutput: logTerraformPlan,
},
CachePath: cacheDir,
})
Expand Down Expand Up @@ -381,6 +385,13 @@ func (r *RootCmd) provisionerDaemonStart() *serpent.Command {
Value: serpent.StringOf(&prometheusAddress),
Default: "127.0.0.1:2112",
},
{
Flag: "enable-terraform-plan-logging",
Env: "CODER_ENABLE_TERRAFORM_PLAN_LOGGING",
Description: "See the output of `terraform show <plan>` on every workspace build.",
Default: "true",
Value: serpent.BoolOf(&logTerraformPlan),
},
}
orgContext.AttachOptions(cmd)

Expand Down
3 changes: 3 additions & 0 deletions enterprise/cli/testdata/coder_server_--help.golden
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ INTROSPECTION / LOGGING OPTIONS:
--enable-terraform-debug-mode bool, $CODER_ENABLE_TERRAFORM_DEBUG_MODE (default: false)
Allow administrators to enable Terraform debug output.

--enable-terraform-plan-logging bool, $CODER_ENABLE_TERRAFORM_PLAN_LOGGING (default: true)
See the output of `terraform show <plan>` on every workspace build.

--log-human string, $CODER_LOGGING_HUMAN (default: /dev/stderr)
Output human-readable logs to a given file.

Expand Down
69 changes: 41 additions & 28 deletions provisioner/terraform/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ type executor struct {
cliConfigPath string
workdir string
// used to capture execution times at various stages
timings *timingAggregator
timings *timingAggregator
logTerraformPlan bool
}

func (e *executor) basicEnv() []string {
Expand Down Expand Up @@ -326,23 +327,19 @@ func (e *executor) plan(ctx, killCtx context.Context, env, vars []string, logr l
}
}

// When a prebuild claim attempt is made, log a warning if a resource is due to be replaced, since this will obviate
// the point of prebuilding if the expensive resource is replaced once claimed!
var (
isPrebuildClaimAttempt = !destroy && metadata.GetPrebuiltWorkspaceBuildStage().IsPrebuiltWorkspaceClaim()
resReps []*proto.ResourceReplacement
)
if repsFromPlan := findResourceReplacements(plan); len(repsFromPlan) > 0 {
if isPrebuildClaimAttempt {
// TODO(dannyk): we should log drift always (not just during prebuild claim attempts); we're validating that this output
// will not be overwhelming for end-users, but it'll certainly be super valuable for template admins
// to diagnose this resource replacement issue, at least.
// Once prebuilds moves out of beta, consider deleting this condition.

// ONLY log drift when the request is a plan related to a workspace build.
if req.GetPlanType() == proto.PlanType_WORKSPACE_BUILD {
// ALWAYS show drift for prebuilt workspace claims, otherwise ONLY show if it explicitly configured to.
if req.GetMetadata().GetPrebuiltWorkspaceBuildStage().IsPrebuiltWorkspaceClaim() || e.logTerraformPlan {
// Lock held before calling (see top of method).
e.logDrift(ctx, killCtx, planfilePath, logr)
e.logDrift(ctx, killCtx, planfilePath, newDriftLogSink(logr, req))
}
}

// When a prebuild claim attempt is made, log a warning if a resource is due to be replaced, since this will obviate
// the point of prebuilding if the expensive resource is replaced once claimed!
var resReps []*proto.ResourceReplacement
if repsFromPlan := findResourceReplacements(plan); len(repsFromPlan) > 0 {
resReps = make([]*proto.ResourceReplacement, 0, len(repsFromPlan))
for n, p := range repsFromPlan {
resReps = append(resReps, &proto.ResourceReplacement{
Expand Down Expand Up @@ -432,10 +429,33 @@ func (e *executor) parsePlan(ctx, killCtx context.Context, planfilePath string)
return p, err
}

// driftLogSink raises the log level for resource replacements if the current build is a prebuilt workspace claim.
type driftLogSink struct {
logSink
req *proto.PlanRequest
}

func newDriftLogSink(inner logSink, req *proto.PlanRequest) *driftLogSink {
return &driftLogSink{logSink: inner, req: req}
}

func (c *driftLogSink) ProvisionLog(level proto.LogLevel, line string) {
// Raise the log level for resource replacements because this impacts prebuilt workspace claiming.
// See https://coder.com/docs/@main/admin/templates/extending-templates/prebuilt-workspaces#preventing-resource-replacement.
if c.req != nil && c.req.GetMetadata().GetPrebuiltWorkspaceBuildStage().IsPrebuiltWorkspaceClaim() {
// Terraform indicates that a resource will be deleted and recreated by showing the change along with this substring.
if strings.Contains(line, "# forces replacement") {
level = proto.LogLevel_WARN
}
}

c.logSink.ProvisionLog(level, line)
}

// logDrift must only be called while the lock is held.
// It will log the output of `terraform show`, which will show which resources have drifted from the known state.
func (e *executor) logDrift(ctx, killCtx context.Context, planfilePath string, logr logSink) {
stdout, stdoutDone := resourceReplaceLogWriter(logr, e.logger)
stdout, stdoutDone := passthruLogWriter(logr, e.logger)
stderr, stderrDone := logWriter(logr, proto.LogLevel_ERROR)
defer func() {
_ = stdout.Close()
Expand All @@ -450,12 +470,12 @@ func (e *executor) logDrift(ctx, killCtx context.Context, planfilePath string, l
}
}

// resourceReplaceLogWriter highlights log lines relating to resource replacement by elevating their log level.
// This will help template admins to visually find problematic resources easier.
// passthruLogWriter writes all given log lines as INFO level since the log lines themselves are assumed to not have
// any level indication. This can be used when showing the raw output of terraform commands.
//
// The WriteCloser must be closed by the caller to end logging, after which the returned channel will be closed to
// indicate that logging of the written data has finished. Failure to close the WriteCloser will leak a goroutine.
func resourceReplaceLogWriter(sink logSink, logger slog.Logger) (io.WriteCloser, <-chan struct{}) {
func passthruLogWriter(sink logSink, logger slog.Logger) (io.WriteCloser, <-chan struct{}) {
r, w := io.Pipe()
done := make(chan struct{})

Expand All @@ -464,15 +484,8 @@ func resourceReplaceLogWriter(sink logSink, logger slog.Logger) (io.WriteCloser,

scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := scanner.Bytes()
level := proto.LogLevel_INFO

// Terraform indicates that a resource will be deleted and recreated by showing the change along with this substring.
if bytes.Contains(line, []byte("# forces replacement")) {
level = proto.LogLevel_WARN
}

sink.ProvisionLog(level, string(line))
// TODO: this could be DEBUG level, alternatively.
sink.ProvisionLog(proto.LogLevel_INFO, string(scanner.Bytes()))
}
if err := scanner.Err(); err != nil {
logger.Error(context.Background(), "failed to read terraform log", slog.Error(err))
Expand Down
Loading
Loading