Skip to content

Commit 1d51dfc

Browse files
Merge remote-tracking branch 'origin/main' into 17432-limit-prebuild-failure-cost
2 parents 29e9cff + df56a13 commit 1d51dfc

File tree

379 files changed

+14599
-4382
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

379 files changed

+14599
-4382
lines changed

.github/actions/setup-go/action.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,15 @@ runs:
2626
export GOCACHE_DIR="$RUNNER_TEMP""\go-cache"
2727
export GOMODCACHE_DIR="$RUNNER_TEMP""\go-mod-cache"
2828
export GOPATH_DIR="$RUNNER_TEMP""\go-path"
29+
export GOTMP_DIR="$RUNNER_TEMP""\go-tmp"
2930
mkdir -p "$GOCACHE_DIR"
3031
mkdir -p "$GOMODCACHE_DIR"
3132
mkdir -p "$GOPATH_DIR"
33+
mkdir -p "$GOTMP_DIR"
3234
go env -w GOCACHE="$GOCACHE_DIR"
3335
go env -w GOMODCACHE="$GOMODCACHE_DIR"
3436
go env -w GOPATH="$GOPATH_DIR"
35-
37+
go env -w GOTMPDIR="$GOTMP_DIR"
3638
- name: Setup Go
3739
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
3840
with:

.github/workflows/ci.yaml

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ jobs:
188188
189189
# Check for any typos
190190
- name: Check for typos
191-
uses: crate-ci/typos@b1a1ef3893ff35ade0cfa71523852a49bfd05d19 # v1.31.1
191+
uses: crate-ci/typos@0f0ccba9ed1df83948f0c15026e4f5ccfce46109 # v1.32.0
192192
with:
193193
config: .github/workflows/typos.toml
194194

@@ -382,8 +382,8 @@ jobs:
382382
touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile
383383
fi
384384
export TS_DEBUG_DISCO=true
385-
gotestsum --junitfile="gotests.xml" --jsonfile="gotests.json" \
386-
--packages="./..." -- $PARALLEL_FLAG -short -failfast
385+
gotestsum --junitfile="gotests.xml" --jsonfile="gotests.json" --rerun-fails=2 \
386+
--packages="./..." -- $PARALLEL_FLAG -short
387387
388388
- name: Upload Test Cache
389389
uses: ./.github/actions/test-cache/upload
@@ -436,6 +436,7 @@ jobs:
436436
TS_DEBUG_DISCO: "true"
437437
LC_CTYPE: "en_US.UTF-8"
438438
LC_ALL: "en_US.UTF-8"
439+
TEST_RETRIES: 2
439440
shell: bash
440441
run: |
441442
# By default Go will use the number of logical CPUs, which
@@ -453,7 +454,7 @@ jobs:
453454
api-key: ${{ secrets.DATADOG_API_KEY }}
454455

455456
test-go-pg:
456-
runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-4' || matrix.os }}
457+
runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || matrix.os }}
457458
needs: changes
458459
if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main'
459460
# This timeout must be greater than the timeout set by `go test` in
@@ -499,6 +500,7 @@ jobs:
499500
TS_DEBUG_DISCO: "true"
500501
LC_CTYPE: "en_US.UTF-8"
501502
LC_ALL: "en_US.UTF-8"
503+
TEST_RETRIES: 2
502504
shell: bash
503505
run: |
504506
# By default Go will use the number of logical CPUs, which
@@ -560,6 +562,7 @@ jobs:
560562
env:
561563
POSTGRES_VERSION: "16"
562564
TS_DEBUG_DISCO: "true"
565+
TEST_RETRIES: 2
563566
run: |
564567
make test-postgres
565568
@@ -784,6 +787,7 @@ jobs:
784787
if: ${{ !matrix.variant.premium }}
785788
env:
786789
DEBUG: pw:api
790+
CODER_E2E_TEST_RETRIES: 2
787791
working-directory: site
788792

789793
# Run all of the tests with a premium license
@@ -793,6 +797,7 @@ jobs:
793797
DEBUG: pw:api
794798
CODER_E2E_LICENSE: ${{ secrets.CODER_E2E_LICENSE }}
795799
CODER_E2E_REQUIRE_PREMIUM_TESTS: "1"
800+
CODER_E2E_TEST_RETRIES: 2
796801
working-directory: site
797802

798803
- name: Upload Playwright Failed Tests

.github/workflows/dependabot.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
steps:
2424
- name: Dependabot metadata
2525
id: metadata
26-
uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # v2.3.0
26+
uses: dependabot/fetch-metadata@08eff52bf64351f401fb50d4972fa95b9f2c2d1b # v2.4.0
2727
with:
2828
github-token: "${{ secrets.GITHUB_TOKEN }}"
2929

.github/workflows/docs-ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
- name: Setup Node
2929
uses: ./.github/actions/setup-node
3030

31-
- uses: tj-actions/changed-files@5426ecc3f5c2b10effaefbd374f0abdc6a571b2f # v45.0.7
31+
- uses: tj-actions/changed-files@480f49412651059a414a6a5c96887abb1877de8a # v45.0.7
3232
id: changed-files
3333
with:
3434
files: |

.github/workflows/nightly-gauntlet.yaml

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ permissions:
1212

1313
jobs:
1414
test-go-pg:
15-
runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }}
16-
if: github.ref == 'refs/heads/main'
15+
# make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below
16+
# when changing runner sizes
17+
runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }}
1718
# This timeout must be greater than the timeout set by `go test` in
1819
# `make test-postgres` to ensure we receive a trace of running
1920
# goroutines. Setting this to the timeout +5m should work quite well
@@ -31,22 +32,39 @@ jobs:
3132
with:
3233
egress-policy: audit
3334

35+
# macOS indexes all new files in the background. Our Postgres tests
36+
# create and destroy thousands of databases on disk, and Spotlight
37+
# tries to index all of them, seriously slowing down the tests.
38+
- name: Disable Spotlight Indexing
39+
if: runner.os == 'macOS'
40+
run: |
41+
sudo mdutil -a -i off
42+
sudo mdutil -X /
43+
sudo launchctl bootout system /System/Library/LaunchDaemons/com.apple.metadata.mds.plist
44+
45+
# Set up RAM disks to speed up the rest of the job. This action is in
46+
# a separate repository to allow its use before actions/checkout.
47+
- name: Setup RAM Disks
48+
if: runner.os == 'Windows'
49+
uses: coder/setup-ramdisk-action@79dacfe70c47ad6d6c0dd7f45412368802641439
50+
3451
- name: Checkout
3552
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
3653
with:
3754
fetch-depth: 1
3855

3956
- name: Setup Go
4057
uses: ./.github/actions/setup-go
58+
with:
59+
# Runners have Go baked-in and Go will automatically
60+
# download the toolchain configured in go.mod, so we don't
61+
# need to reinstall it. It's faster on Windows runners.
62+
use-preinstalled-go: ${{ runner.os == 'Windows' }}
63+
use-temp-cache-dirs: ${{ runner.os == 'Windows' }}
4164

4265
- name: Setup Terraform
4366
uses: ./.github/actions/setup-tf
4467

45-
# Sets up the ImDisk toolkit for Windows and creates a RAM disk on drive R:.
46-
- name: Setup ImDisk
47-
if: runner.os == 'Windows'
48-
uses: ./.github/actions/setup-imdisk
49-
5068
- name: Test with PostgreSQL Database
5169
env:
5270
POSTGRES_VERSION: "13"
@@ -55,6 +73,19 @@ jobs:
5573
LC_ALL: "en_US.UTF-8"
5674
shell: bash
5775
run: |
76+
if [ "${{ runner.os }}" == "Windows" ]; then
77+
# Create a temp dir on the R: ramdisk drive for Windows. The default
78+
# C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755
79+
mkdir -p "R:/temp/embedded-pg"
80+
go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg"
81+
fi
82+
if [ "${{ runner.os }}" == "macOS" ]; then
83+
# Postgres runs faster on a ramdisk on macOS too
84+
mkdir -p /tmp/tmpfs
85+
sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs
86+
go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg
87+
fi
88+
5889
# if macOS, install google-chrome for scaletests
5990
# As another concern, should we really have this kind of external dependency
6091
# requirement on standard CI?
@@ -72,19 +103,29 @@ jobs:
72103
touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile
73104
fi
74105
106+
# Golang's default for these 2 variables is the number of logical CPUs.
107+
# Our Windows and Linux runners have 16 cores, so they match up there.
108+
NUM_PARALLEL_PACKAGES=16
109+
NUM_PARALLEL_TESTS=16
75110
if [ "${{ runner.os }}" == "Windows" ]; then
76-
# Create a temp dir on the R: ramdisk drive for Windows. The default
77-
# C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755
78-
mkdir -p "R:/temp/embedded-pg"
79-
go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg"
80-
else
81-
go run scripts/embedded-pg/main.go
111+
# On Windows, Postgres chokes up when we have 16x16=256 tests
112+
# running in parallel, and dbtestutil.NewDB starts to take more than
113+
# 10s to complete, sometimes causing test timeouts. With 16x8=128 tests
114+
# Postgres tends not to choke.
115+
NUM_PARALLEL_PACKAGES=8
116+
fi
117+
if [ "${{ runner.os }}" == "macOS" ]; then
118+
# Our macOS runners have 8 cores. We leave NUM_PARALLEL_TESTS at 16
119+
# because the tests complete faster and Postgres doesn't choke. It seems
120+
# that macOS's tmpfs is faster than the one on Windows.
121+
NUM_PARALLEL_PACKAGES=8
82122
fi
83123
84-
# Reduce test parallelism, mirroring what we do for race tests.
85-
# We'd been encountering issues with timing related flakes, and
86-
# this seems to help.
87-
DB=ci gotestsum --format standard-quiet -- -v -short -count=1 -parallel 4 -p 4 ./...
124+
# We rerun failing tests to counteract flakiness coming from Postgres
125+
# choking on macOS and Windows sometimes.
126+
DB=ci gotestsum --rerun-fails=2 --rerun-fails-max-failures=1000 \
127+
--format standard-quiet --packages "./..." \
128+
-- -v -p $NUM_PARALLEL_PACKAGES -parallel=$NUM_PARALLEL_TESTS -count=1
88129
89130
- name: Upload test stats to Datadog
90131
timeout-minutes: 1

.github/workflows/scorecard.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,6 @@ jobs:
4747

4848
# Upload the results to GitHub's code scanning dashboard.
4949
- name: "Upload to code-scanning"
50-
uses: github/codeql-action/upload-sarif@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
50+
uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17
5151
with:
5252
sarif_file: results.sarif

.github/workflows/security.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
uses: ./.github/actions/setup-go
3939

4040
- name: Initialize CodeQL
41-
uses: github/codeql-action/init@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
41+
uses: github/codeql-action/init@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17
4242
with:
4343
languages: go, javascript
4444

@@ -48,7 +48,7 @@ jobs:
4848
rm Makefile
4949
5050
- name: Perform CodeQL Analysis
51-
uses: github/codeql-action/analyze@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
51+
uses: github/codeql-action/analyze@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17
5252

5353
- name: Send Slack notification on failure
5454
if: ${{ failure() }}
@@ -150,7 +150,7 @@ jobs:
150150
severity: "CRITICAL,HIGH"
151151

152152
- name: Upload Trivy scan results to GitHub Security tab
153-
uses: github/codeql-action/upload-sarif@28deaeda66b76a05916b6923827895f2b14ab387 # v3.28.16
153+
uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b # v3.28.17
154154
with:
155155
sarif_file: trivy-results.sarif
156156
category: "Trivy"

.github/workflows/weekly-docs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
reporter: github-pr-review
3737
config_file: ".github/.linkspector.yml"
3838
fail_on_error: "true"
39-
filter_mode: "nofilter"
39+
filter_mode: "file"
4040

4141
- name: Send Slack notification
4242
if: failure() && github.event_name == 'schedule'

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ site/stats/
5050
*.tfplan
5151
*.lock.hcl
5252
.terraform/
53+
!coderd/testdata/parameters/modules/.terraform/
54+
!provisioner/terraform/testdata/modules-source-caching/.terraform/
5355

5456
**/.coderv2/*
5557
**/__debug_bin
@@ -82,3 +84,5 @@ result
8284

8385
# dlv debug binaries for go tests
8486
__debug_bin*
87+
88+
**/.claude/settings.local.json

Makefile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -875,12 +875,19 @@ provisioner/terraform/testdata/version:
875875
fi
876876
.PHONY: provisioner/terraform/testdata/version
877877

878+
# Set the retry flags if TEST_RETRIES is set
879+
ifdef TEST_RETRIES
880+
GOTESTSUM_RETRY_FLAGS := --rerun-fails=$(TEST_RETRIES)
881+
else
882+
GOTESTSUM_RETRY_FLAGS :=
883+
endif
884+
878885
test:
879-
$(GIT_FLAGS) gotestsum --format standard-quiet -- -v -short -count=1 ./... $(if $(RUN),-run $(RUN))
886+
$(GIT_FLAGS) gotestsum --format standard-quiet $(GOTESTSUM_RETRY_FLAGS) --packages="./..." -- -v -short -count=1 $(if $(RUN),-run $(RUN))
880887
.PHONY: test
881888

882889
test-cli:
883-
$(GIT_FLAGS) gotestsum --format standard-quiet -- -v -short -count=1 ./cli/...
890+
$(GIT_FLAGS) gotestsum --format standard-quiet $(GOTESTSUM_RETRY_FLAGS) --packages="./cli/..." -- -v -short -count=1
884891
.PHONY: test-cli
885892

886893
# sqlc-cloud-is-setup will fail if no SQLc auth token is set. Use this as a
@@ -919,9 +926,9 @@ test-postgres: test-postgres-docker
919926
$(GIT_FLAGS) DB=ci gotestsum \
920927
--junitfile="gotests.xml" \
921928
--jsonfile="gotests.json" \
929+
$(GOTESTSUM_RETRY_FLAGS) \
922930
--packages="./..." -- \
923931
-timeout=20m \
924-
-failfast \
925932
-count=1
926933
.PHONY: test-postgres
927934

agent/agent.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ type Options struct {
8989
ServiceBannerRefreshInterval time.Duration
9090
BlockFileTransfer bool
9191
Execer agentexec.Execer
92+
SubAgent bool
9293

9394
ExperimentalDevcontainersEnabled bool
9495
ContainerAPIOptions []agentcontainers.Option // Enable ExperimentalDevcontainersEnabled for these to be effective.
@@ -190,6 +191,8 @@ func New(options Options) Agent {
190191
metrics: newAgentMetrics(prometheusRegistry),
191192
execer: options.Execer,
192193

194+
subAgent: options.SubAgent,
195+
193196
experimentalDevcontainersEnabled: options.ExperimentalDevcontainersEnabled,
194197
containerAPIOptions: options.ContainerAPIOptions,
195198
}
@@ -272,6 +275,8 @@ type agent struct {
272275
metrics *agentMetrics
273276
execer agentexec.Execer
274277

278+
subAgent bool
279+
275280
experimentalDevcontainersEnabled bool
276281
containerAPIOptions []agentcontainers.Option
277282
containerAPI atomic.Pointer[agentcontainers.API] // Set by apiHandler.
@@ -363,9 +368,11 @@ func (a *agent) runLoop() {
363368
if ctx.Err() != nil {
364369
// Context canceled errors may come from websocket pings, so we
365370
// don't want to use `errors.Is(err, context.Canceled)` here.
371+
a.logger.Warn(ctx, "runLoop exited with error", slog.Error(ctx.Err()))
366372
return
367373
}
368374
if a.isClosed() {
375+
a.logger.Warn(ctx, "runLoop exited because agent is closed")
369376
return
370377
}
371378
if errors.Is(err, io.EOF) {
@@ -1046,7 +1053,11 @@ func (a *agent) run() (retErr error) {
10461053
return a.statsReporter.reportLoop(ctx, aAPI)
10471054
})
10481055

1049-
return connMan.wait()
1056+
err = connMan.wait()
1057+
if err != nil {
1058+
a.logger.Info(context.Background(), "connection manager errored", slog.Error(err))
1059+
}
1060+
return err
10501061
}
10511062

10521063
// handleManifest returns a function that fetches and processes the manifest
@@ -1085,6 +1096,8 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
10851096
if err != nil {
10861097
return xerrors.Errorf("expand directory: %w", err)
10871098
}
1099+
// Normalize all devcontainer paths by making them absolute.
1100+
manifest.Devcontainers = agentcontainers.ExpandAllDevcontainerPaths(a.logger, expandPathToAbs, manifest.Devcontainers)
10881101
subsys, err := agentsdk.ProtoFromSubsystems(a.subsystems)
10891102
if err != nil {
10901103
a.logger.Critical(ctx, "failed to convert subsystems", slog.Error(err))
@@ -1127,7 +1140,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
11271140
)
11281141
if a.experimentalDevcontainersEnabled {
11291142
var dcScripts []codersdk.WorkspaceAgentScript
1130-
scripts, dcScripts = agentcontainers.ExtractAndInitializeDevcontainerScripts(a.logger, expandPathToAbs, manifest.Devcontainers, scripts)
1143+
scripts, dcScripts = agentcontainers.ExtractAndInitializeDevcontainerScripts(manifest.Devcontainers, scripts)
11311144
// See ExtractAndInitializeDevcontainerScripts for motivation
11321145
// behind running dcScripts as post start scripts.
11331146
scriptRunnerOpts = append(scriptRunnerOpts, agentscripts.WithPostStartScripts(dcScripts...))

0 commit comments

Comments
 (0)