Skip to content

ci: cache embedded postgres downloaded binaries #18477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/actions/embedded-pg-cache/download/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: "Download Embedded Postgres Cache"
description: |
  Downloads the embedded postgres cache and outputs today's cache key.
  A PR job can use a cache if it was created by its base branch, its current
  branch, or the default branch.
  https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache
outputs:
  cache-key:
    description: "Today's cache key"
    value: ${{ steps.vars.outputs.cache-key }}
inputs:
  key-prefix:
    description: "Prefix for the cache key"
    required: true
  cache-path:
    description: "Path to the cache directory"
    required: true
runs:
  using: "composite"
  steps:
    # Computes three step outputs:
    #   year-month       current month as YYYY-MM
    #   prev-year-month  previous month as YYYY-MM
    #   cache-key        <key-prefix>-YYYY-MM-DD for today
    - name: Get date values and cache key
      id: vars
      shell: bash
      run: |
        # Read the clock once so the year-month and the day cannot disagree
        # if the step happens to run across midnight.
        TODAY=$(date +'%Y-%m-%d')
        YEAR_MONTH=${TODAY%-*}
        DAY=${TODAY##*-}
        # `date -d 'last month'` keeps the day of month, so on the 29th-31st
        # it can overflow back into the current month (e.g. Mar 31 -> "Feb 31"
        # -> Mar 3). Anchoring on the 15th always lands in the previous month.
        PREV_YEAR_MONTH=$(date -d "${YEAR_MONTH}-15 -1 month" +'%Y-%m')
        echo "year-month=$YEAR_MONTH" >> "$GITHUB_OUTPUT"
        echo "prev-year-month=$PREV_YEAR_MONTH" >> "$GITHUB_OUTPUT"
        echo "cache-key=${{ inputs.key-prefix }}-${YEAR_MONTH}-${DAY}" >> "$GITHUB_OUTPUT"

    # By default, depot keeps caches for 14 days. This is plenty for embedded
    # postgres, which changes infrequently.
    # https://depot.dev/docs/github-actions/overview#cache-retention-policy
    - name: Download embedded Postgres cache
      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
      with:
        path: ${{ inputs.cache-path }}
        key: ${{ steps.vars.outputs.cache-key }}
        # > If there are multiple partial matches for a restore key, the action returns the most recently created cache.
        # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#matching-a-cache-key
        # The second restore key allows non-main branches to use the cache from the previous month.
        # This prevents PRs from rebuilding the cache on the first day of the month.
        # It also makes sure that once a month, the cache is fully reset.
        restore-keys: |
          ${{ inputs.key-prefix }}-${{ steps.vars.outputs.year-month }}-
          ${{ github.ref != 'refs/heads/main' && format('{0}-{1}-', inputs.key-prefix, steps.vars.outputs.prev-year-month) || '' }}
18 changes: 18 additions & 0 deletions .github/actions/embedded-pg-cache/upload/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: "Upload Embedded Postgres Cache"
description: >-
  Uploads the embedded Postgres cache. This only runs on the main branch.
inputs:
  # Key under which the cache entry is saved.
  cache-key:
    description: "Cache key"
    required: true
  # Directory whose contents are saved into the cache entry.
  cache-path:
    description: "Path to the cache directory"
    required: true
runs:
  using: "composite"
  steps:
    # The save step is skipped on every ref other than main, so other
    # branches never write cache entries through this action.
    - name: Upload Embedded Postgres cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
      with:
        path: ${{ inputs.cache-path }}
        key: ${{ inputs.cache-key }}
33 changes: 33 additions & 0 deletions .github/actions/setup-embedded-pg-cache-paths/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
name: "Setup Embedded Postgres Cache Paths"
description: Sets up a path for cached embedded postgres binaries.
outputs:
  embedded-pg-cache:
    description: "Value of EMBEDDED_PG_CACHE_DIR"
    value: ${{ steps.paths.outputs.embedded-pg-cache }}
  cached-dirs:
    description: "directories that should be cached between CI runs"
    value: ${{ steps.paths.outputs.cached-dirs }}
runs:
  using: "composite"
  steps:
    # Derives the cache directory from RUNNER_TEMP, exports it to later steps
    # as EMBEDDED_PG_CACHE_DIR, and publishes it through both step outputs.
    - name: Set embedded Postgres cache paths
      id: paths
      uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
      with:
        script: |
          const path = require('path');

          // RUNNER_TEMP should be backed by a RAM disk on Windows if
          // coder/setup-ramdisk-action was used
          const runnerTemp = process.env.RUNNER_TEMP;
          if (!runnerTemp) {
            // Fail with an actionable message instead of the opaque
            // TypeError that path.join raises for an undefined argument.
            throw new Error('RUNNER_TEMP is not set; cannot derive EMBEDDED_PG_CACHE_DIR');
          }
          const embeddedPgCacheDir = path.join(runnerTemp, 'embedded-pg-cache');
          core.exportVariable('EMBEDDED_PG_CACHE_DIR', embeddedPgCacheDir);
          core.setOutput('embedded-pg-cache', embeddedPgCacheDir);
          // Only one directory is cached today, so both outputs carry the
          // same value.
          core.setOutput('cached-dirs', embeddedPgCacheDir);

    # EMBEDDED_PG_CACHE_DIR is available here because the previous step
    # exported it.
    - name: Create directories
      shell: bash
      run: |
        set -e
        mkdir -p "$EMBEDDED_PG_CACHE_DIR"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This ends up being C:\actions-runner\_temp\embedded-pg-cache

23 changes: 21 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,17 @@ jobs:
with:
key-prefix: test-go-pg-${{ runner.os }}-${{ runner.arch }}

- name: Setup Embedded Postgres Cache Paths
id: embedded-pg-cache
uses: ./.github/actions/setup-embedded-pg-cache-paths

- name: Download Embedded Postgres Cache
id: download-embedded-pg-cache
uses: ./.github/actions/embedded-pg-cache/download
with:
key-prefix: embedded-pg-${{ runner.os }}-${{ runner.arch }}
cache-path: ${{ steps.embedded-pg-cache.outputs.cached-dirs }}

- name: Normalize File and Directory Timestamps
shell: bash
# Normalize file modification timestamps so that go test can use the
Expand All @@ -497,12 +508,12 @@ jobs:
# Create a temp dir on the R: ramdisk drive for Windows. The default
# C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755
mkdir -p "R:/temp/embedded-pg"
go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg"
go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" -cache "${EMBEDDED_PG_CACHE_DIR}"
elif [ "${{ runner.os }}" == "macOS" ]; then
# Postgres runs faster on a ramdisk on macOS too
mkdir -p /tmp/tmpfs
sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs
go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg
go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg -cache "${EMBEDDED_PG_CACHE_DIR}"
elif [ "${{ runner.os }}" == "Linux" ]; then
make test-postgres-docker
fi
Expand Down Expand Up @@ -571,6 +582,14 @@ jobs:
with:
cache-key: ${{ steps.download-cache.outputs.cache-key }}

- name: Upload Embedded Postgres Cache
uses: ./.github/actions/embedded-pg-cache/upload
# We only use the embedded Postgres cache on macOS and Windows runners.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not everywhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see an argument for using it on Linux too, but I'd suggest keeping it out of scope of this PR as it's a wider discussion.

Also, Docker is a nice abstraction for running a throw-away database.

if: runner.OS == 'macOS' || runner.OS == 'Windows'
with:
cache-key: ${{ steps.download-embedded-pg-cache.outputs.cache-key }}
cache-path: "${{ steps.embedded-pg-cache.outputs.embedded-pg-cache }}"

- name: Upload test stats to Datadog
timeout-minutes: 1
continue-on-error: true
Expand Down
47 changes: 39 additions & 8 deletions scripts/embedded-pg/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,43 @@ package main
import (
"database/sql"
"flag"
"log"
"os"
"path/filepath"
"time"

embeddedpostgres "github.com/fergusstrange/embedded-postgres"
)

func main() {
var customPath string
var cachePath string
flag.StringVar(&customPath, "path", "", "Optional custom path for postgres data directory")
flag.StringVar(&cachePath, "cache", "", "Optional custom path for embedded postgres binaries")
flag.Parse()

postgresPath := filepath.Join(os.TempDir(), "coder-test-postgres")
if customPath != "" {
postgresPath = customPath
}
if err := os.MkdirAll(postgresPath, os.ModePerm); err != nil {
log.Fatalf("Failed to create directory %s: %v", postgresPath, err)
}
if cachePath == "" {
cachePath = filepath.Join(postgresPath, "cache")
}
if err := os.MkdirAll(cachePath, os.ModePerm); err != nil {
log.Fatalf("Failed to create directory %s: %v", cachePath, err)
}

ep := embeddedpostgres.NewDatabase(
embeddedpostgres.DefaultConfig().
Version(embeddedpostgres.V16).
BinariesPath(filepath.Join(postgresPath, "bin")).
// Default BinaryRepositoryURL repo1.maven.org is flaky.
BinaryRepositoryURL("https://repo.maven.apache.org/maven2").
DataPath(filepath.Join(postgresPath, "data")).
RuntimePath(filepath.Join(postgresPath, "runtime")).
CachePath(filepath.Join(postgresPath, "cache")).
CachePath(cachePath).
Username("postgres").
Password("postgres").
Database("postgres").
Expand All @@ -38,8 +50,27 @@ func main() {
)
err := ep.Start()
if err != nil {
panic(err)
log.Fatalf("Failed to start embedded postgres: %v", err)
}

// Troubleshooting: list files in cachePath
if err := filepath.Walk(cachePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
switch {
case info.IsDir():
log.Printf("D: %s", path)
case info.Mode().IsRegular():
log.Printf("F: %s [%s] (%d bytes) %s", path, info.Mode().String(), info.Size(), info.ModTime().Format(time.RFC3339))
default:
log.Printf("Other: %s [%s] %s", path, info.Mode(), info.ModTime().Format(time.RFC3339))
}
return nil
}); err != nil {
log.Printf("Failed to list files in cachePath %s: %v", cachePath, err)
Comment on lines +56 to +71
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

review: I think it makes sense to leave this in; I found it useful for troubleshooting before I knew about the wush trick.

}

// We execute these queries instead of using the embeddedpostgres
// StartParams because it doesn't work on Windows. The library
// seems to have a bug where it sends malformed parameters to
Expand All @@ -58,21 +89,21 @@ func main() {
}
db, err := sql.Open("postgres", "postgres://postgres:postgres@127.0.0.1:5432/postgres?sslmode=disable")
if err != nil {
panic(err)
log.Fatalf("Failed to connect to embedded postgres: %v", err)
}
for _, query := range paramQueries {
if _, err := db.Exec(query); err != nil {
panic(err)
log.Fatalf("Failed to execute setup query %q: %v", query, err)
}
}
if err := db.Close(); err != nil {
panic(err)
log.Fatalf("Failed to close database connection: %v", err)
}
// We restart the database to apply all the parameters.
if err := ep.Stop(); err != nil {
panic(err)
log.Fatalf("Failed to stop embedded postgres after applying parameters: %v", err)
}
if err := ep.Start(); err != nil {
panic(err)
log.Fatalf("Failed to start embedded postgres after applying parameters: %v", err)
}
}
Loading