Skip to content

Commit

Permalink
Added support for spot TPU node pools (#28)
Browse files: browse the repository at this point in the history
  • Loading branch information
jarokaz committed Jan 31, 2024
1 parent e23e129 commit 7e84b24
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ locals {
labels = node_pool.labels
oauth_scopes = node_pool.oauth_scopes
reservation_affinity = node_pool.reservation_affinity
spot = node_pool.spot
}
}
}
Expand Down Expand Up @@ -198,6 +199,7 @@ resource "google_container_node_pool" "tpu_node_pools" {
disk_type = each.value.disk_type
disk_size_gb = each.value.disk_size_gb
oauth_scopes = each.value.oauth_scopes
spot = each.value.spot
gvnic {
enabled = each.value.gvnic
}
Expand Down
5 changes: 3 additions & 2 deletions ai-infrastructure/terraform-modules/gke-aiml/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ variable "cluster_config" {
type = object({
name = optional(string, "gke-ml-cluster")
release_channel = optional(string, "REGULAR")
version = optional(string, "1.27.5-gke.200")
version = optional(string, null)
description = optional(string, "GKE ML inference cluster")
gcs_fuse_csi_driver = optional(bool, true)
gce_persistent_disk_csi_driver = optional(bool, true)
Expand Down Expand Up @@ -160,12 +160,13 @@ variable "tpu_node_pools" {
min_node_count = number
max_node_count = number
tpu_type = string
disk_type = optional(string, "pd-standard")
disk_type = optional(string, null)
disk_size_gb = optional(string, 200)
gvnic = optional(bool, true)
gcfs = optional(bool, true)
auto_repair = optional(bool, true)
auto_upgrade = optional(bool, true)
spot = optional(bool, false)
reservation_affinity = optional(object({
consume_reservation_type = string
key = string
Expand Down

0 comments on commit 7e84b24

Please sign in to comment.