Hi, on May 25th around 5 PM I created two clusters with Terraform in the us-central1-c zone, and everything looked good as far as I could tell. The next day I wanted to modify one of the clusters, and the terraform plan command issued the following error:
│ Error: Get "https://34.135.135.31/apis/storage.k8s.io/v1/storageclasses/nfs-storage-prod-1": x509: certificate signed by unknown authority
│
│ with module.filestore.kubernetes_storage_class.nfs,
│ on filestore/main.tf line 40, in resource "kubernetes_storage_class" "nfs":
Curiously enough the other cluster also had the same issue. Clusters created on the 26th or before the 25th don't show this issue, only the ones on the 25th.
Since one of the clusters is meant for production, I need to find a solution to this problem. I tried to self-sign the certificate for 34.135.135.31 on my Mac and then on Linux, and the problem still persists.
Any ideas on how to proceed would be most welcome!
Thanks,
Igor
Solved! Go to Solution.
I'm not sure what the underlying problem is, but here is a workaround,
https://jhooq.com/x509-certificate-signed/
To summarize: you obtain the problematic certificate in .crt format, either by saving it with your browser or with the openssl command:
openssl s_client -connect registry.terraform.io:443 2>/dev/null </dev/null | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p'
then load the certificate into your OS certificate store and mark it as trusted.
Hello @astro-dl-iss,
Welcome to Google Cloud Community!
Can you post the code inside of your .tf file? Thanks
Hi Willbin!,
thank you so much for your reply
The directory structure for the simplified terraform project is as follows
./main.tf
./terraform.tfvars
./dev.tfvars
./cluster/gke.tf
where ./cluster is a terraform module in the main.tf
Running terraform apply executes successfully (us-central1-c zone), but if I then run 'terraform plan' afterwards I get:
# module.cluster.google_container_node_pool.medium-node-pool will be updated in-place
~ resource "google_container_node_pool" "medium-node-pool" {
id = "projects/data-lab-dev-01/locations/us-central1-c/clusters/dl-jhub-gke-dev-simple/nodePools/medium-pool"
name = "medium-pool"
~ node_count = 0 -> 1
# (8 unchanged attributes hidden)
# (5 unchanged blocks hidden)
}
Plan: 0 to add, 1 to change, 0 to destroy.
Changes to Outputs:
~ kubernetes_cluster_host = "https://34.66.33.155" -> (known after apply)
╷
│ Error: Get "https://34.66.33.155/api/v1/namespaces/dl-jhub-dev-simple": x509: “34.66.33.155” certificate is not standards compliant
│
│ with kubernetes_namespace.jhub-namespace,
│ on main.tf line 122, in resource "kubernetes_namespace" "jhub-namespace":
│ 122: resource "kubernetes_namespace" "jhub-namespace" {
│
╵
See terraform code below:
############
# main.tf  #
############
# Remote state: terraform.tfstate is kept in a GCS bucket so state is
# shared between runs and operators instead of living on one machine.
terraform {
backend "gcs" {
bucket = "backend-state-bucket"
prefix = "terraform.tfstate"
}
}
## Input variables for the root module.
variable "project_id" {
description = "project id"
}
variable "region" {
description = "region"
}
variable "env" {
description = "environment type. e.g. dev / dev2 / qa / prod , etc"
}
variable "cluster_prefix" {
description = "prefix for the cluster. e.g dl_jhub"
}
variable "gke_node_count" {
default = 3
description = "number of gke nodes"
}
variable "nb_pool_node_count" {
default = 1
description = "number of nodes in notebook pool"
}
variable "zone" {
description = "zone"
}
variable "service_account" {
description = "Google service account to use"
}
variable "cluster_machine_type" {
description = "Machine type for cluster"
}
variable "notebook_machine_type" {
# Fixed: description was a copy-paste of cluster_machine_type's.
description = "Machine type for the notebook (user) node pool"
}
variable "max_node_total" {
description = "Maximum number of instances in node"
}
variable "ip_address" {
description = "Static IP address for jupyterhub loadBalancer"
}
variable "host_dns_name" {
description = "DNS host name associated to static IP address"
}
variable "contact_email" {
default = "contact@email.com"
description = "email use in letsencrypt for jupyterhub https"
}
# NOTE(review): hyphenated names are legal HCL but unconventional;
# underscores (cull_timeout) are the norm. Left unchanged so any existing
# var.cull-timeout references elsewhere keep working.
variable "cull-timeout" {
default = 3600
description = "The idle timeout (in seconds) before the jupyterhub culling server removes idle kernels"
}
# Google provider; project and region come from variables. No explicit
# credentials argument here — presumably application-default credentials
# from the environment; confirm against your CI/workstation setup.
provider "google" {
project = var.project_id
region = var.region
}
# GKE cluster and its node pools, defined in ./cluster/gke.tf.
# All sizing/placement knobs are passed through from root variables.
module "cluster" {
source = "./cluster"
project_id = var.project_id
region = var.region
env = var.env
gke_node_count = var.gke_node_count
nb_pool_node_count = var.nb_pool_node_count
cluster_prefix = var.cluster_prefix
cluster_machine_type = var.cluster_machine_type
max_node_total = var.max_node_total
notebook_machine_type = var.notebook_machine_type
service_account = var.service_account
zone = var.zone
}
data "google_client_config" "default" {}
data "google_container_cluster" "this_cluster" {
depends_on = [module.cluster]
name = module.cluster.kubernetes_cluster_name
location = var.zone
}
# Kubernetes provider wired to the GKE cluster created by ./cluster.
# Both the endpoint and the CA certificate are now read from the SAME
# google_container_cluster data source, so they always describe the same
# refreshed cluster object. Previously `host` came from the module output
# while the CA came from the data source, which can pair a stale endpoint
# with a fresh certificate (or vice versa) during plan and surface as
# x509 verification failures.
provider "kubernetes" {
host = "https://${data.google_container_cluster.this_cluster.endpoint}"
token = data.google_client_config.default.access_token
cluster_ca_certificate = base64decode(data.google_container_cluster.this_cluster.master_auth[0].cluster_ca_certificate)
}
# Namespace JupyterHub is deployed into; named "<cluster_prefix>-<env>"
# (e.g. "dl-jhub-dev" with the tfvars shown below).
resource "kubernetes_namespace" "jhub-namespace" {
metadata {
labels = {
name = "jhub-namespace"
}
name = "${var.cluster_prefix}-${var.env}"
}
}
####################
# ./cluster/gke.tf #
####################
## input variables
variable "project_id" {
description = "project id"
}
variable "gke_node_count" {
default = 3
description = "number of gke nodes"
}
variable "nb_pool_node_count" {
# NOTE(review): this default (2) differs from the root module's default
# (1). Harmless while the root always passes a value, but worth aligning.
default = 2
description = "number of nodes in notebook pool"
}
variable "region" {
description = "region"
}
variable "zone" {
description = "zone"
}
variable "service_account" {
description = "Google service account to use"
}
variable "cluster_prefix" {
description = "Prefix to name cluster, vpc, etc"
}
variable "cluster_machine_type" {
description = "Machine type for cluster"
}
variable "notebook_machine_type" {
# Fixed: description was a copy-paste of cluster_machine_type's.
description = "Machine type for the notebook (user) node pool"
}
variable "max_node_total" {
description = "Maximum number of instances in node"
}
variable "env" {
description = "environment type. e.g. dev / dev2 / qa / prod , etc"
}
## output variables
output "kubernetes_cluster_name" {
value = google_container_cluster.jhub-cluster.name
description = "GKE Cluster Name"
}
output "location" {
value = google_container_cluster.jhub-cluster.location
description = "GCloud Zone"
}
output "cluster_endpoint" {
value = google_container_cluster.jhub-cluster.endpoint
# description added for consistency with the other outputs
description = "IP address of the GKE control-plane endpoint"
}
# GKE cluster
resource "google_container_cluster" "jhub-cluster" {
name = "${var.cluster_prefix}-gke-${var.env}"
# location is a single zone, so this is a zonal (not regional) cluster.
location = var.zone
# We can't create a cluster with no node pool defined, but we want to only use
# separately managed node pools. So we create the smallest possible default
# node pool and immediately delete it.
remove_default_node_pool = true
initial_node_count = 1
}
# Separately Managed Node Pool
# Node pool for core/system workloads (labelled nb_type = "primary").
resource "google_container_node_pool" "primary-cluster-node-pool" {
name = google_container_cluster.jhub-cluster.name
#location = var.region
location = var.zone
cluster = google_container_cluster.jhub-cluster.name
node_count = var.gke_node_count
autoscaling {
# NOTE(review): min == max == 2 pins this pool at exactly 2 nodes
# regardless of var.gke_node_count — presumably intentional; confirm.
max_node_count = 2
min_node_count = 2
}
# With autoscaling enabled the cluster autoscaler owns the real node
# count. Ignoring node_count drift stops every subsequent plan from
# trying to reset it back to var.gke_node_count.
lifecycle {
ignore_changes = [node_count]
}
node_config {
service_account = var.service_account
oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append"
]
labels = {
nb_type = "primary"
# NOTE(review): env is set to the project id, not var.env — confirm.
env = var.project_id
}
# preemptible = true
machine_type = var.cluster_machine_type
tags = ["gke-node", "${var.cluster_prefix}-gke-${var.env}"]
metadata = {
disable-legacy-endpoints = "true"
}
}
}
# Node pool for user notebook pods; tainted so only JupyterHub user pods
# (which tolerate hub.jupyter.org/dedicated=user) are scheduled here.
resource "google_container_node_pool" "medium-node-pool" {
name = "medium-pool"
#location = var.region
location = var.zone
cluster = google_container_cluster.jhub-cluster.name
node_count = var.nb_pool_node_count
autoscaling {
max_node_count = var.max_node_total
min_node_count = 0
}
# min_node_count = 0 lets the autoscaler scale this pool to zero when
# idle; without ignore_changes, the next plan then wants to push
# node_count back to var.nb_pool_node_count (the "~ node_count = 0 -> 1"
# in-place update seen in `terraform plan`). Ignoring node_count leaves
# the autoscaler in charge and removes the perpetual drift.
lifecycle {
ignore_changes = [node_count]
}
node_config {
service_account = var.service_account
oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append"
]
labels = {
"hub.jupyter.org/node-purpose" = "user"
nb_type = "medium"
# NOTE(review): env is set to the project id, not var.env — confirm.
env = var.project_id
}
taint {
key = "hub.jupyter.org/dedicated"
value = "user"
effect = "NO_SCHEDULE"
}
# preemptible = true
machine_type = var.notebook_machine_type
tags = ["notebook-node", "${var.cluster_prefix}-gke-${var.env}"]
metadata = {
disable-legacy-endpoints = "true"
}
}
}
####################
# terraform.tfvars #
####################
# Values shared by every environment; env-specific values (env) live in
# the per-environment *.tfvars files.
project_id = "my-project-id"
region = "us-central1"
zone = "us-central1-c"
cluster_prefix = "dl-jhub"
cluster_machine_type = "e2-standard-2"
notebook_machine_type = "n2-standard-4"
max_node_total = 20
service_account = "SA-with-lots-of-privileges@my-project-id.iam.gserviceaccount.com"
gke_node_count = 2
nb_pool_node_count = 1
##############
# dev.tfvars #
##############
# Environment-specific overrides; select with: terraform plan -var-file=dev.tfvars
env = "dev"
I'm not sure what the underlying problem is, but here is a workaround,
https://jhooq.com/x509-certificate-signed/
To summarize: you obtain the problematic certificate in .crt format, either by saving it with your browser or with the openssl command:
openssl s_client -connect registry.terraform.io:443 2>/dev/null </dev/null | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p'
load and add the certificate to your OS certificate storage and trust it.