Asked by lea*_*ner · tags: kubernetes, azure-aks
I have a module defined as follows:
===
provider.tf
provider "kubernetes" {
#load_config_file = "false"
host = azurerm_kubernetes_cluster.aks.kube_config.0.host
username = azurerm_kubernetes_cluster.aks.kube_config.0.username
password = azurerm_kubernetes_cluster.aks.kube_config.0.password
client_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate)
client_key = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.client_key)
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate)
}
outputs.tf
output "node_resource_group" {
value = azurerm_kubernetes_cluster.aks.node_resource_group
description = "The name of resource group where the AKS Nodes are created"
}
output "kubeConfig" {
value = azurerm_kubernetes_cluster.aks.kube_config_raw
description = "Kubeconfig of AKS Cluster"
}
output "host" {
value = azurerm_kubernetes_cluster.aks.kube_config.0.host
}
output "client_key" {
value = azurerm_kubernetes_cluster.aks.kube_config.0.client_key
}
output "client_certificate" {
value = azurerm_kubernetes_cluster.aks.kube_config.0.client_certificate
}
output "kube_config" {
value = azurerm_kubernetes_cluster.aks.kube_config_raw
}
output "cluster_ca_certificate" {
value = azurerm_kubernetes_cluster.aks.kube_config.0.cluster_ca_certificate
}
main.tf
resource "azurerm_log_analytics_workspace" "law" {
name = "${var.tla}-la-${local.lookup_result}-${var.identifier}"
location = data.azurerm_resource_group.rg.location
resource_group_name = data.azurerm_resource_group.rg.name
sku = var.la_sku
retention_in_days = 30
}
resource "azurerm_kubernetes_cluster" "aks" {
name = "${var.tla}-aks-${local.lookup_result}-${var.identifier}"
location = data.azurerm_resource_group.rg.location
resource_group_name = data.azurerm_resource_group.rg.name
dns_prefix = var.dns_prefix
kubernetes_version = var.kubernetes_version
sku_tier = var.sku_tier
private_cluster_enabled = var.enable_private_cluster
#api_server_authorized_ip_ranges = ""
default_node_pool {
name = "syspool001"
orchestrator_version = var.orchestrator_version
availability_zones = var.agents_availability_zones
enable_auto_scaling = true
node_count = var.default_pool_node_count
max_count = var.default_pool_max_node_count
min_count = var.default_pool_min_node_count
max_pods = var.default_pool_max_pod_count
vm_size = var.agents_size
enable_node_public_ip = false
os_disk_size_gb = var.default_pool_os_disk_size_gb
type = "VirtualMachineScaleSets"
vnet_subnet_id = var.vnet_subnet_id
node_labels = var.agents_labels
tags = merge(local.tags, var.agents_tags)
}
network_profile {
network_plugin = var.network_plugin
network_policy = var.network_policy
dns_service_ip = var.net_profile_dns_service_ip
docker_bridge_cidr = var.net_profile_docker_bridge_cidr
service_cidr = var.net_profile_service_cidr
}
role_based_access_control {
enabled = true
azure_active_directory {
managed = true
admin_group_object_ids = var.rbac_aad_admin_group_object_ids
}
}
identity {
type = "SystemAssigned"
}
addon_profile {
azure_policy {
enabled = true
}
http_application_routing {
enabled = false
}
oms_agent {
enabled = true
log_analytics_workspace_id = data.azurerm_log_analytics_workspace.log_analytics.id
}
}
tags = local.tags
lifecycle {
ignore_changes = [
default_node_pool
]
}
}
resource "azurerm_kubernetes_cluster_node_pool" "aksnp" {
lifecycle {
ignore_changes = [
node_count
]
}
for_each = var.additional_node_pools
kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id
name = each.value.node_os == "Windows" ? substr(each.key, 0, 6) : substr(each.key, 0, 12)
node_count = each.value.node_count
vm_size = each.value.vm_size
availability_zones = each.value.zones
max_pods = each.value.max_pods
os_disk_size_gb = each.value.os_disk_size_gb
os_type = each.value.node_os
vnet_subnet_id = var.vnet_subnet_id
node_taints = each.value.taints
enable_auto_scaling = each.value.cluster_auto_scaling
min_count = each.value.cluster_auto_scaling_min_count
max_count = each.value.cluster_auto_scaling_max_count
}
resource "kubernetes_namespace" "aks-namespace" {
metadata {
name = var.namespace
}
}
data.tf
data "azurerm_resource_group" "rg" {
name = var.resource_group_name
}
lookup.tf
locals {
  environment_lookup = {
    dev  = "d"
    test = "t"
    int  = "i"
    prod = "p"
    prd  = "p"
    uat  = "a"
    poc  = "d"
    dr   = "r"
    lab  = "l"
  }

  lookup_result = lookup(local.environment_lookup, var.environment)

  tags = merge(
    data.azurerm_resource_group.rg.tags, {
      Directory      = "tectcompany.com",
      PrivateDNSZone = var.private_dns_zone,
      Immutable      = "False",
      ManagedOS      = "True",
    }
  )
}

data "azurerm_log_analytics_workspace" "log_analytics" {
  name                = "abc-az-lad2"
  resource_group_name = "abc-dev-aae"
}
variables.tf
variable "secondary_region" {
description = "Is this resource being deployed into the secondary (pair) region?"
default = false
type = bool
}
variable "override_log_analytics_workspace" {
description = "Override the vm log analytics workspace"
type = string
default = null
}
variable "override_log_analytics_resource_group_name" {
description = "Overrides the log analytics resource group name"
type = string
default = null
}
variable "environment" {
description = "The name of environment for the AKS Cluster"
type = string
default = "dev"
}
variable "identifier" {
description = "The identifier for the AKS Cluster"
type = number
default = "001"
}
variable "kubernetes_version" {
description = "Specify which Kubernetes release to use. The default used is the latest Kubernetes version available in the region"
type = string
default = "1.19.9"
}
variable "dns_prefix" {
description = "The dns prefix for the AKS Cluster"
type = string
default = "odessa-sandpit"
}
variable "orchestrator_version" {
description = "Specify which Kubernetes release to use for the orchestration layer. The default used is the latest Kubernetes version available in the region"
type = string
default = null
}
variable "agents_availability_zones" {
description = "(Optional) A list of Availability Zones across which the Node Pool should be spread. Changing this forces a new resource to be created."
type = list(string)
default = null
}
variable "agents_size" {
default = "Standard_D4s_v3"
description = "The default virtual machine size for the Kubernetes agents"
type = string
}
variable "vnet_subnet_id" {
description = "(Optional) The ID of a Subnet where the Kubernetes Node Pool should exist. Changing this forces a new resource to be created."
type = string
default = null
}
variable "agents_labels" {
description = "(Optional) A map of Kubernetes labels which should be applied to nodes in the Default Node Pool. Changing this forces a new resource to be created."
type = map(string)
default = {}
}
variable "agents_tags" {
description = "(Optional) A mapping of tags to assign to the Node Pool."
type = map(string)
default = {}
}
variable "net_profile_dns_service_ip" {
description = "(Optional) IP address within the Kubernetes service address range that will be used by cluster service discovery (kube-dns). Changing this forces a new resource to be created."
type = string
default = null
}
variable "net_profile_docker_bridge_cidr" {
description = "(Optional) IP address (in CIDR notation) used as the Docker bridge IP address on nodes. Changing this forces a new resource to be created."
type = string
default = null
}
variable "net_profile_service_cidr" {
description = "(Optional) The Network Range used by the Kubernetes service. Changing this forces a new resource to be created."
type = string
default = null
}
variable "rbac_aad_admin_group_object_ids" {
description = "Object ID of groups with admin access."
type = list(string)
default = null
}
variable "network_policy" {
description = "(Optional) The Network Policy to be used by the network profile of Azure Kubernetes Cluster."
type = string
default = "azure"
}
variable "network_plugin" {
description = "(Optional) The Network Plugin to be used by the network profile of Azure Kubernetes Cluster."
type = string
default = "azure"
}
variable "enable_private_cluster" {
description = "(Optional) Set this variable to true if you want Azure Kubernetes Cluster to be private."
default = true
}
variable "default_pool_node_count" {
description = "(Optional) The initial node count for the default pool of AKS Cluster"
type = number
default = 3
}
variable "default_pool_max_node_count" {
description = "(Optional) The max node count for the default pool of AKS Cluster"
type = number
default = 6
}
variable "default_pool_min_node_count" {
description = "(Optional) The min node count for the default pool of AKS Cluster"
type = number
default = 3
}
variable "default_pool_max_pod_count" {
description = "(Optional) The max pod count for the default pool of AKS Cluster"
type = number
default = 13
}
variable "default_pool_os_disk_size_gb" {
description = "(Optional) The size of os disk in gb for the nodes from default pool of AKS Cluster"
type = string
default = "64"
}
variable "additional_node_pools" {
type = map(object({
node_count = number
max_pods = number
os_disk_size_gb = number
vm_size = string
zones = list(string)
node_os = string
taints = list(string)
cluster_auto_scaling = bool
cluster_auto_scaling_min_count = number
cluster_auto_scaling_max_count = number
}))
}
variable "sku_tier" {
description = "(Optional)The SKU Tier that should be used for this Kubernetes Cluster, possible values Free or Paid"
type = string
default = "Paid"
validation {
condition = contains(["Free", "Paid"], var.sku_tier)
error_message = "SKU_TIER can only be either Paid or Free."
}
}
variable "la_sku" {
description = "(Optional)The SKU Tier that should be used for Log Analytics. Multiple values are possible."
type = string
default = "PerGB2018"
validation {
condition = contains(["Free", "PerNode", "Premium", "Standard", "Standalone", "Unlimited", "CapacityReservation", "PerGB2018"], var.la_sku)
error_message = "SKU_TIER for Log Analytics can be can only be either of Free, PerNode, Premium, Standard, Standalone, Unlimited, CapacityReservation and PerGB2018(Default Value)."
}
}
variable "resource_group_name" {
description = "Resource Group for deploying AKS Cluster"
type = string
}
variable "private_dns_zone" {
description = "DNS prefix for AKS Cluster"
type = string
default = "testcluster"
}
variable "tla" {
description = "Three Level acronym - three letter abbreviation for application"
type = string
default = ""
validation {
condition = length(var.tla) == 3
error_message = "The TLA should be precisely three characters."
}
}
variable "namespace"{
description = "AKS Namespace"
type = string
}
Finally, I call the module as shown below to create the AKS cluster, the Log Analytics workspace, and a namespace in the AKS cluster:
provider "azurerm" {
features {}
#version = "~> 2.53.0"
}
module "aks-cluster1" {
source = "../../"
resource_group_name = "pst-aks-sandpit-dev-1"
tla = "pqr"
additional_node_pools = {
pool1 = {
node_count = "1"
max_pods = "110"
os_disk_size_gb = "30"
vm_size = "Standard_D8s_v3"
zones = ["1","2","3"]
node_os = "Linux"
taints = ["kubernetes.io/os=windows:NoSchedule"]
cluster_auto_scaling = true
cluster_auto_scaling_min_count = "2"
cluster_auto_scaling_max_count = "4"
}
}
namespace = "sample-ns"
}
Problem: When Terraform tries to create the namespace in the cluster, I get an error saying there is no such host.
I think it cannot connect to the cluster, but I may be wrong; I don't know how this is handled internally.
Error: Post "https://testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io:443/api/v1/namespaces": dial tcp: lookup testdns-05885a32.145f13c0-25ce-43e4-ae46-8cbef448ecf3.privatelink.australiaeast.azmk8s.io: no such host
Answered by 小智 · score 6
I'm one of the maintainers of the Terraform Kubernetes provider, and I see this particular issue often. As a former developer, I sympathize with the struggles I keep seeing in this area, and I would genuinely like to fix this in the provider if that were possible.
The problem you are facing is a limitation in Terraform core around passing unknown values to provider configuration blocks. Quoting the Terraform documentation:
You can use expressions in the values of these configuration arguments, but can only reference values that are known before the configuration is applied.
When you make a change to the underlying infrastructure (the AKS cluster in this case), you pass unknown values into the Kubernetes provider configuration block, because the full scope of the cluster infrastructure cannot be known until the change has been applied to the AKS cluster.
Although I did write the initial guide showing that some of these issues can be worked around, as you have discovered from experience, there are many edge cases that make getting the Kubernetes provider to work alongside the underlying infrastructure an unreliable and unintuitive process. This is due to a long-standing limitation in Terraform that cannot be fixed in any provider, but we do plan to smooth out these bumps somewhat by adding better error messages up front, which may save you some headaches in cases like this.
To solve this specific type of problem, the cluster infrastructure needs to be kept in a Terraform state separate from the Kubernetes and Helm provider resources. I have an example that builds the AKS cluster in one apply and then manages the Kubernetes/Helm resources in a second apply. You can use this approach to build the most robust configuration for your particular use case:
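A minimal sketch of that two-apply layout, assuming the cluster configuration keeps its state in an azurerm backend; the storage account, container, and key names below are hypothetical placeholders:

# Apply 1: the cluster configuration shown above, which already exposes
# host, client_certificate, client_key and cluster_ca_certificate as outputs.

# Apply 2: a separate configuration that reads those outputs via remote
# state, so the kubernetes provider only ever receives known values.
data "terraform_remote_state" "aks" {
  backend = "azurerm"
  config = {
    resource_group_name  = "pst-aks-sandpit-dev-1"
    storage_account_name = "tfstateexample" # hypothetical
    container_name       = "tfstate"        # hypothetical
    key                  = "aks-cluster.tfstate"
  }
}

provider "kubernetes" {
  host                   = data.terraform_remote_state.aks.outputs.host
  client_certificate     = base64decode(data.terraform_remote_state.aks.outputs.client_certificate)
  client_key             = base64decode(data.terraform_remote_state.aks.outputs.client_key)
  cluster_ca_certificate = base64decode(data.terraform_remote_state.aks.outputs.cluster_ca_certificate)
}

resource "kubernetes_namespace" "aks-namespace" {
  metadata {
    name = "sample-ns"
  }
}

With this split, the provider is configured only from values already recorded in state, so changes to the cluster never feed unknown values into it.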
I know this two-apply approach is inconvenient, which is why we keep trying to accommodate users in single-apply scenarios, and in scenarios that keep the Kubernetes and cluster resources in the same Terraform state. However, until upstream Terraform adds support for this, the single-apply workflow will remain flawed and less reliable than keeping the cluster infrastructure separate from the Kubernetes resources.
Most cases can be solved by using depends_on (to ensure the cluster is created before the Kubernetes resources), or by moving the cluster infrastructure into a separate module and running terraform state rm module.kubernetes-config or terraform apply -target=module.aks-cluster. But I think encouraging this workaround causes more trouble in the long run, because it puts users in charge of figuring out when to use special one-off apply commands, instead of setting up Terraform so that it behaves reliably and predictably from the start. It can also have unintended side effects, such as orphaning cloud resources. A sketch of the depends_on variant follows below.
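For completeness, a hedged sketch of that single-state workaround; the module name kubernetes-config and its path are hypothetical, and module-level depends_on requires Terraform 0.13 or later:

# Root configuration: move the namespace into its own module, then create
# the cluster first with a targeted apply before running a full apply:
#   terraform apply -target=module.aks-cluster1
#   terraform apply
module "kubernetes-config" {
  source     = "./kubernetes-config" # hypothetical module holding kubernetes_namespace
  depends_on = [module.aks-cluster1] # ensure the cluster exists first
  namespace  = "sample-ns"
}

As the answer notes, this keeps working only as long as someone remembers the special-case apply sequence, which is why the two-state layout above is the more reliable option.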