From 586b1343d49f7e132b45176f93b33bcdf8dc6f1a Mon Sep 17 00:00:00 2001
From: Stefan Le Breton
Date: Tue, 19 Nov 2024 22:23:38 +0100
Subject: [PATCH] tf code

---
 cilium.tf          |  66 +++++++++++++++++++
 hcloud.tf          |  87 ++++++++++++++++++++++++
 hcloud_firewall.tf |  83 +++++++++++++++++++++++
 locals.tf          |   7 ++
 outputs.tf         |   9 +++
 talos.tf           | 160 +++++++++++++++++++++++++++++++++++++++++++++
 variables.tf       |  65 ++++++++++++++++++
 7 files changed, 477 insertions(+)
 create mode 100644 cilium.tf
 create mode 100644 hcloud.tf
 create mode 100644 hcloud_firewall.tf
 create mode 100644 locals.tf
 create mode 100644 outputs.tf
 create mode 100644 talos.tf
 create mode 100644 variables.tf

diff --git a/cilium.tf b/cilium.tf
new file mode 100644
index 0000000..77a0c39
--- /dev/null
+++ b/cilium.tf
@@ -0,0 +1,66 @@
+# Render the Cilium Helm chart; talos.tf embeds the resulting manifest as an inline manifest.
+data "helm_template" "cilium" {
+  #depends_on = [local_file.kubeconfig]
+  name         = "cilium"
+  namespace    = "kube-system"
+  chart        = "cilium"
+  repository   = "https://helm.cilium.io/"
+  version      = "1.16.0-rc.0"
+  kube_version = "1.29"
+  atomic       = true
+
+  # features
+  set {
+    name  = "nodeIPAM.enabled"
+    value = "true"
+  }
+  set {
+    name  = "gatewayAPI.enabled"
+    value = "true"
+  }
+
+  # node count specific
+  set { # operator replicas: exactly 1 when the cluster consists of a single node
+    name  = "operator.replicas"
+    value = (var.talos_num_cp + var.talos_num_wk == 1) ? 1 : var.cilium_operator_replicas
+  }
+
+  # talos settings
+  set { # ipam mode
+    name  = "ipam.mode"
+    value = "kubernetes"
+  }
+  set { # Cilium takes over from kube-proxy (talos.tf disables the Talos-managed proxy)
+    name  = "kubeProxyReplacement"
+    value = "true"
+  }
+  set {
+    name  = "securityContext.capabilities.ciliumAgent"
+    value = "{CHOWN,KILL,NET_ADMIN,NET_RAW,IPC_LOCK,SYS_ADMIN,SYS_RESOURCE,DAC_OVERRIDE,FOWNER,SETGID,SETUID}"
+  }
+  set {
+    name  = "securityContext.capabilities.cleanCiliumState"
+    value = "{NET_ADMIN,SYS_ADMIN,SYS_RESOURCE}"
+  }
+  set {
+    name  = "cgroup.autoMount.enabled"
+    value = "false"
+  }
+  set {
+    name  = "cgroup.hostRoot"
+    value = "/sys/fs/cgroup"
+  }
+  set { # API server address used by Cilium; presumably the Talos KubePrism endpoint - confirm
+    name  = "k8sServiceHost"
+    value = "localhost"
+  }
+  set {
+    name  = "k8sServicePort"
+    value = "7445"
+  }
+}
+
+# output "cilium_manifest" {
+#   value     = data.helm_template.cilium.manifest
+#   sensitive = true
+# }
diff --git a/hcloud.tf b/hcloud.tf
new file mode 100644
index 0000000..1be6301
--- /dev/null
+++ b/hcloud.tf
@@ -0,0 +1,87 @@
+# Hetzner Cloud resources: Talos image lookup, private network, servers, volumes, reverse DNS.
+data "hcloud_image" "talos_image" {
+  with_selector = "type=infra,os=talos,version=${var.talos_version}"
+}
+
+output "talos_image" {
+  value = data.hcloud_image.talos_image.id
+}
+
+resource "hcloud_network" "private_net" {
+  name     = var.hcloud_private_network.name
+  ip_range = var.hcloud_private_network.cidr
+}
+
+resource "hcloud_network_subnet" "private_subnet" {
+  network_id   = hcloud_network.private_net.id
+  type         = "cloud"
+  network_zone = var.hcloud_network_zone
+  ip_range     = var.hcloud_private_network.cidr
+}
+
+resource "hcloud_server" "talos_cp" {
+  count        = var.talos_num_cp
+  name         = format("cp%02d", count.index + 1)
+  image        = data.hcloud_image.talos_image.id
+  server_type  = var.hcloud_server_type_cp
+  datacenter   = var.hcloud_datacenter
+  firewall_ids = [hcloud_firewall.fw_extern.id]
+  network {
+    network_id = hcloud_network.private_net.id
+  }
+
+  labels = {
+    "app" : "talos"
+    "cluster" : var.talos_cluster_name
+    "type" : "controlplane"
+  }
+  depends_on = [hcloud_network_subnet.private_subnet] # a server can only join a network that already has a subnet
+}
+
+resource "hcloud_volume" "master" {
+  # no fs or partitions on disk!
+  for_each  = { for s in concat(hcloud_server.talos_cp, hcloud_server.talos_wk) : s.name => s }
+  name      = format("%s-%s", var.talos_cluster_name, each.value.name)
+  size      = 10
+  server_id = each.value.id
+  automount = false
+}
+
+resource "hcloud_server" "talos_wk" {
+  count = var.talos_num_wk
+  name  = format("wk%02d", count.index + 1)
+
+  image        = data.hcloud_image.talos_image.id
+  server_type  = var.hcloud_server_type_wk
+  datacenter   = var.hcloud_datacenter
+  firewall_ids = [hcloud_firewall.fw_extern.id]
+  network {
+    network_id = hcloud_network.private_net.id
+  }
+
+  labels = {
+    "app" : "talos"
+    "cluster" : var.talos_cluster_name
+    "type" : "worker"
+  }
+  depends_on = [hcloud_network_subnet.private_subnet] # a server can only join a network that already has a subnet
+}
+
+resource "hcloud_firewall_attachment" "fw_cluster" {
+  firewall_id = hcloud_firewall.fw_cluster.id
+  server_ids  = concat(hcloud_server.talos_cp[*].id, hcloud_server.talos_wk[*].id)
+}
+
+resource "hcloud_rdns" "rdns_v4" { # reverse dns: <server name>.<full_domain> for the IPv4 address
+  for_each   = { for s in concat(hcloud_server.talos_cp, hcloud_server.talos_wk) : s.name => s }
+  server_id  = each.value.id
+  ip_address = each.value.ipv4_address
+  dns_ptr    = format("%s.%s", each.value.name, local.full_domain)
+}
+resource "hcloud_rdns" "rdns_v6" { # reverse dns: <server name>.<full_domain> for the IPv6 address
+  for_each   = { for s in concat(hcloud_server.talos_cp, hcloud_server.talos_wk) : s.name => s }
+  server_id  = each.value.id
+  ip_address = each.value.ipv6_address
+  dns_ptr    = format("%s.%s", each.value.name, local.full_domain)
+}
+
diff --git a/hcloud_firewall.tf b/hcloud_firewall.tf
new file mode 100644
index 0000000..1c7c6a4
--- /dev/null
+++ b/hcloud_firewall.tf
@@ -0,0 +1,83 @@
+# create basic firewall for talos
+
+resource "hcloud_firewall" "fw_extern" {
+  name = "fw_extern"
+
+  # basic networking
+  rule { # icmp
+    description = "allow icmp"
+    direction   = "in"
+    protocol    = "icmp"
+    source_ips = [
+      "0.0.0.0/0",
+      "::/0"
+    ]
+  }
+
+  # management
+  rule { # talos api
+    description = "allow talosctl"
+    direction   = "in"
+    protocol    = "tcp"
+    port        = "50000-50001"
+    source_ips = [
+      "0.0.0.0/0",
+      "::/0"
+    ]
+  }
+  rule { # kube api
+    description = "allow kube api"
+    direction   = "in"
+    protocol    = "tcp"
+    port        = "6443"
+    source_ips = [
+      "0.0.0.0/0",
+      "::/0"
+    ]
+  }
+  rule { # kubespan
+    description = "allow kubespan"
+    direction   = "in"
+    protocol    = "udp"
+    port        = "51820"
+    source_ips = [
+      "0.0.0.0/0",
+      "::/0"
+    ]
+  }
+
+  # services
+  rule { # http
+    description = "allow http"
+    direction   = "in"
+    protocol    = "tcp"
+    port        = "80"
+    source_ips = [
+      "0.0.0.0/0",
+      "::/0"
+    ]
+  }
+  rule { # https
+    description = "allow https"
+    direction   = "in"
+    protocol    = "tcp"
+    port        = "443"
+    source_ips = [
+      "0.0.0.0/0",
+      "::/0"
+    ]
+  }
+}
+
+resource "hcloud_firewall" "fw_cluster" {
+  name = "fw_cluster"
+  rule { # cilium
+    description = "allow 10250 for cilium"
+    direction   = "in"
+    protocol    = "tcp"
+    port        = "10250"
+    source_ips = [
+      for s in concat(hcloud_server.talos_cp, hcloud_server.talos_wk) : "${s.ipv4_address}/32" # every control-plane AND worker node
+    ]
+  }
+}
diff --git a/locals.tf b/locals.tf
new file mode 100644
index 0000000..38f0856
--- /dev/null
+++ b/locals.tf
@@ -0,0 +1,7 @@
+locals {
+  # DNS names: <cluster name>[.<subdomain>].<dns_zone>; the subdomain part is skipped when var.subdomain is ""
+  rel_domain = format("%s%s", var.talos_cluster_name,
+  var.subdomain == "" ? "" : format(".%s", var.subdomain))
+
+  full_domain = format("%s.%s", local.rel_domain, var.dns_zone)
+}
\ No newline at end of file
diff --git a/outputs.tf b/outputs.tf
new file mode 100644
index 0000000..9b87295
--- /dev/null
+++ b/outputs.tf
@@ -0,0 +1,9 @@
+output "talosconfig" { # Talos client configuration generated in talos.tf
+  value     = data.talos_client_configuration.this.talos_config
+  sensitive = true
+}
+
+output "kubeconfig" { # raw kubeconfig read from the first control-plane node
+  value     = data.talos_cluster_kubeconfig.this.kubeconfig_raw
+  sensitive = true
+}
\ No newline at end of file
diff --git a/talos.tf b/talos.tf
new file mode 100644
index 0000000..45b00cb
--- /dev/null
+++ b/talos.tf
@@ -0,0 +1,160 @@
+# Talos machine configuration, bootstrap and kubeconfig retrieval for the servers defined in hcloud.tf.
+
+locals {
+  hccm_secret = yamlencode({
+    apiVersion = "v1"
+    kind       = "Secret"
+    metadata = {
+      name      = "hcloud"
+      namespace = "kube-system"
+    }
+    data = { # Secret "data" values have to be base64-encoded
+      network = base64encode(var.hcloud_private_network.name)
+      token   = base64encode(var.hcloud_token)
+    }
+  })
+
+  endpoint_ip = hcloud_server.talos_cp[0].ipv4_address # first control-plane node is the Talos and Kubernetes API endpoint
+
+  talos_config_cp_patches = yamlencode({
+    cluster = {
+      # etcd = {
+      #   advertisedSubnets = [
+      #     "!${var.hcloud_private_network.cidr}"
+      #   ]
+      # }
+    }
+  })
+  talos_config_patches = yamlencode({
+    cluster = {
+      allowSchedulingOnControlPlanes = true
+      network = {
+        cni = {
+          name = "none" # the CNI (Cilium) is delivered through inlineManifests below
+        }
+      }
+      proxy = {
+        disabled = true
+      }
+      externalCloudProvider = {
+        enabled = true
+      }
+      discovery = {
+        enabled = true
+      }
+      # etcd = {
+      #   advertisedSubnets = [
+      #     "${var.hcloud_private_network.cidr}"
+      #   ]
+      # }
+      extraManifests = [
+        "https://raw.githubusercontent.com/hetznercloud/hcloud-cloud-controller-manager/main/deploy/ccm-networks.yaml",
+        "https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.0.0/experimental-install.yaml"
+      ]
+      inlineManifests = [
+        {
+          name     = "cilium"
+          contents = data.helm_template.cilium.manifest
+        },
+        {
+          name     = "hccm secret"
+          contents = local.hccm_secret
+        }
+      ]
+    }
+    machine = {
+      kubelet = {
+        nodeIP = {
+          validSubnets = [
+            var.hcloud_private_network.cidr
+          ]
+        }
+      }
+    }
+  })
+
+  # for longhorn and small storage needs, hetzner volumes are at least 10G
+  talos_disk_patch = yamlencode({
+    machine = {
+      kubelet = {
+        extraMounts = [
+          {
+            destination = "/var/mnt/storage"
+            type        = "bind"
+            source      = "/var/mnt/storage"
+            options     = ["bind", "rshared", "rw"]
+          }
+        ]
+      }
+      disks = [{
+        device     = "/dev/sdb" # presumably the attached hcloud volume - confirm the device name
+        partitions = [{ mountpoint = "/var/mnt/storage" }]
+      }]
+    }
+  })
+}
+
+
+# create secrets
+resource "talos_machine_secrets" "this" {}
+
+data "talos_client_configuration" "this" {
+  cluster_name         = var.talos_cluster_name
+  client_configuration = talos_machine_secrets.this.client_configuration
+  endpoints            = [local.endpoint_ip]
+}
+
+data "talos_machine_configuration" "controlplane" {
+  cluster_name     = var.talos_cluster_name
+  cluster_endpoint = "https://${local.endpoint_ip}:6443"
+  machine_type     = "controlplane"
+  machine_secrets  = talos_machine_secrets.this.machine_secrets
+  config_patches = [
+    local.talos_config_patches,
+    local.talos_config_cp_patches,
+    local.talos_disk_patch
+  ]
+}
+
+resource "talos_machine_configuration_apply" "controlplane" {
+  for_each                    = { for s in hcloud_server.talos_cp : s.name => s }
+  client_configuration        = talos_machine_secrets.this.client_configuration
+  machine_configuration_input = data.talos_machine_configuration.controlplane.machine_configuration
+  node                        = each.value.ipv4_address
+}
+
+resource "talos_machine_bootstrap" "this" {
+  depends_on = [
+    talos_machine_configuration_apply.controlplane
+  ]
+  node                 = local.endpoint_ip
+  client_configuration = talos_machine_secrets.this.client_configuration
+}
+
+data "talos_machine_configuration" "worker" {
+  cluster_name     = var.talos_cluster_name
+  cluster_endpoint = "https://${local.endpoint_ip}:6443"
+  machine_type     = "worker"
+  machine_secrets  = talos_machine_secrets.this.machine_secrets
+  config_patches   = [local.talos_config_patches, local.talos_disk_patch]
+}
+
+resource "talos_machine_configuration_apply" "worker" {
+  for_each                    = { for s in hcloud_server.talos_wk : s.name => s }
+  client_configuration        = talos_machine_secrets.this.client_configuration
+  machine_configuration_input = data.talos_machine_configuration.worker.machine_configuration
+  node                        = each.value.ipv4_address
+}
+
+data "talos_cluster_kubeconfig" "this" {
+  depends_on           = [talos_machine_bootstrap.this] # read the kubeconfig only after the cluster has been bootstrapped
+  client_configuration = talos_machine_secrets.this.client_configuration
+  node                 = local.endpoint_ip
+  # wait = true
+}
+
+# NOTE: there should be a talos_cluster_health check here, but it fails for me with the internal network
+
+
+
+
diff --git a/variables.tf b/variables.tf
new file mode 100644
index 0000000..3482fd2
--- /dev/null
+++ b/variables.tf
@@ -0,0 +1,65 @@
+# TALOS
+variable "talos_num_cp" {
+  type        = number
+  description = "number of control plane servers"
+}
+variable "talos_num_wk" {
+  type        = number
+  description = "number of worker servers"
+}
+variable "talos_version" {
+  type        = string
+  description = "talos image version"
+}
+variable "talos_cluster_name" {
+  type        = string
+  description = "name of the cluster"
+}
+variable "subdomain" {
+  type        = string
+  description = "subdomain"
+}
+
+# HCLOUD
+variable "hcloud_token" {
+  type      = string
+  sensitive = true
+}
+variable "hcloud_datacenter" {
+  type = string
+}
+variable "hcloud_network_zone" {
+  type = string
+}
+variable "hcloud_server_type_cp" {
+  type = string
+}
+variable "hcloud_server_type_wk" {
+  type = string
+}
+variable "hcloud_private_network" {
+  type = object({
+    name = string
+    cidr = string
+  })
+  default = {
+    name = "talos-private"
+    cidr = "10.1.0.0/24"
+  }
+}
+
+# cilium
+variable "cilium_operator_replicas" {
+  type    = number
+  default = 2
+}
+
+# DNS
+variable "dns_zone" {
+  type = string
+}
+
+
+variable "kubeconfig" {
+  type = string
+}