diff --git a/.gitignore b/.gitignore index e43b0f9..0731c5a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .DS_Store +rke2-token diff --git a/README.md b/README.md index 959729a..9b2a7e4 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ scp -r * root@10.10.10.10:/etc/nixos ls -l /dev/disk/by-id # Partition Disk +# WARNING: This will destroy all data on the disk(s) sudo nix \ --experimental-features "nix-command flakes" \ run github:nix-community/disko -- \ @@ -33,7 +34,7 @@ sudo reboot ## Copy Config Back to Host ```bash -scp -r * nixos@10.10.10.10:/etc/nixos +scp -r * nixos@10.0.20.201:/etc/nixos ``` ## Rebuild NixOS @@ -52,10 +53,14 @@ sudo nixos-install --flake /etc/nixos#lin-va-rke1 cat /var/lib/rancher/rke2/server/node-token # Deploy Following Nodes -echo "" > ./k8s/rke2-token +echo "" > rke2-token sudo nixos-install --flake /etc/nixos#lin-va-rke2 ``` -## TODO +## Notes -OpenEBS DiskPool Configuration not being applied. Likely need to consolidate RKE2 config, generate DiskPool config in complete, then apply. 
+## Kasten Port Forward + +```bash +kubectl port-forward -n kasten svc/gateway 8000:80 +``` diff --git a/flake.nix b/flake.nix index 9febd0d..4bf4ccd 100644 --- a/flake.nix +++ b/flake.nix @@ -6,100 +6,82 @@ disko.url = "github:nix-community/disko"; }; - outputs = { self, nixpkgs, disko }: { - nixosConfigurations.lin-va-llama1 = nixpkgs.lib.nixosSystem { - # LLaMA C++ Server - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - ./hosts/llama-server.nix - { - networking.hostName = "lin-va-llama1"; - disko.devices.disk.main.device = "/dev/sda"; - k8s.diskPoolID = "/dev/disk/by-id/unknown"; - } - ]; - }; - - # K3s Server - nixosConfigurations.lin-va-k3s1 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - ./hosts/k3s.nix - { - networking.hostName = "lin-va-k3s1"; - disko.devices.disk.main.device = "/dev/sda"; - } - ]; - }; - - # RKE2 Primary Server - nixosConfigurations.lin-va-rke1 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - ./hosts/rke2.nix - { - networking.hostName = "lin-va-rke1"; - - # Partitions - disko.devices.disk.main.device = "/dev/disk/by-id/ata-VBOX_HARDDISK_VB0af7d668-04b70404"; - k8s.diskPoolID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBcd9425b8-d666f9b8"; - } - ]; - }; - - # RKE2 Second Server - nixosConfigurations.lin-va-rke2 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - ./hosts/rke2.nix - { - networking.hostName = "lin-va-rke2"; - - # Partitions - disko.devices.disk.main.device = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBf55aaccc-688cfd0d"; - k8s.diskPoolID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBfd391256-6e368424"; - - # Set RKE2 Join - services.rke2.serverAddr = "https://10.0.20.147:9345"; - services.rke2.tokenFile = "/etc/rancher/rke2/node-token"; - environment.etc."rancher/rke2/node-token" = { - source = ./k8s/rke2-token; - mode = "0600"; - user = "root"; - group = "root"; + outputs = { self, 
nixpkgs, disko }: + let + mkSystem = { systemConfig, moduleConfig }: nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + disko.nixosModules.disko + ./lib/disk-config.nix + ./lib/common-system.nix + systemConfig + ({ ... }: moduleConfig) + ]; + }; + in + { + nixosConfigurations = { + # LLaMA C++ Server + lin-va-llama1 = mkSystem { + systemConfig = ./hosts/llama-server.nix; + moduleConfig = { + hostName = "lin-va-llama1"; + mainDiskID = "/dev/disk/by-id/ata-MTFDDAK512MBF-1AN1ZABHA_161212233628"; }; - } - ]; - }; + }; - # RKE2 Third Server - nixosConfigurations.lin-va-rke3 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - ./hosts/rke2.nix - { - networking.hostName = "lin-va-rke3"; + # RKE2 Primary Server + lin-va-rke1 = mkSystem { + systemConfig = ./hosts/rke2.nix; + moduleConfig = { + hostName = "lin-va-rke1"; + mainDiskID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VB0af7d668-04b70404"; + dataDiskID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBcd9425b8-d666f9b8"; - # Partitions - disko.devices.disk.main.device = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBe9edacd5-ac4ed4fa"; - k8s.diskPoolID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBa1fc46d0-19380495"; - - # Set RKE2 Join - services.rke2.serverAddr = "https://10.0.20.147:9345"; - services.rke2.tokenFile = "/etc/rancher/rke2/node-token"; - environment.etc."rancher/rke2/node-token" = { - source = ./k8s/rke2-token; - mode = "0600"; - user = "root"; - group = "root"; + networkConfig = { + interface = "enp0s3"; + address = "10.0.20.201"; + defaultGateway = "10.0.20.254"; + nameservers = [ "10.0.20.254" ]; + }; }; - } - ]; + }; + + # RKE2 Second Server + lin-va-rke2 = mkSystem { + systemConfig = ./hosts/rke2.nix; + moduleConfig = { + hostName = "lin-va-rke2"; + mainDiskID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBf55aaccc-688cfd0d"; + dataDiskID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBfd391256-6e368424"; + serverAddr = "https://10.0.20.201:9345"; + + networkConfig = { + interface = 
"enp0s3"; + address = "10.0.20.202"; + defaultGateway = "10.0.20.254"; + nameservers = [ "10.0.20.254" ]; + }; + }; + }; + + # RKE2 Third Server + lin-va-rke3 = mkSystem { + systemConfig = ./hosts/rke2.nix; + moduleConfig = { + hostName = "lin-va-rke3"; + mainDiskID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBe9edacd5-ac4ed4fa"; + dataDiskID = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBa1fc46d0-19380495"; + serverAddr = "https://10.0.20.201:9345"; + + networkConfig = { + interface = "enp0s3"; + address = "10.0.20.203"; + defaultGateway = "10.0.20.254"; + nameservers = [ "10.0.20.254" ]; + }; + }; + }; + }; }; - }; } diff --git a/hosts/k3s.nix b/hosts/k3s.nix deleted file mode 100644 index 8e00a5e..0000000 --- a/hosts/k3s.nix +++ /dev/null @@ -1,123 +0,0 @@ -{ config, pkgs, ... }: - -{ - imports = [ - ../k8s - ]; - k8s.manifestsDir = "/var/lib/rancher/k3s/server/manifests"; - - # Enable Flakes - nix.settings.experimental-features = [ "nix-command" "flakes" ]; - - # System Configuration - boot.kernelModules = [ "nvme_tcp" ]; # OpenEBS Mayastor Requirement - boot.kernel.sysctl = { - "vm.nr_hugepages" = 1024; - }; - boot.loader.systemd-boot.enable = true; - boot.loader.efi.canTouchEfiVariables = true; - boot.loader.efi.efiSysMountPoint = "/boot"; - - # Disk Configuration - disko.devices = { - disk = { - main = { - type = "disk"; - content = { - type = "gpt"; - partitions = { - boot = { - size = "512M"; - type = "EF00"; # EFI - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; - }; - - - # Network Configuration - networking = { - networkmanager.enable = true; - firewall = { - enable = true; - - # Single Node Required Ports - allowedTCPPorts = [ 6443 ]; - - # Multi Node Required Ports - # allowedTCPPorts = [ 6443 2379 2380 10250 ]; - # allowedUDPPorts = [ 8472 ]; - }; - }; - - # 
Enable K3s - services.k3s = { - enable = true; - role = "server"; - extraFlags = toString [ - "--disable=traefik" # Should we enable? - "--disable=servicelb" - ]; - }; - - # Enable SSH Server - services.openssh = { - enable = true; - settings = { - PasswordAuthentication = false; # Disable Password Login - PermitRootLogin = "prohibit-password"; # Disable Password Login - }; - }; - - # User Configuration - users.users.root = { - openssh.authorizedKeys.keys = [ - "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAQEA8P84lWL/p13ZBFNwITm/dLWWL8s9pVmdOImM5gaJAiTLY+DheUvG6YsveB2/5STseiJ34g7Na9TW1mtTLL8zDqPvj3NbprQiYlLJKMbCk6dtfdD4nLMHl8B48e1h699XiZDp2/c+jJb0MkLOFrps+FbPqt7pFt1Pj29tFy8BCg0LGndu6KO+HqYS+aM5tp5hZESo1RReiJ8aHsu5X7wW46brN4gfyyu+8X4etSZAB9raWqlln9NKK7G6as6X+uPypvSjYGSTC8TSePV1iTPwOxPk2+1xBsK7EBLg3jNrrYaiXLnZvBOOhm11JmHzqEJ6386FfQO+0r4iDVxmvi+ojw== rsa-key-20141114" - ]; - hashedPassword = null; # Disable Password Login - }; - - # System Packages - environment.systemPackages = with pkgs; [ - k9s - kubectl - kubernetes-helm - nfs-utils - vim - ]; - - # Enable Container Features - virtualisation = { - docker.enable = false; - containerd = { - enable = true; - settings = { - version = 2; - plugins."io.containerd.grpc.v1.cri" = { - containerd.runtimes.runc = { - runtime_type = "io.containerd.runc.v2"; - }; - }; - }; - }; - }; - - # System State Version - system.stateVersion = "24.11"; -} diff --git a/hosts/llama-server.nix b/hosts/llama-server.nix index 7342412..4d7e0cc 100644 --- a/hosts/llama-server.nix +++ b/hosts/llama-server.nix @@ -5,7 +5,7 @@ let cudaSupport = true; }).overrideAttrs (oldAttrs: { cmakeFlags = oldAttrs.cmakeFlags ++ [ - "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=ON" + "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1" # Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz "-DLLAMA_FMA=OFF" @@ -19,20 +19,19 @@ let # Define Model Vars modelDir = "/models"; - modelName = "qwen2.5-coder-7b-q8_0.gguf"; + + # 7B + # modelName = "qwen2.5-coder-7b-q8_0.gguf"; + # modelUrl = 
"https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/${modelName}?download=true"; + + # 3B + modelName = "qwen2.5-coder-3b-q8_0.gguf"; + modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/${modelName}?download=true"; + modelPath = "${modelDir}/${modelName}"; - modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/${modelName}?download=true"; in { - # Enable Flakes - nix.settings.experimental-features = [ "nix-command" "flakes" ]; - - # System Configuration - boot.loader.systemd-boot.enable = true; - boot.loader.efi.canTouchEfiVariables = true; - boot.loader.efi.efiSysMountPoint = "/boot"; - # Allow Nvidia & CUDA nixpkgs.config.allowUnfree = true; @@ -55,39 +54,6 @@ in nvidiaSettings = true; }; - # Disk Configuration - disko.devices = { - disk = { - main = { - type = "disk"; - content = { - type = "gpt"; - partitions = { - boot = { - size = "512M"; - type = "EF00"; # EFI - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; - }; - - # Network Configuration networking.networkmanager.enable = true; @@ -138,40 +104,40 @@ in model = modelPath; port = 8080; openFirewall = true; + + # 7B + # extraFlags = [ + # "-ngl" + # "99" + # "-fa" + # "-ub" + # "512" + # "-b" + # "512" + # "-dt" + # "0.1" + # "--ctx-size" + # "4096" + # "--cache-reuse" + # "256" + # ]; + + # 3B extraFlags = [ "-ngl" "99" "-fa" "-ub" - "512" + "1024" "-b" - "512" - "-dt" - "0.1" + "1024" "--ctx-size" - "4096" + "0" "--cache-reuse" "256" ]; }; - # Enable SSH Server - services.openssh = { - enable = true; - settings = { - PasswordAuthentication = false; # Disable Password Login - PermitRootLogin = "prohibit-password"; # Disable Password Login - }; - }; - - # User Configuration - users.users.root = { - 
openssh.authorizedKeys.keys = [ - "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAQEA8P84lWL/p13ZBFNwITm/dLWWL8s9pVmdOImM5gaJAiTLY+DheUvG6YsveB2/5STseiJ34g7Na9TW1mtTLL8zDqPvj3NbprQiYlLJKMbCk6dtfdD4nLMHl8B48e1h699XiZDp2/c+jJb0MkLOFrps+FbPqt7pFt1Pj29tFy8BCg0LGndu6KO+HqYS+aM5tp5hZESo1RReiJ8aHsu5X7wW46brN4gfyyu+8X4etSZAB9raWqlln9NKK7G6as6X+uPypvSjYGSTC8TSePV1iTPwOxPk2+1xBsK7EBLg3jNrrYaiXLnZvBOOhm11JmHzqEJ6386FfQO+0r4iDVxmvi+ojw== rsa-key-20141114" - ]; - hashedPassword = null; # Disable Password Login - }; - # System Packages environment.systemPackages = with pkgs; [ htop @@ -180,7 +146,4 @@ in vim wget ]; - - # System State Version - system.stateVersion = "24.11"; } diff --git a/hosts/rke2-ceph.nix b/hosts/rke2-ceph.nix new file mode 100644 index 0000000..de2013a --- /dev/null +++ b/hosts/rke2-ceph.nix @@ -0,0 +1,147 @@ +{ config, pkgs, lib, ... }: + +{ + # Node Nix Config + options = { + dataDiskID = lib.mkOption { + type = lib.types.str; + description = "The device ID for the data disk"; + }; + serverAddr = lib.mkOption { + type = lib.types.str; + description = "The server to join"; + default = ""; + }; + networkConfig = lib.mkOption { + type = lib.types.submodule { + options = { + interface = lib.mkOption { + type = lib.types.str; + description = "Network interface name"; + example = "enp0s3"; + }; + address = lib.mkOption { + type = lib.types.str; + description = "Static IP address"; + example = "10.0.20.200"; + }; + defaultGateway = lib.mkOption { + type = lib.types.str; + description = "Default gateway IP"; + example = "10.0.20.254"; + }; + nameservers = lib.mkOption { + type = lib.types.listOf lib.types.str; + description = "List of DNS servers"; + example = [ "10.0.20.254" "8.8.8.8" ]; + default = [ "8.8.8.8" "8.8.4.4" ]; + }; + }; + }; + description = "Network configuration"; + }; + }; + + config = { + # ---------------------------------------- + # ---------- Base Configuration ---------- + # ---------------------------------------- + + # Ceph Requirements + 
boot.kernelModules = [ "rbd" ]; + + # Network Configuration + networking = { + hostName = config.hostName; + networkmanager.enable = false; + + # Interface Configuration + inherit (config.networkConfig) defaultGateway nameservers; + interfaces.${config.networkConfig.interface}.ipv4.addresses = [{ + inherit (config.networkConfig) address; + prefixLength = 24; + }]; + + firewall = { + enable = true; + + allowedTCPPorts = [ + # RKE2 Ports - https://docs.rke2.io/install/requirements#networking + 6443 # Kubernetes API + 9345 # RKE2 supervisor API + 2379 # etcd Client Port + 2380 # etcd Peer Port + 2381 # etcd Metrics Port + 10250 # kubelet metrics + 9099 # Canal CNI health checks + + # Ceph Ports + 3300 # Ceph MON daemon + 6789 # Ceph MON service + ] ++ lib.range 6800 7300; # Ceph OSD range + + allowedUDPPorts = [ + # RKE2 Ports - https://docs.rke2.io/install/requirements#networking + 8472 # Canal CNI with VXLAN + # 51820 # Canal CNI with WireGuard IPv4 (if using encryption) + # 51821 # Canal CNI with WireGuard IPv6 (if using encryption) + ]; + }; + }; + + # System Packages + environment.systemPackages = with pkgs; [ + htop + k9s + kubectl + kubernetes-helm + nfs-utils + tmux + vim + ]; + + # ---------------------------------------- + # ---------- RKE2 Configuration ---------- + # ---------------------------------------- + + # RKE2 Join Token + environment.etc."rancher/rke2/node-token" = lib.mkIf (config.serverAddr != "") { + source = ../rke2-token; + mode = "0600"; + user = "root"; + group = "root"; + }; + + # Enable RKE2 + services.rke2 = { + enable = true; + role = "server"; + + disable = [ + # Disable - Utilizing Traefik + "rke2-ingress-nginx" + + # Disable - Utilizing OpenEBS's Snapshot Controller + "rke2-snapshot-controller" + "rke2-snapshot-controller-crd" + "rke2-snapshot-validation-webhook" + ]; + + } // lib.optionalAttrs (config.serverAddr != "") { + serverAddr = config.serverAddr; + tokenFile = "/etc/rancher/rke2/node-token"; + }; + + # Bootstrap Kubernetes
Manifests + system.activationScripts.k8s-manifests = { + deps = [ ]; + text = '' + mkdir -p /var/lib/rancher/rke2/server/manifests + + # Base Configs + cp ${../k8s/ceph.yaml} /var/lib/rancher/rke2/server/manifests/ceph-base.yaml + cp ${../k8s/kasten.yaml} /var/lib/rancher/rke2/server/manifests/kasten-base.yaml + ''; + }; + }; +} diff --git a/hosts/rke2-openebs.nix b/hosts/rke2-openebs.nix new file mode 100644 index 0000000..fc437f3 --- /dev/null +++ b/hosts/rke2-openebs.nix @@ -0,0 +1,162 @@ +{ config, pkgs, lib, ... }: + +{ + # Node Nix Config + options = { + dataDiskID = lib.mkOption { + type = lib.types.str; + description = "The device ID for the data disk"; + }; + serverAddr = lib.mkOption { + type = lib.types.str; + description = "The server to join"; + default = ""; + }; + networkConfig = lib.mkOption { + type = lib.types.submodule { + options = { + interface = lib.mkOption { + type = lib.types.str; + description = "Network interface name"; + example = "enp0s3"; + }; + address = lib.mkOption { + type = lib.types.str; + description = "Static IP address"; + example = "10.0.20.200"; + }; + defaultGateway = lib.mkOption { + type = lib.types.str; + description = "Default gateway IP"; + example = "10.0.20.254"; + }; + nameservers = lib.mkOption { + type = lib.types.listOf lib.types.str; + description = "List of DNS servers"; + example = [ "10.0.20.254" "8.8.8.8" ]; + default = [ "8.8.8.8" "8.8.4.4" ]; + }; + }; + }; + description = "Network configuration"; + }; + }; + + config = { + # ---------------------------------------- + # ---------- Base Configuration ---------- + # ---------------------------------------- + + # OpenEBS Mayastor Requirements + boot.kernelModules = [ "nvme_tcp" ]; + boot.kernel.sysctl = { + "vm.nr_hugepages" = 1024; + }; + + # Network Configuration + networking = { + hostName = config.hostName; + networkmanager.enable = false; + + # Interface Configuration + inherit (config.networkConfig) defaultGateway nameservers; + 
interfaces.${config.networkConfig.interface}.ipv4.addresses = [{ + inherit (config.networkConfig) address; + prefixLength = 24; + }]; + + firewall = { + enable = true; + + allowedTCPPorts = [ + # RKE2 Ports - https://docs.rke2.io/install/requirements#networking + 6443 # Kubernetes API + 9345 # RKE2 supervisor API + 2379 # etcd Client Port + 2380 # etcd Peer Port + 2381 # etcd Metrics Port + 10250 # kubelet metrics + 9099 # Canal CNI health checks + + # OpenEBS Mayastor - https://openebs.io/docs/user-guides/replicated-storage-user-guide/replicated-pv-mayastor/rs-installation#network-requirements + 10124 # REST API + 8420 # NVMf + 4421 # NVMf + ]; + + allowedUDPPorts = [ + # RKE2 Ports - https://docs.rke2.io/install/requirements#networking + 8472 # Canal CNI with VXLAN + # 51820 # Canal CNI with WireGuard IPv4 (if using encryption) + # 51821 # Canal CNI with WireGuard IPv6 (if using encryption) + ]; + }; + }; + + # System Packages + environment.systemPackages = with pkgs; [ + htop + k9s + kubectl + kubernetes-helm + nfs-utils + vim + ]; + + # ---------------------------------------- + # ---------- RKE2 Configuration ---------- + # ---------------------------------------- + + # RKE2 Join Token + environment.etc."rancher/rke2/node-token" = lib.mkIf (config.serverAddr != "") { + source = ../rke2-token; + mode = "0600"; + user = "root"; + group = "root"; + }; + + # Enable RKE2 + services.rke2 = { + enable = true; + role = "server"; + + disable = [ + # Disable - Utilizing Traefik + "rke2-ingress-nginx" + + # Disable - Utilizing OpenEBS's Snapshot Controller + "rke2-snapshot-controller" + "rke2-snapshot-controller-crd" + "rke2-snapshot-validation-webhook" + ]; + + # OpenEBS Schedulable + nodeLabel = [ + "openebs.io/engine=mayastor" + ]; + + } // lib.optionalAttrs (config.serverAddr != "") { + serverAddr = config.serverAddr; + tokenFile = "/etc/rancher/rke2/node-token"; + }; + + # Bootstrap Kubernetes Manifests + system.activationScripts.k8s-manifests = { + deps = [ ]; +
text = '' + mkdir -p /var/lib/rancher/rke2/server/manifests + + # Base Configs + cp ${../k8s/openebs.yaml} /var/lib/rancher/rke2/server/manifests/openebs-base.yaml + cp ${../k8s/kasten.yaml} /var/lib/rancher/rke2/server/manifests/kasten-base.yaml + + # OpenEBS Disk Pool + cp ${pkgs.substituteAll { + src = ../k8s/openebs-disk-pool.yaml; + hostName = config.hostName; + dataDiskID = config.dataDiskID; + }} /var/lib/rancher/rke2/server/manifests/openebs-disk-pool-${config.hostName}.yaml + ''; + }; + }; +} diff --git a/hosts/rke2.nix b/hosts/rke2.nix index 1022330..ed7f42d 100644 --- a/hosts/rke2.nix +++ b/hosts/rke2.nix @@ -1,147 +1,185 @@ -{ config, pkgs, ... }: +{ config, pkgs, lib, ... }: { - imports = [ - ../k8s - ]; - k8s.manifestsDir = "/var/lib/rancher/rke2/server/manifests"; - - # Enable Flakes - nix.settings.experimental-features = [ "nix-command" "flakes" ]; - - # System Configuration - boot.kernelModules = [ "nvme_tcp" ]; # OpenEBS Mayastor Requirement - boot.kernel.sysctl = { - "vm.nr_hugepages" = 1024; + # Node Nix Config + options = { + dataDiskID = lib.mkOption { + type = lib.types.str; + description = "The device ID for the data disk"; + }; + serverAddr = lib.mkOption { + type = lib.types.str; + description = "The server to join"; + default = ""; + }; + networkConfig = lib.mkOption { + type = lib.types.submodule { + options = { + interface = lib.mkOption { + type = lib.types.str; + description = "Network interface name"; + example = "enp0s3"; + }; + address = lib.mkOption { + type = lib.types.str; + description = "Static IP address"; + example = "10.0.20.200"; + }; + defaultGateway = lib.mkOption { + type = lib.types.str; + description = "Default gateway IP"; + example = "10.0.20.254"; + }; + nameservers = lib.mkOption { + type = lib.types.listOf lib.types.str; + description = "List of DNS servers"; + example = [ "10.0.20.254" "8.8.8.8" ]; + default = [ "8.8.8.8" "8.8.4.4" ]; + }; + }; + }; + description = "Network configuration"; + }; }; - 
boot.loader.systemd-boot.enable = true; - boot.loader.efi.canTouchEfiVariables = true; - boot.loader.efi.efiSysMountPoint = "/boot"; - # Disk Configuration - disko.devices = { - disk = { - main = { + config = { + # ---------------------------------------- + # ---------- Base Configuration ---------- + # ---------------------------------------- + + # Longhorn Requirements + boot.kernelModules = [ + "iscsi_tcp" + "dm_crypt" + ]; + + # Longhorn Data Disk + disko.devices = { + disk.longhorn = { type = "disk"; + device = config.dataDiskID; content = { type = "gpt"; partitions = { - boot = { - size = "512M"; - type = "EF00"; # EFI - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - root = { + longhorn = { size = "100%"; content = { type = "filesystem"; - format = "ext4"; - mountpoint = "/"; + format = "xfs"; + mountpoint = "/storage/longhorn"; + mountOptions = [ "defaults" "nofail" ]; + extraArgs = [ "-d" "su=128k,sw=8" ]; }; }; }; }; }; }; - }; - # Network Configuration - networking = { - networkmanager.enable = true; - firewall = { + # Network Configuration + networking = { + hostName = config.hostName; + networkmanager.enable = false; + + # Interface Configuration + inherit (config.networkConfig) defaultGateway nameservers; + interfaces.${config.networkConfig.interface}.ipv4.addresses = [{ + inherit (config.networkConfig) address; + prefixLength = 24; + }]; + + firewall = { + enable = true; + + allowedTCPPorts = [ + # RKE2 Ports - https://docs.rke2.io/install/requirements#networking + 6443 # Kubernetes API + 9345 # RKE2 supervisor API + 2379 # etcd Client Port + 2380 # etcd Peer Port + 2381 # etcd Metrics Port + 10250 # kubelet metrics + 9099 # Canal CNI health checks + + # iSCSI Port + 3260 + ]; + + allowedUDPPorts = [ + # RKE2 Ports - https://docs.rke2.io/install/requirements#networking + 8472 # Canal CNI with VXLAN + # 51820 # Canal CNI with WireGuard IPv4 (if using encryption) + # 51821 # 
Canal CNI with WireGuard IPv6 (if using encryption) + ]; + }; + }; + + # System Packages + environment.systemPackages = with pkgs; [ + htop + k9s + kubectl + kubernetes-helm + nfs-utils + openiscsi + tmux + vim + ]; + + # ---------------------------------------- + # ---------- RKE2 Configuration ---------- + # ---------------------------------------- + + # RKE2 Join Token + environment.etc."rancher/rke2/node-token" = lib.mkIf (config.serverAddr != "") { + source = ../rke2-token; + mode = "0600"; + user = "root"; + group = "root"; + }; + + # Enable RKE2 + services.rke2 = { enable = true; + role = "server"; - # https://docs.rke2.io/install/requirements#networking - allowedTCPPorts = [ - # K8s Control Plane - 6443 # Kubernetes API - 9345 # RKE2 supervisor API - 2379 # etcd Client Port - 2380 # etcd Peer Port - 2381 # etcd Metrics Port + disable = [ + # Disable - Utilizing Traefik + "rke2-ingress-nginx" - # K8s Node Communication - 10250 # kubelet metrics - 9099 # Canal CNI health checks - - # OpenEBS Mayastor - 10124 # Mayastor REST API - 8420 # NVMf - 4421 # NVMf - ]; - - allowedUDPPorts = [ - 8472 # Canal CNI with VXLAN - # 51820 # Canal CNI with WireGuard IPv4 (if using encryption) - # 51821 # Canal CNI with WireGuard IPv6 (if using encryption) + # Disable - Utilizing Longhorn's Snapshot Controller + "rke2-snapshot-controller" + "rke2-snapshot-controller-crd" + "rke2-snapshot-validation-webhook" ]; + } // lib.optionalAttrs (config.serverAddr != "") { + serverAddr = config.serverAddr; + tokenFile = "/etc/rancher/rke2/node-token"; }; - }; - # Enable RKE2 - services.rke2 = { - enable = true; - - disable = [ - # Utilize Traefik - "rke2-ingress-nginx" - - # Utilize OpenEBS's Snapshot Controller - "rke2-snapshot-controller" - "rke2-snapshot-controller-crd" - "rke2-snapshot-validation-webhook" - ]; - - nodeLabel = [ - "openebs.io/engine=mayastor" - ]; - - role = "server"; - # ------------------- - # --- Server Node --- - # ------------------- - - # ------------------- - # 
--- Worker Node --- - # ------------------- - # role = "agent"; - # serverAddr = "https://10.0.0.10:6443" - # tokenFile = ""; - # agentTokenFile = ""; - }; - - # Enable SSH Server - services.openssh = { - enable = true; - settings = { - PasswordAuthentication = false; # Disable Password Login - PermitRootLogin = "prohibit-password"; # Disable Password Login + # Enable OpeniSCSI + services.openiscsi = { + enable = true; + name = "iqn.2025-01.${config.hostName}:initiator"; }; + + # Bootstrap Kubernetes Manifests + system.activationScripts.k8s-manifests = { + deps = [ ]; + text = '' + mkdir -p /var/lib/rancher/rke2/server/manifests + + # Base Configs + cp ${../k8s/longhorn.yaml} /var/lib/rancher/rke2/server/manifests/longhorn-base.yaml + # cp ${../k8s/kasten.yaml} /var/lib/rancher/rke2/server/manifests/kasten-base.yaml + ''; + }; + + # Add Symlinks Expected by Longhorn + system.activationScripts.add-symlinks = '' + mkdir -p /usr/bin + ln -sf ${pkgs.openiscsi}/bin/iscsiadm /usr/bin/iscsiadm + ln -sf ${pkgs.openiscsi}/bin/iscsid /usr/bin/iscsid + ''; }; - - # User Configuration - users.users.root = { - openssh.authorizedKeys.keys = [ - "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAQEA8P84lWL/p13ZBFNwITm/dLWWL8s9pVmdOImM5gaJAiTLY+DheUvG6YsveB2/5STseiJ34g7Na9TW1mtTLL8zDqPvj3NbprQiYlLJKMbCk6dtfdD4nLMHl8B48e1h699XiZDp2/c+jJb0MkLOFrps+FbPqt7pFt1Pj29tFy8BCg0LGndu6KO+HqYS+aM5tp5hZESo1RReiJ8aHsu5X7wW46brN4gfyyu+8X4etSZAB9raWqlln9NKK7G6as6X+uPypvSjYGSTC8TSePV1iTPwOxPk2+1xBsK7EBLg3jNrrYaiXLnZvBOOhm11JmHzqEJ6386FfQO+0r4iDVxmvi+ojw== rsa-key-20141114" - ]; - hashedPassword = null; # Disable Password Login - }; - - # System Packages - environment.systemPackages = with pkgs; [ - htop - k9s - kubectl - kubernetes-helm - nfs-utils - vim - ]; - - # System State Version - system.stateVersion = "24.11"; } diff --git a/k8s/.gitignore b/k8s/.gitignore deleted file mode 100644 index 5dbb189..0000000 --- a/k8s/.gitignore +++ /dev/null @@ -1 +0,0 @@ -rke2-token diff --git a/k8s/ceph.yaml b/k8s/ceph.yaml 
new file mode 100644 index 0000000..cb81ca3 --- /dev/null +++ b/k8s/ceph.yaml @@ -0,0 +1,164 @@ +--- +# Namespace +apiVersion: v1 +kind: Namespace +metadata: + labels: + name: rook-ceph + name: rook-ceph + +--- +# HelmChart +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: ceph + namespace: kube-system +spec: + repo: https://charts.rook.io/release + chart: rook-ceph + targetNamespace: rook-ceph + valuesContent: |- + enableDiscoveryDaemon: true + +--- +# CephCluster +apiVersion: ceph.rook.io/v1 +kind: CephCluster +metadata: + name: rook-ceph + namespace: rook-ceph +spec: + dataDirHostPath: /var/lib/rook + cephVersion: + image: quay.io/ceph/ceph:v19.2 + allowUnsupported: false + + # HA - One monitor per node + mon: + count: 3 + allowMultiplePerNode: false + + # Ceph Dashboard + dashboard: + enabled: true + ssl: true + + # Network Configuration + network: + provider: host + + # Storage Configuration + storage: + useAllNodes: true + useAllDevices: true + config: + osdsPerDevice: "1" + replicatedSize: "3" + + # Disruption Management + disruptionManagement: + managePodBudgets: true + osdMaintenanceTimeout: 30 + + # Resource Management + # resources: + # mgr: + # limits: + # cpu: "1000m" + # memory: "1Gi" + # requests: + # cpu: "500m" + # memory: "512Mi" + # mon: + # limits: + # cpu: "1000m" + # memory: "1Gi" + # requests: + # cpu: "500m" + # memory: "512Mi" + # osd: + # limits: + # cpu: "2000m" + # memory: "4Gi" + # requests: + # cpu: "1000m" + # memory: "2Gi" + +--- +# BlockPool - Single Replica +apiVersion: ceph.rook.io/v1 +kind: CephBlockPool +metadata: + name: ceph-block-pool-single + namespace: rook-ceph +spec: + failureDomain: host + replicated: + size: 1 + +--- +# BlockPool - Three Replica +apiVersion: ceph.rook.io/v1 +kind: CephBlockPool +metadata: + name: ceph-block-pool-triple + namespace: rook-ceph +spec: + failureDomain: host + replicated: + size: 3 + +--- +# StorageClass - Three Replica +apiVersion: storage.k8s.io/v1 +kind: StorageClass
+metadata: + name: ceph-block-triple + annotations: + storageclass.kubernetes.io/is-default-class: "true" +provisioner: rook-ceph.rbd.csi.ceph.com +parameters: + pool: ceph-block-pool-triple + clusterID: rook-ceph + imageFormat: "2" + imageFeatures: layering + + # Ceph CSI driver + csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + csi.storage.k8s.io/fstype: ext4 + +allowVolumeExpansion: true +volumeBindingMode: Immediate +reclaimPolicy: Delete + +--- +# StorageClass - Single Replica +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: ceph-block-single +provisioner: rook-ceph.rbd.csi.ceph.com +parameters: + pool: ceph-block-pool-single + clusterID: rook-ceph + imageFormat: "2" + imageFeatures: layering + + # Ceph CSI driver + csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + csi.storage.k8s.io/fstype: ext4 + +allowVolumeExpansion: true +volumeBindingMode: Immediate +reclaimPolicy: Delete diff --git a/k8s/default.nix b/k8s/default.nix deleted file mode 100644 index 7beee50..0000000 --- a/k8s/default.nix +++ /dev/null @@ -1,34 +0,0 @@ -{ config, lib, pkgs, ... 
}: - -{ - options.k8s = { - diskPoolID = lib.mkOption { - type = lib.types.str; - description = "Disk Pool ID for OpenEBS"; - }; - - manifestsDir = lib.mkOption { - type = lib.types.path; - description = "Directory for Kubernetes manifests"; - }; - }; - - config = { - system.activationScripts.k8s-manifests = { - deps = [ ]; - text = '' - mkdir -p ${config.k8s.manifestsDir} - - # Storage - OpenEBS - cp ${pkgs.substituteAll { - src = ./config/openebs.yaml; - nodeName = config.networking.hostName; - diskPoolID = config.k8s.diskPoolID; - }} ${config.k8s.manifestsDir}/openebs.yaml - - # Backup - Kasten - cp ${./config/kasten.yaml} ${config.k8s.manifestsDir}/kasten.yaml - ''; - }; - }; -} diff --git a/k8s/config/kasten.yaml b/k8s/kasten.yaml similarity index 52% rename from k8s/config/kasten.yaml rename to k8s/kasten.yaml index 644dcb8..ae38d63 100644 --- a/k8s/config/kasten.yaml +++ b/k8s/kasten.yaml @@ -45,7 +45,39 @@ spec: repo: https://charts.kasten.io/ chart: k10 targetNamespace: kasten - valuesContent: |- - global: - persistence: - storageClass: mayastor-r1 +--- +kind: Profile +apiVersion: config.kio.kasten.io/v1alpha1 +metadata: + name: k10-backup-profile + namespace: kasten +spec: + locationSpec: + type: FileStore + fileStore: + claimName: va-unraid-backup-rw + credential: + secretType: "" + secret: + apiVersion: "" + kind: "" + name: "" + namespace: "" + type: Location +--- +apiVersion: config.kio.kasten.io/v1alpha1 +kind: TransformSet +metadata: + name: storage-class-rename + namespace: kasten +spec: + comment: Renames cstor-r1 to ceph-block-triple + transforms: + - json: + - op: replace + path: /spec/storageClassName + value: ceph-block-triple + name: StorageClassRename + subject: + name: "" + resource: persistentvolumeclaims diff --git a/k8s/longhorn.yaml b/k8s/longhorn.yaml new file mode 100644 index 0000000..3ec96b2 --- /dev/null +++ b/k8s/longhorn.yaml @@ -0,0 +1,50 @@ +--- +# Namespace +apiVersion: v1 +kind: Namespace +metadata: + labels: + name: longhorn 
+ name: longhorn + +--- +# HelmChart for Longhorn (chart longhorn from charts.longhorn.io, targetNamespace longhorn). NOTE(review): persistence.defaultClass is true here while mayastor-r3 in openebs.yaml is also annotated is-default-class "true"; confirm a cluster never ends up with two default StorageClasses. NOTE(review): confirm 0.25 is a valid guaranteedEngineManagerCPU / guaranteedReplicaManagerCPU value for the chart version in use (recent Longhorn releases document their CPU reservation settings as integer percentages). +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: longhorn + namespace: kube-system +spec: + repo: https://charts.longhorn.io + chart: longhorn + targetNamespace: longhorn + valuesContent: |- + persistence: + defaultClass: true + defaultClassReplicaCount: 3 + reclaimPolicy: Delete + + defaultSettings: + defaultDataPath: /storage/longhorn + defaultReplicaCount: 3 + nodeDownPodDeletionPolicy: delete-both-statefulset-and-deployment-pod + guaranteedEngineManagerCPU: 0.25 + guaranteedReplicaManagerCPU: 0.25 + + longhornManager: + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" +--- +# StorageClass: Longhorn volumes with 3 replicas, formatted ext4 +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: longhorn-block-triple +provisioner: driver.longhorn.io +allowVolumeExpansion: true +parameters: + numberOfReplicas: "3" + staleReplicaTimeout: "2880" + fsType: "ext4" diff --git a/k8s/openebs-disk-pool.yaml b/k8s/openebs-disk-pool.yaml new file mode 100644 index 0000000..3dcf2d1 --- /dev/null +++ b/k8s/openebs-disk-pool.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: "openebs.io/v1beta2" +kind: DiskPool +metadata: + name: pool-on-@hostName@ # NOTE(review): the @-delimited tokens look like build-time placeholders (the removed k8s module filled the old ones via pkgs.substituteAll); verify the new substitution supplies hostName and dataDiskID + namespace: openebs +spec: + node: @hostName@ + disks: ["aio://@dataDiskID@"] diff --git a/k8s/config/openebs.yaml b/k8s/openebs.yaml similarity index 82% rename from k8s/config/openebs.yaml rename to k8s/openebs.yaml index 10e9913..0fce804 100644 --- a/k8s/config/openebs.yaml +++ b/k8s/openebs.yaml @@ -29,15 +29,6 @@ spec: mayastor: enabled: true --- -apiVersion: "openebs.io/v1beta2" -kind: DiskPool -metadata: - name: pool-on-@nodeName@ - namespace: openebs -spec: - node: @nodeName@ - disks: ["aio://@diskPoolID@"] ---- apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: @@ -51,11 +42,11 @@ provisioner: io.openebs.csi-mayastor apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: - name: mayastor-r1 + name: mayastor-r3 annotations: 
storageclass.kubernetes.io/is-default-class: "true" allowVolumeExpansion: true parameters: protocol: nvmf - repl: "1" + repl: "3" # raised from "1" together with the class rename from mayastor-r1 to mayastor-r3 provisioner: io.openebs.csi-mayastor diff --git a/lib/common-system.nix b/lib/common-system.nix new file mode 100644 index 0000000..ace5e65 --- /dev/null +++ b/lib/common-system.nix @@ -0,0 +1,43 @@ +{ config, lib, ... }: +{ + # Per-node option declared by this module; each host sets hostName + options = { + hostName = lib.mkOption { + type = lib.types.str; + description = "The node hostname"; + }; + }; + + config = { + # Basic system settings: state version, flakes support, and the hostname taken from the option above + system.stateVersion = "24.11"; + nix.settings.experimental-features = [ "nix-command" "flakes" ]; + networking.hostName = config.hostName; + + # Boot loader: systemd-boot, with the EFI system partition mounted at /boot + boot.loader = { + systemd-boot.enable = true; + efi = { + canTouchEfiVariables = true; + efiSysMountPoint = "/boot"; + }; + }; + + # SSH: enabled, password authentication off; root may log in but not with a password + services.openssh = { + enable = true; + settings = { + PasswordAuthentication = false; + PermitRootLogin = "prohibit-password"; + }; + }; + + # Root account: one authorized SSH public key and no password hash + users.users.root = { + openssh.authorizedKeys.keys = [ + "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAQEA8P84lWL/p13ZBFNwITm/dLWWL8s9pVmdOImM5gaJAiTLY+DheUvG6YsveB2/5STseiJ34g7Na9TW1mtTLL8zDqPvj3NbprQiYlLJKMbCk6dtfdD4nLMHl8B48e1h699XiZDp2/c+jJb0MkLOFrps+FbPqt7pFt1Pj29tFy8BCg0LGndu6KO+HqYS+aM5tp5hZESo1RReiJ8aHsu5X7wW46brN4gfyyu+8X4etSZAB9raWqlln9NKK7G6as6X+uPypvSjYGSTC8TSePV1iTPwOxPk2+1xBsK7EBLg3jNrrYaiXLnZvBOOhm11JmHzqEJ6386FfQO+0r4iDVxmvi+ojw== rsa-key-20141114" + ]; + hashedPassword = null; + }; + }; +} diff --git a/lib/disk-config.nix b/lib/disk-config.nix new file mode 100644 index 0000000..d54867e --- /dev/null +++ b/lib/disk-config.nix @@ -0,0 +1,43 @@ +{ config, lib, ... 
}: { + options = { + mainDiskID = lib.mkOption { # the disk to partition; used as disko.devices.disk.main.device below + type = lib.types.str; + description = "Device path for the main disk"; + example = "/dev/disk/by-id/ata-VBOX_HARDDISK_VBcd9425b8-d666f9b8"; + }; + }; + + config = { + disko.devices = { + disk = { + main = { + type = "disk"; + device = config.mainDiskID; + content = { + type = "gpt"; # GPT layout holding the boot and root partitions defined below + partitions = { + boot = { + size = "512M"; + type = "EF00"; # GPT type code for an EFI System Partition + content = { + type = "filesystem"; + format = "vfat"; + mountpoint = "/boot"; + mountOptions = [ "umask=0077" ]; # keep /boot contents accessible to root only + }; + }; + root = { + size = "100%"; # NOTE(review): presumably the space left after the boot partition; confirm against the disko docs + content = { + type = "filesystem"; + format = "ext4"; + mountpoint = "/"; + }; + }; + }; + }; + }; + }; + }; + }; +}