diff --git a/modules/nixos/services/llama-cpp/default.nix b/modules/nixos/services/llama-cpp/default.nix
index 38b6e2b..0d64bfa 100644
--- a/modules/nixos/services/llama-cpp/default.nix
+++ b/modules/nixos/services/llama-cpp/default.nix
@@ -1,4 +1,9 @@
-{ config, pkgs, lib, namespace, ... }:
+{ config
+, pkgs
+, lib
+, namespace
+, ...
+}:
 let
   inherit (lib) types mkIf mkEnableOption;
   inherit (lib.${namespace}) mkOpt;
@@ -40,7 +45,10 @@ in
       description = "Download Model";
       wantedBy = [ "multi-user.target" ];
       before = [ "llama-cpp.service" ];
-      path = [ pkgs.curl pkgs.coreutils ];
+      path = [
+        pkgs.curl
+        pkgs.coreutils
+      ];
       serviceConfig = {
         Type = "oneshot";
         RemainAfterExit = true;
@@ -85,22 +93,28 @@ in
       openFirewall = true;
       model = "${modelPath}";
 
-      package = (pkgs.llama-cpp.override {
-        cudaSupport = true;
-      }).overrideAttrs (oldAttrs: {
-        cmakeFlags = oldAttrs.cmakeFlags ++ [
-          "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
-          "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070
+      package =
+        (pkgs.llama-cpp.override {
+          cudaSupport = true;
+          blasSupport = true;
+          rocmSupport = false;
+          metalSupport = false;
+        }).overrideAttrs
+          (oldAttrs: {
+            cmakeFlags = oldAttrs.cmakeFlags ++ [
+              "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
+              "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070 / GTX-1080ti
+              "-DGGML_NATIVE=ON"
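+              # GGML_NATIVE=ON compiles ggml with -march=native, targeting
+              # exactly the ISA of the build host (an i5-3570K here, which
+              # has AVX but no AVX2/FMA), so the manual opt-outs below should
+              # be redundant; they are kept only for reference.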
 
-          # Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
-          "-DLLAMA_FMA=OFF"
-          "-DLLAMA_AVX2=OFF"
-          "-DLLAMA_AVX512=OFF"
-          "-DGGML_FMA=OFF"
-          "-DGGML_AVX2=OFF"
-          "-DGGML_AVX512=OFF"
-        ];
-      });
+              # Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
+              # "-DLLAMA_FMA=OFF"
+              # "-DLLAMA_AVX2=OFF"
+              # "-DLLAMA_AVX512=OFF"
+              # "-DGGML_FMA=OFF"
+              # "-DGGML_AVX2=OFF"
+              # "-DGGML_AVX512=OFF"
+            ];
+          });
 
       extraFlags = [ availableModels.${cfg.modelName}.flag ];
     };
diff --git a/modules/nixos/virtualisation/podman/default.nix b/modules/nixos/virtualisation/podman/default.nix
index cd4b066..21bd1c0 100644
--- a/modules/nixos/virtualisation/podman/default.nix
+++ b/modules/nixos/virtualisation/podman/default.nix
@@ -1,4 +1,9 @@
-{ config, lib, pkgs, namespace, ... }:
+{ config
+, lib
+, pkgs
+, namespace
+, ...
+}:
 let
   inherit (lib) mkIf;
 
diff --git a/systems/x86_64-linux/lin-va-desktop/default.nix b/systems/x86_64-linux/lin-va-desktop/default.nix
new file mode 100755
index 0000000..c80920b
--- /dev/null
+++ b/systems/x86_64-linux/lin-va-desktop/default.nix
@@ -0,0 +1,156 @@
+{ namespace
+, pkgs
+, lib
+, ...
+}:
+let
+  inherit (lib.${namespace}) enabled;
+
+in
+{
+  system.stateVersion = "25.11";
+  time.timeZone = "America/New_York";
+  hardware.nvidia-container-toolkit.enable = true;
+
+  nixpkgs.config = {
+    allowUnfree = true;
+    packageOverrides = pkgs: {
+      llama-cpp =
+        (pkgs.llama-cpp.override {
+          cudaSupport = true;
+          blasSupport = true;
+          rocmSupport = false;
+          metalSupport = false;
+        }).overrideAttrs
+          (oldAttrs: rec {
+            version = "7213";
+            src = pkgs.fetchFromGitHub {
+              owner = "ggml-org";
+              repo = "llama.cpp";
+              tag = "b${version}";
+              hash = "sha256-DVP1eGuSbKd0KxmrDCVAdOe16r1wJ9+RonbrUTgVS7U=";
+              leaveDotGit = true;
+              postFetch = ''
+                git -C "$out" rev-parse --short HEAD > $out/COMMIT
+                find "$out" -name .git -print0 | xargs -0 rm -rf
+              '';
+            };
+            # Auto CPU Optimizations
+            cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
+              "-DGGML_NATIVE=ON"
+              "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
+              "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
+            ];
+            # Disable Nix's march=native Stripping
+            preConfigure = ''
+              export NIX_ENFORCE_NO_NATIVE=0
+              ${oldAttrs.preConfigure or ""}
+            '';
+          });
+    };
+  };
+
+  fileSystems."/mnt/ssd" = {
+    device = "/dev/disk/by-id/ata-Samsung_SSD_870_EVO_1TB_S6PTNZ0R620739L-part1";
+    fsType = "exfat";
+    options = [
+      "uid=1000"
+      "gid=100"
+      "umask=0022"
+    ];
+  };
+
+  networking.firewall = {
+    allowedTCPPorts = [ 8081 ];
+  };
+
+  # System Config
+  reichard = {
+    nix = enabled;
+
+    system = {
+      boot = {
+        enable = true;
+        silentBoot = true;
+        enableSystemd = true;
+        enableGrub = false;
+      };
+      disk = {
+        enable = true;
+        diskPath = "/dev/sdc";
+      };
+      networking = {
+        enable = true;
+        useStatic = {
+          interface = "enp3s0";
+          address = "10.0.20.100";
+          defaultGateway = "10.0.20.254";
+          nameservers = [ "10.0.20.20" ];
+        };
+      };
+    };
+
+    hardware = {
+      opengl = {
+        enable = true;
+        enableNvidia = true;
+      };
+    };
+
+    services = {
+      openssh = enabled;
+    };
+
+    virtualisation = {
+      podman = enabled;
+    };
+  };
+
+  services.llama-swap = {
+    enable = true;
+    openFirewall = true;
+    settings = {
+      models = {
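+        # llama-swap spawns the cmd of whichever model a request names and
+        # proxies to it, swapping out the previously loaded one. \${PORT} is
+        # escaped so Nix passes the literal ${PORT} placeholder through for
+        # llama-swap to substitute with the port it assigns at launch.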
+        # https://huggingface.co/ggml-org/gpt-oss-20b-GGUF/tree/main
+        "gpt-oss-20b" = {
+          name = "GPT OSS (20B) - Thinking";
+          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-MXFP4.gguf --ctx-size 128000";
+        };
+
+        # https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/tree/main
+        "qwen2.5-coder-7b-fim" = {
+          name = "Qwen2.5 Coder (7B) - FIM";
+          cmd = "${pkgs.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/qwen2.5-coder-7b-q8_0.gguf --fim-qwen-7b-default --port \${PORT}";
+        };
+
+        # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
+        "qwen3-30b-a3b-q4-k-m" = {
+          name = "Qwen3 A3B 2507 (30B) - Instruct";
+          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf --ctx-size 8192 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        };
+
+        # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
+        "qwen3-30b-a3b-q4-thinking" = {
+          name = "Qwen3 A3B 2507 (30B) - Thinking";
+          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf --ctx-size 8192 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        };
+
+        # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
+        "qwen3-8b-vision" = {
+          name = "Qwen3 Vision (8B) - Instruct";
+          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf --ctx-size 65536 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        };
+      };
+    };
+  };
+
+  # System Packages
+  environment.systemPackages = with pkgs; [
+    btop
+    git
+    tmux
+    vim
+    llama-cpp
+  ];
+}
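+
+# Hypothetical smoke test from another machine on the LAN: llama-swap speaks
+# the OpenAI-compatible API and loads a model on first request (substitute
+# the port llama-swap actually listens on):
+#   curl http://10.0.20.100:<port>/v1/models
+#   curl http://10.0.20.100:<port>/v1/chat/completions \
+#     -H "Content-Type: application/json" \
+#     -d '{"model": "qwen3-30b-a3b-q4-k-m", "messages": [{"role": "user", "content": "hello"}]}'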