feat(llama-swap): add ik-llama-cpp package and Qwen3.6-27B MTP config
Add ikawrakow/ik_llama.cpp as a new package with CUDA/Vulkan support, enabling MTP (Multi-Token Prediction) and IQ4_KS quantization. Wire it into llama-swap with a new 'ik-qwen3.6-27b-iq4ks-thinking' model config and 'iq36' alias. Also add a chat template download to the vLLM setup script and include the binary on lin-va-desktop.
@@ -9,6 +9,7 @@ let
   nvidia-smi = "${config.hardware.nvidia.package.bin}/bin/nvidia-smi";
   llama-cpp = pkgs.reichard.llama-cpp;
+  ik-llama-cpp = pkgs.reichard.ik-llama-cpp;
   stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
     cudaSupport = true;
   };
@@ -129,6 +130,7 @@ in
     # Local Packages
     llama-cpp
+    ik-llama-cpp
     stable-diffusion-cpp
   ];
 }
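
For reference, the llama-swap entry for the new model looks roughly like the minimal sketch below. The models/cmd/aliases layout follows llama-swap's YAML config format; the server binary name, model path, port macro, and flags shown here are illustrative assumptions, not the exact values wired up in this commit.

models:
  "ik-qwen3.6-27b-iq4ks-thinking":
    # Server binary provided by the ik-llama-cpp package; actual name/path may differ.
    cmd: >
      llama-server
      --port ${PORT}
      -m /models/qwen3.6-27b-iq4_ks.gguf
      -ngl 99
    aliases:
      - "iq36"

With an entry like this, requests addressed to either the full model name or the 'iq36' alias cause llama-swap to start (or swap to) the ik_llama.cpp server process for this model.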