feat(llama-swap): add Qwen3.6-27B MTP thinking model and bump llama-cpp to b9045

Add qwen3.6-27b-mtp-thinking model config with 150K context, MTP
speculative decoding, and thinking mode support. Bump llama-cpp
from b9009 to b9045 and apply MTP patch from upstream PR #22673.
This commit is contained in:
2026-05-06 12:49:49 -04:00
parent f6f17831bf
commit fea5cc887d
2 changed files with 49 additions and 12 deletions

View File

@@ -7,12 +7,12 @@
vulkanSupport = true;
}).overrideAttrs
(oldAttrs: rec {
version = "9009";
version = "9045";
src = pkgs.fetchFromGitHub {
owner = "ggml-org";
repo = "llama.cpp";
tag = "b${version}";
hash = "sha256-rDnO+NIjXYtMqxOeYvecQsB7KgohVUuMV4jG6L0b4S4=";
hash = "sha256-fdHGxJaMx/VG7twXdWvHdkThAOSFJTbjAnpRxsNx5l0=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
@@ -36,4 +36,14 @@
export NIX_ENFORCE_NO_NATIVE=0
${oldAttrs.preConfigure or ""}
'';
# Apply the MTP speculative-decoding patch (upstream llama.cpp PR #22673)
patchFlags = [ "-p1" ];
patches = (oldAttrs.patches or [ ]) ++ [
(pkgs.fetchpatch {
name = "mtp.patch";
url = "https://github.com/ggml-org/llama.cpp/pull/22673.patch";
hash = "sha256-jM4X+jy7JhOAn2v/U9mmWM/507DKaW8d/dhR78HZWFQ=";
})
];
})