chore(llama-swap): bump llama-cpp to b9048 and swap in UD-Q4/Q6 MTP configs

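Replace qwen3.6-27b-thinking and qwen3.6-27b-mtp-thinking with qwen3.6-27b-udq4-thinking (single GPU) and qwen3.6-27b-udq6-thinking (dual GPU). Update aliases and concurrent set accordingly. Also refresh the llama.cpp source hash for tag b9048 and the hash of mtp.patch (upstream PR 22673).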
2026-05-11 15:26:39 -04:00
parent 1879e98ebc
commit 9ec2d61fcc
2 changed files with 42 additions and 53 deletions
--- a/packages/llama-cpp/default.nix
+++ b/packages/llama-cpp/default.nix
@@ -7,12 +7,12 @@
  vulkanSupport = true;
 }).overrideAttrs
  (oldAttrs: rec {
-    version = "9045";
+    version = "9048";
    src = pkgs.fetchFromGitHub {
      owner = "ggml-org";
      repo = "llama.cpp";
      tag = "b${version}";
-      hash = "sha256-fdHGxJaMx/VG7twXdWvHdkThAOSFJTbjAnpRxsNx5l0=";
+      hash = "sha256-lYtX0hLReCnFw1+xOKefly+WunuoN89ZFEFl5mK5pQ4=";
      leaveDotGit = true;
      postFetch = ''
        git -C "$out" rev-parse --short HEAD > $out/COMMIT
@@ -43,7 +43,7 @@
      (pkgs.fetchpatch {
        name = "mtp.patch";
        url = "https://github.com/ggml-org/llama.cpp/pull/22673.patch";
-        hash = "sha256-jM4X+jy7JhOAn2v/U9mmWM/507DKaW8d/dhR78HZWFQ=";
+        hash = "sha256-HqpchhOpxuw5mY4a/OCWGDr2Y32rC4FeOHuhaVt+mvY=";
      })
    ];
  })