From 682b7d8b4b6c9a9b7b4aa8e4e27730a7f8ecaca6 Mon Sep 17 00:00:00 2001
From: Evan Reichard
Date: Tue, 3 Feb 2026 20:41:55 -0500
Subject: [PATCH] feat(llama-swap): add Qwen3 Coder Next 80B model
 configuration

Add new model "Qwen3 Coder Next (80B) - Instruct" with 262144 context
window and optimized parameters for coding tasks. Uses CUDA unified
memory support.
---
 modules/nixos/services/llama-swap/config.nix | 23 ++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/modules/nixos/services/llama-swap/config.nix b/modules/nixos/services/llama-swap/config.nix
index 57178a9..4de022e 100644
--- a/modules/nixos/services/llama-swap/config.nix
+++ b/modules/nixos/services/llama-swap/config.nix
@@ -147,6 +147,29 @@ in
       env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
     };
 
+    # https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/tree/main
+    "qwen3-coder-next-80b-instruct" = {
+      name = "Qwen3 Coder Next (80B) - Instruct";
+      macros.ctx = "262144";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-Next-UD-Q2_K_XL.gguf \
+          -c ''${ctx} \
+          --temp 1.0 \
+          --min-p 0.01 \
+          --top-p 0.95 \
+          --top-k 40 \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -fit off
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
     # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
     "qwen3-30b-2507-instruct" = {
       name = "Qwen3 2507 (30B) - Instruct";