fix(llama-swap): reduce qwen3.6 dual context

This commit is contained in:
2026-05-20 00:11:08 -04:00
parent a7941db240
commit 79884e8a77

View File

@@ -650,7 +650,7 @@ in
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
"qwen3.6-27b-dual" = {
name = "Qwen3.6 27B (Dual GPU, UD-Q6)";
macros.ctx = "196608";
macros.ctx = "180000";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \