fix(llama-swap): reduce qwen3.6 dual context
This commit is contained in:
@@ -650,7 +650,7 @@ in
|
|||||||
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
|
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
|
||||||
"qwen3.6-27b-dual" = {
|
"qwen3.6-27b-dual" = {
|
||||||
name = "Qwen3.6 27B (Dual GPU, UD-Q6)";
|
name = "Qwen3.6 27B (Dual GPU, UD-Q6)";
|
||||||
macros.ctx = "196608";
|
macros.ctx = "180000";
|
||||||
cmd = ''
|
cmd = ''
|
||||||
${llama-cpp}/bin/llama-server \
|
${llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
|
|||||||
Reference in New Issue
Block a user