chore(nixos/llama-swap): remove synthetic peer and tune local model args

This commit is contained in:
2026-04-30 11:43:04 -04:00
parent 31363f5f8d
commit 93e2247a30
2 changed files with 16 additions and 21 deletions

View File

@@ -186,6 +186,7 @@ in
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-IQ4_XS.gguf \
-c ''${ctx} \
--parallel 2 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
@@ -193,11 +194,20 @@ in
--presence-penalty 1.5 \
-ctk q8_0 \
-ctv q8_0 \
--keep 3000 \
--batch-size 4096 \
--ubatch-size 1024 \
--spec-type ngram-mod \
--spec-ngram-mod-n-match 24 \
--spec-draft-n-min 16 \
--spec-draft-n-max 64 \
-dev CUDA0 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
# --chat-template-kwargs "{\"enable_thinking\": false}"
# --spec-draft-n-min 16 \
# --spec-draft-n-max 32 \
metadata = {
type = [
"text-generation"
@@ -436,20 +446,4 @@ in
};
};
};
# Peer definitions: each attribute names one peer and gives its proxy URL and model list.
peers = {
# Peer named "synthetic".
synthetic = {
# Base URL set as this peer's proxy target.
proxy = "https://api.synthetic.new/openai/";
# Model identifiers listed for this peer.
# NOTE(review): presumably requests for these IDs are forwarded to `proxy` —
# confirm against the llama-swap configuration docs.
models = [
"hf:MiniMaxAI/MiniMax-M2.1"
"hf:MiniMaxAI/MiniMax-M2.5"
"hf:moonshotai/Kimi-K2.5"
"hf:moonshotai/Kimi-K2-Instruct-0905"
"hf:moonshotai/Kimi-K2-Thinking"
"hf:openai/gpt-oss-120b"
"hf:Qwen/Qwen3.5-397B-A17B"
"hf:zai-org/GLM-4.7"
];
};
};
}