chore(nixos/llama-swap): remove synthetic peer and tune local model args

This commit is contained in:
2026-04-30 11:43:04 -04:00
parent 31363f5f8d
commit 93e2247a30
2 changed files with 16 additions and 21 deletions

View File

@@ -186,6 +186,7 @@ in
--port ''${PORT} \ --port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-IQ4_XS.gguf \ -m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-IQ4_XS.gguf \
-c ''${ctx} \ -c ''${ctx} \
--parallel 2 \
--temp 0.6 \ --temp 0.6 \
--top-p 0.95 \ --top-p 0.95 \
--top-k 20 \ --top-k 20 \
@@ -193,11 +194,20 @@ in
--presence-penalty 1.5 \ --presence-penalty 1.5 \
-ctk q8_0 \ -ctk q8_0 \
-ctv q8_0 \ -ctv q8_0 \
--keep 3000 \
--batch-size 4096 \
--ubatch-size 1024 \
--spec-type ngram-mod \
--spec-ngram-mod-n-match 24 \
--spec-draft-n-min 16 \
--spec-draft-n-max 64 \
-dev CUDA0 \ -dev CUDA0 \
-fit off \ -fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}" --chat-template-kwargs "{\"preserve_thinking\": true}"
''; '';
# --chat-template-kwargs "{\"enable_thinking\": false}" # --chat-template-kwargs "{\"enable_thinking\": false}"
# --spec-draft-n-min 16 \
# --spec-draft-n-max 32 \
metadata = { metadata = {
type = [ type = [
"text-generation" "text-generation"
@@ -436,20 +446,4 @@ in
}; };
}; };
}; };
peers = {
synthetic = {
proxy = "https://api.synthetic.new/openai/";
models = [
"hf:MiniMaxAI/MiniMax-M2.1"
"hf:MiniMaxAI/MiniMax-M2.5"
"hf:moonshotai/Kimi-K2.5"
"hf:moonshotai/Kimi-K2-Instruct-0905"
"hf:moonshotai/Kimi-K2-Thinking"
"hf:openai/gpt-oss-120b"
"hf:Qwen/Qwen3.5-397B-A17B"
"hf:zai-org/GLM-4.7"
];
};
};
} }

View File

@@ -94,11 +94,12 @@ in
owner = "llama-swap"; owner = "llama-swap";
group = "llama-swap"; group = "llama-swap";
mode = "0400"; mode = "0400";
content = builtins.toJSON ( content = builtins.toJSON cfg.config;
recursiveUpdate cfg.config { # content = builtins.toJSON (
peers.synthetic.apiKey = config.sops.placeholder.synthetic_apikey; # recursiveUpdate cfg.config {
} # peers.synthetic.apiKey = config.sops.placeholder.synthetic_apikey;
); # }
# );
}; };
}; };