chore(llama-swap): tune presence penalty to 1.5 and remove repeat penalty

This commit is contained in:
2026-05-07 20:37:54 -04:00
parent d142c5ff7e
commit f3cc67b17d

View File

@@ -44,8 +44,7 @@ in
--top-p 0.95 \
--top-k 20 \
--min-p 0.0 \
- --presence-penalty 0.0 \
- --repeat-penalty 1.0 \
+ --presence-penalty 1.5 \
-dev CUDA0 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"