config(llama-swap): enable preserve_thinking in chat template kwargs
This commit is contained in:
@@ -140,7 +140,8 @@ in
|
|||||||
--presence-penalty 0.0 \
|
--presence-penalty 0.0 \
|
||||||
--repeat-penalty 1.0 \
|
--repeat-penalty 1.0 \
|
||||||
-dev CUDA0 \
|
-dev CUDA0 \
|
||||||
-fit off
|
-fit off \
|
||||||
|
--chat-template-kwargs "{\"preserve_thinking\": true}"
|
||||||
'';
|
'';
|
||||||
metadata = {
|
metadata = {
|
||||||
type = [
|
type = [
|
||||||
@@ -168,7 +169,6 @@ in
|
|||||||
-dev CUDA0 \
|
-dev CUDA0 \
|
||||||
-fit off
|
-fit off
|
||||||
'';
|
'';
|
||||||
# --chat-template-kwargs "{\"enable_thinking\": false}"
|
|
||||||
metadata = {
|
metadata = {
|
||||||
type = [
|
type = [
|
||||||
"text-generation"
|
"text-generation"
|
||||||
@@ -194,7 +194,8 @@ in
|
|||||||
-ctk q8_0 \
|
-ctk q8_0 \
|
||||||
-ctv q8_0 \
|
-ctv q8_0 \
|
||||||
-dev CUDA0 \
|
-dev CUDA0 \
|
||||||
-fit off
|
-fit off \
|
||||||
|
--chat-template-kwargs "{\"preserve_thinking\": true}"
|
||||||
'';
|
'';
|
||||||
# --chat-template-kwargs "{\"enable_thinking\": false}"
|
# --chat-template-kwargs "{\"enable_thinking\": false}"
|
||||||
metadata = {
|
metadata = {
|
||||||
|
|||||||
Reference in New Issue
Block a user