chore: update model paths
This commit is contained in:
@@ -23,6 +23,7 @@ in
|
|||||||
terminal = {
|
terminal = {
|
||||||
bash = enabled;
|
bash = enabled;
|
||||||
btop = enabled;
|
btop = enabled;
|
||||||
|
direnv = enabled;
|
||||||
tmux = enabled;
|
tmux = enabled;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -97,8 +97,9 @@ in
|
|||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
|
-m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
|
||||||
--chat-template-file /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
|
--chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
|
||||||
|
--temp 0.15 \
|
||||||
-c 98304 \
|
-c 98304 \
|
||||||
-ctk q8_0 \
|
-ctk q8_0 \
|
||||||
-ctv q8_0 \
|
-ctv q8_0 \
|
||||||
@@ -107,14 +108,30 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
|
||||||
|
# --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'
|
||||||
|
"gpt-oss-20b-thinking" = {
|
||||||
|
name = "GPT OSS (20B) - Thinking";
|
||||||
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
|
||||||
|
-c 131072 \
|
||||||
|
--temp 1.0 \
|
||||||
|
--top-p 1.0 \
|
||||||
|
--top-k 40 \
|
||||||
|
-dev CUDA0
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
|
||||||
"qwen3-next-80b-instruct" = {
|
"qwen3-next-80b-instruct" = {
|
||||||
name = "Qwen3 Next (80B) - Instruct";
|
name = "Qwen3 Next (80B) - Instruct";
|
||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf \
|
-m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
|
||||||
-c 131072 \
|
-c 262144 \
|
||||||
--temp 0.7 \
|
--temp 0.7 \
|
||||||
--min-p 0.0 \
|
--min-p 0.0 \
|
||||||
--top-p 0.8 \
|
--top-p 0.8 \
|
||||||
@@ -122,10 +139,25 @@ in
|
|||||||
--repeat-penalty 1.05 \
|
--repeat-penalty 1.05 \
|
||||||
-ctk q8_0 \
|
-ctk q8_0 \
|
||||||
-ctv q8_0 \
|
-ctv q8_0 \
|
||||||
-fit off \
|
-fit off
|
||||||
-ncmoe 15 \
|
|
||||||
-ts 77,23
|
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
# cmd = ''
|
||||||
|
# ${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
# --port ''${PORT} \
|
||||||
|
# -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf \
|
||||||
|
# -c 131072 \
|
||||||
|
# --temp 0.7 \
|
||||||
|
# --min-p 0.0 \
|
||||||
|
# --top-p 0.8 \
|
||||||
|
# --top-k 20 \
|
||||||
|
# --repeat-penalty 1.05 \
|
||||||
|
# -ctk q8_0 \
|
||||||
|
# -ctv q8_0 \
|
||||||
|
# -fit off \
|
||||||
|
# -ncmoe 15 \
|
||||||
|
# -ts 77,23
|
||||||
|
# '';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
|
||||||
@@ -134,7 +166,7 @@ in
|
|||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
|
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
|
||||||
-c 262144 \
|
-c 262144 \
|
||||||
--temp 0.7 \
|
--temp 0.7 \
|
||||||
--min-p 0.0 \
|
--min-p 0.0 \
|
||||||
@@ -153,7 +185,7 @@ in
|
|||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
|
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
|
||||||
-c 262144 \
|
-c 262144 \
|
||||||
--temp 0.7 \
|
--temp 0.7 \
|
||||||
--min-p 0.0 \
|
--min-p 0.0 \
|
||||||
@@ -172,7 +204,7 @@ in
|
|||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
|
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
|
||||||
-c 262144 \
|
-c 262144 \
|
||||||
--temp 0.7 \
|
--temp 0.7 \
|
||||||
--min-p 0.0 \
|
--min-p 0.0 \
|
||||||
@@ -185,19 +217,17 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
|
# https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main
|
||||||
# --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'
|
"nemotron-3-nano-30b-thinking" = {
|
||||||
"gpt-oss-20b-thinking" = {
|
name = "Nemotron 3 Nano (30B) - Thinking";
|
||||||
name = "GPT OSS (20B) - Thinking";
|
|
||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
|
-m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
|
||||||
-c 131072 \
|
-c 1048576 \
|
||||||
--temp 1.0 \
|
--temp 1.1 \
|
||||||
--top-p 1.0 \
|
--top-p 0.95 \
|
||||||
--top-k 40 \
|
-fit off
|
||||||
-dev CUDA0
|
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -208,7 +238,7 @@ in
|
|||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
|
-m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
|
||||||
--mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
|
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
|
||||||
-c 65536 \
|
-c 65536 \
|
||||||
--temp 0.7 \
|
--temp 0.7 \
|
||||||
--min-p 0.0 \
|
--min-p 0.0 \
|
||||||
@@ -225,7 +255,7 @@ in
|
|||||||
name = "Qwen2.5 Coder (7B) - Instruct";
|
name = "Qwen2.5 Coder (7B) - Instruct";
|
||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
-m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
|
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
|
||||||
--fim-qwen-7b-default \
|
--fim-qwen-7b-default \
|
||||||
-c 131072 \
|
-c 131072 \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
@@ -238,7 +268,7 @@ in
|
|||||||
name = "Qwen2.5 Coder (3B) - Instruct";
|
name = "Qwen2.5 Coder (3B) - Instruct";
|
||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
-m /mnt/ssd/Models/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
|
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
|
||||||
--fim-qwen-3b-default \
|
--fim-qwen-3b-default \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-fit off \
|
-fit off \
|
||||||
@@ -246,58 +276,17 @@ in
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
|
||||||
"smollm3-3b-instruct" = {
|
"qwen3-4b-2507-instruct" = {
|
||||||
name = "SmolLM3(3B) - Instruct";
|
name = "Qwen3 2507 (4B) - Instruct";
|
||||||
cmd = ''
|
cmd = ''
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
--port ''${PORT} \
|
--port ''${PORT} \
|
||||||
-m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf \
|
-m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
|
||||||
-c 98304 \
|
-c 98304 \
|
||||||
--temp 0.6 \
|
-fit off \
|
||||||
--top-p 0.95 \
|
-ctk q8_0 \
|
||||||
--reasoning-budget 0 \
|
-ctv q8_0 \
|
||||||
-dev CUDA0
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
|
|
||||||
"ernie4.5-21b-instruct" = {
|
|
||||||
name = "ERNIE4.5 (21B) - Instruct";
|
|
||||||
cmd = ''
|
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
|
||||||
--port ''${PORT} \
|
|
||||||
-m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf \
|
|
||||||
-c 98304 \
|
|
||||||
--temp 0.7 \
|
|
||||||
--min-p 0.0 \
|
|
||||||
--top-p 0.8 \
|
|
||||||
--top-k 20
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
|
|
||||||
"olmoe-7b-instruct" = {
|
|
||||||
name = "OLMoE (7B) - Instruct";
|
|
||||||
cmd = ''
|
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
|
||||||
--port ''${PORT} \
|
|
||||||
-m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf \
|
|
||||||
-dev CUDA1
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
|
|
||||||
"phi-mini-8b-instruct" = {
|
|
||||||
name = "Phi mini (8B) - Instruct";
|
|
||||||
cmd = ''
|
|
||||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
|
||||||
--port ''${PORT} \
|
|
||||||
-m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf \
|
|
||||||
--repeat-penalty 1.05 \
|
|
||||||
--temp 0.0 \
|
|
||||||
--top-p 1.0 \
|
|
||||||
--top-k 1 \
|
|
||||||
-dev CUDA1
|
-dev CUDA1
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user