diff --git a/homes/x86_64-linux/evanreichard@lin-va-desktop/default.nix b/homes/x86_64-linux/evanreichard@lin-va-desktop/default.nix index f791fba..590b6f2 100755 --- a/homes/x86_64-linux/evanreichard@lin-va-desktop/default.nix +++ b/homes/x86_64-linux/evanreichard@lin-va-desktop/default.nix @@ -23,6 +23,7 @@ in terminal = { bash = enabled; btop = enabled; + direnv = enabled; tmux = enabled; }; }; diff --git a/systems/x86_64-linux/lin-va-desktop/default.nix b/systems/x86_64-linux/lin-va-desktop/default.nix index 66e1094..fcd95e3 100755 --- a/systems/x86_64-linux/lin-va-desktop/default.nix +++ b/systems/x86_64-linux/lin-va-desktop/default.nix @@ -97,8 +97,9 @@ in cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \ - --chat-template-file /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \ + -m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \ + --chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \ + --temp 0.15 \ -c 98304 \ -ctk q8_0 \ -ctv q8_0 \ @@ -107,14 +108,30 @@ in ''; }; + # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main + # --chat-template-kwargs '{\"reasoning_effort\":\"low\"}' + "gpt-oss-20b-thinking" = { + name = "GPT OSS (20B) - Thinking"; + cmd = '' + ${pkgs.reichard.llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \ + -c 131072 \ + --temp 1.0 \ + --top-p 1.0 \ + --top-k 40 \ + -dev CUDA0 + ''; + }; + # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main "qwen3-next-80b-instruct" = { name = "Qwen3 Next (80B) - Instruct"; cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf \ - -c 131072 \ + -m 
/mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \ + -c 262144 \ --temp 0.7 \ --min-p 0.0 \ --top-p 0.8 \ @@ -122,10 +139,25 @@ in --repeat-penalty 1.05 \ -ctk q8_0 \ -ctv q8_0 \ - -fit off \ - -ncmoe 15 \ - -ts 77,23 + -fit off ''; + + # cmd = '' + # ${pkgs.reichard.llama-cpp}/bin/llama-server \ + # --port ''${PORT} \ + # -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf \ + # -c 131072 \ + # --temp 0.7 \ + # --min-p 0.0 \ + # --top-p 0.8 \ + # --top-k 20 \ + # --repeat-penalty 1.05 \ + # -ctk q8_0 \ + # -ctv q8_0 \ + # -fit off \ + # -ncmoe 15 \ + # -ts 77,23 + # ''; }; # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main @@ -134,7 +166,7 @@ in cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \ + -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \ -c 262144 \ --temp 0.7 \ --min-p 0.0 \ @@ -153,7 +185,7 @@ in cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \ + -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \ -c 262144 \ --temp 0.7 \ --min-p 0.0 \ @@ -172,7 +204,7 @@ in cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \ + -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \ -c 262144 \ --temp 0.7 \ --min-p 0.0 \ @@ -185,19 +217,17 @@ in ''; }; - # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main - # --chat-template-kwargs '{\"reasoning_effort\":\"low\"}' - "gpt-oss-20b-thinking" = { - name = "GPT OSS (20B) - Thinking"; + # https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main + "nemotron-3-nano-30b-thinking" = { + name = "Nemotron 3 Nano (30B) - Thinking"; cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m 
/mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \ - -c 131072 \ - --temp 1.0 \ - --top-p 1.0 \ - --top-k 40 \ - -dev CUDA0 + -m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \ + -c 1048576 \ + --temp 1.1 \ + --top-p 0.95 \ + -fit off ''; }; @@ -208,7 +238,7 @@ in ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \ + -m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \ - --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \ + --mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \ -c 65536 \ --temp 0.7 \ --min-p 0.0 \ @@ -225,7 +255,7 @@ in name = "Qwen2.5 Coder (7B) - Instruct"; cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ - -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \ + -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \ --fim-qwen-7b-default \ -c 131072 \ --port ''${PORT} \ @@ -238,7 +268,7 @@ in name = "Qwen2.5 Coder (3B) - Instruct"; cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ - -m /mnt/ssd/Models/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \ + -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \ --fim-qwen-3b-default \ --port ''${PORT} \ -fit off \ @@ -246,58 +276,17 @@ in ''; }; - # https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main - "smollm3-3b-instruct" = { - name = "SmolLM3(3B) - Instruct"; + # https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main + "qwen3-4b-2507-instruct" = { + name = "Qwen3 2507 (4B) - Instruct"; cmd = '' ${pkgs.reichard.llama-cpp}/bin/llama-server \ --port ''${PORT} \ - -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf \ + -m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ -c 98304 \ - --temp 0.6 \ - --top-p 0.95 \ - --reasoning-budget 0 \ - -dev CUDA0 - ''; - }; - - # https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main - "ernie4.5-21b-instruct" = { - name = "ERNIE4.5 (21B) - Instruct"; - cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ 
- --port ''${PORT} \ - -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf \ - -c 98304 \ - --temp 0.7 \ - --min-p 0.0 \ - --top-p 0.8 \ - --top-k 20 - ''; - }; - - # https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main - "olmoe-7b-instruct" = { - name = "OLMoE (7B) - Instruct"; - cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf \ - -dev CUDA1 - ''; - }; - - # https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main - "phi-mini-8b-instruct" = { - name = "Phi mini (8B) - Instruct"; - cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf \ - --repeat-penalty 1.05 \ - --temp 0.0 \ - --top-p 1.0 \ - --top-k 1 \ + -fit off \ + -ctk q8_0 \ + -ctv q8_0 \ -dev CUDA1 ''; };