From bf7cc81a610ec0233d242c93abce4d2d4b947f38 Mon Sep 17 00:00:00 2001
From: Evan Reichard
Date: Thu, 26 Feb 2026 15:47:25 -0500
Subject: [PATCH] feat: add coding model filtering and CUDA acceleration

- claude: filter model completion to coding/synthetic models only
- llama-swap: update model to IQ4_XS and add CUDA device selection
---
 .../home/programs/terminal/claude-code/default.nix | 12 ++++++++++--
 modules/nixos/services/llama-swap/config.nix       |  5 +++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/modules/home/programs/terminal/claude-code/default.nix b/modules/home/programs/terminal/claude-code/default.nix
index 7884f07..3444a2c 100755
--- a/modules/home/programs/terminal/claude-code/default.nix
+++ b/modules/home/programs/terminal/claude-code/default.nix
@@ -69,13 +69,21 @@ in
         local prev=''${COMP_WORDS[COMP_CWORD-1]}
 
         if [[ "$prev" == "-m" || "$prev" == "--model" ]]; then
-          local models=( $(${pkgs.curl}/bin/curl -s -H "Authorization: Bearer ${authToken}" "${baseUrl}/v1/models" | ${pkgs.jq}/bin/jq -r '.data[].id' 2>/dev/null) )
+          local models=( $(${pkgs.curl}/bin/curl -s -H "Authorization: Bearer ${authToken}" "${baseUrl}/v1/models" | ${pkgs.jq}/bin/jq -r '
+            .data[] |
+            select(
+              (try (.meta.llamaswap.type[] | contains("coding")) catch false) or
+              (.name | startswith("synthetic:"))
+            ) |
+            .id
+          ' 2>/dev/null) )
+
           COMPREPLY=( $(compgen -W "''${models[*]}" -- "$cur") )
         fi
       }
 
       # Register Completion
-      complete -F _complete_claude_custom claude
+      complete -F _complete_claude_custom claude-custom
    '';
  };
 };
diff --git a/modules/nixos/services/llama-swap/config.nix b/modules/nixos/services/llama-swap/config.nix
index 06daf57..adc2def 100644
--- a/modules/nixos/services/llama-swap/config.nix
+++ b/modules/nixos/services/llama-swap/config.nix
@@ -165,19 +165,20 @@ in
       };
     };
 
-    # https://huggingface.co/unsloth/Qwen3.5-35B-A3B-GGUF/tree/main
+    # https://huggingface.co/AesSedai/Qwen3.5-35B-A3B-GGUF/tree/main
     "qwen3.5-35b-thinking" = {
       name = "Qwen3.5 (35B) - Thinking";
       macros.ctx = "262144";
       cmd = ''
         ${llama-cpp}/bin/llama-server \
           --port ''${PORT} \
-          -m /mnt/ssd/Models/Qwen3.5/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf \
+          -m /mnt/ssd/Models/Qwen3.5/Qwen3.5-35B-A3B-IQ4_XS-00001-of-00002.gguf \
           -c ''${ctx} \
           --temp 0.6 \
           --top-p 0.95 \
           --top-k 20 \
           --min-p 0.00 \
+          -dev CUDA0 \
           -fit off
       '';
       # --chat-template-kwargs "{\"enable_thinking\": false}"