Compare commits

...

2 Commits

Author SHA1 Message Date
1bce17c5f9 chore(llm): update llama-cpp, llama-swap and switch to qwen3.5-27b-thinking
- Bump llama-cpp from version 8157 to 8196
- Bump llama-swap from version 192 to 197
- Switch default assistant model from qwen3-coder-next-80b-instruct to qwen3.5-27b-thinking
- Remove glm-4-32b-instruct model configuration
- Update qwen3.5-27b-thinking config:
  - Use bartowski quantization (IQ4_XS) instead of unsloth
  - Increase context window from 131072 to 196608 tokens
  - Add KV cache type settings (-ctk/-ctv q8_0) and pin to the CUDA0 device (-dev CUDA0)
- Add 1password-cli to home-manager programs
- Fix typo: 'dispay' -> 'display' in llm-config.lua
2026-03-05 07:32:57 -05:00
9234782492 Update modules/nixos/services/openssh/default.nix 2026-02-27 01:03:38 +00:00
6 changed files with 15 additions and 31 deletions

View File

@@ -50,5 +50,6 @@ in
colima colima
docker docker
keycastr keycastr
_1password-cli
]; ];
} }

View File

@@ -1,5 +1,5 @@
local llm_endpoint = "https://llm-api.va.reichard.io" local llm_endpoint = "https://llm-api.va.reichard.io"
local llm_assistant_model = "qwen3-coder-next-80b-instruct" local llm_assistant_model = "qwen3.5-27b-thinking"
local llm_infill_model = llm_assistant_model local llm_infill_model = llm_assistant_model
local current_fim = "copilot" local current_fim = "copilot"
@@ -63,7 +63,7 @@ codecompanion.setup({
inline = { adapter = "llamaswap" }, inline = { adapter = "llamaswap" },
cmd = { adapter = "llamaswap" }, cmd = { adapter = "llamaswap" },
}, },
chat = { dispay = "telescope" }, chat = { display = "telescope" },
memory = { opts = { chat = { enabled = true } } }, memory = { opts = { chat = { enabled = true } } },
}) })

View File

@@ -54,27 +54,6 @@ in
}; };
}; };
# https://huggingface.co/unsloth/GLM-4-32B-0414-GGUF/tree/main
"glm-4-32b-instruct" = {
name = "GLM 4 (32B) - Instruct";
macros.ctx = "32768";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GLM/GLM-4-32B-0414-Q4_K_M.gguf \
-c ''${ctx} \
--temp 0.6 \
--top-k 40 \
--top-p 0.95 \
--min-p 0.0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
};
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
"gpt-oss-20b-thinking" = { "gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking"; name = "GPT OSS (20B) - Thinking";
@@ -190,19 +169,22 @@ in
}; };
}; };
# https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/tree/main # https://huggingface.co/bartowski/Qwen_Qwen3.5-27B-GGUF/tree/main
"qwen3.5-27b-thinking" = { "qwen3.5-27b-thinking" = {
name = "Qwen3.5 (27B) - Thinking"; name = "Qwen3.5 (27B) - Thinking";
macros.ctx = "131072"; macros.ctx = "196608";
cmd = '' cmd = ''
${llama-cpp}/bin/llama-server \ ${llama-cpp}/bin/llama-server \
--port ''${PORT} \ --port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.5/Qwen3.5-27B-UD-Q4_K_XL.gguf \ -m /mnt/ssd/Models/Qwen3.5/Qwen_Qwen3.5-27B-IQ4_XS.gguf \
-c ''${ctx} \ -c ''${ctx} \
--temp 0.6 \ --temp 0.6 \
--top-p 0.95 \ --top-p 0.95 \
--top-k 20 \ --top-k 20 \
--min-p 0.00 \ --min-p 0.00 \
-ctk q8_0 \
-ctv q8_0 \
-dev CUDA0 \
-fit off -fit off
''; '';
# --chat-template-kwargs "{\"enable_thinking\": false}" # --chat-template-kwargs "{\"enable_thinking\": false}"

View File

@@ -67,6 +67,7 @@ in
"hmac-sha2-512-etm@openssh.com" "hmac-sha2-512-etm@openssh.com"
"hmac-sha2-256-etm@openssh.com" "hmac-sha2-256-etm@openssh.com"
"umac-128-etm@openssh.com" "umac-128-etm@openssh.com"
"hmac-sha2-512"
]; ];
}; };

View File

@@ -7,12 +7,12 @@
vulkanSupport = true; vulkanSupport = true;
}).overrideAttrs }).overrideAttrs
(oldAttrs: rec { (oldAttrs: rec {
version = "8157"; version = "8196";
src = pkgs.fetchFromGitHub { src = pkgs.fetchFromGitHub {
owner = "ggml-org"; owner = "ggml-org";
repo = "llama.cpp"; repo = "llama.cpp";
tag = "b${version}"; tag = "b${version}";
hash = "sha256-3u9BWMZGGL3RTWxlEl5swOBe4yDoBAEfz/m2b1hw6fc="; hash = "sha256-GZRHiyT8mvhV5RTczDRnCSh31UxRZ3F8tEBC1l8oFNQ=";
leaveDotGit = true; leaveDotGit = true;
postFetch = '' postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT git -C "$out" rev-parse --short HEAD > $out/COMMIT

View File

@@ -13,13 +13,13 @@ let
in in
buildGoModule (finalAttrs: { buildGoModule (finalAttrs: {
pname = "llama-swap"; pname = "llama-swap";
version = "192"; version = "197";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "mostlygeek"; owner = "mostlygeek";
repo = "llama-swap"; repo = "llama-swap";
tag = "v${finalAttrs.version}"; tag = "v${finalAttrs.version}";
hash = "sha256-CMzF935cREAFfWHt5yzX05wvp/DC/3GWZZfhRtJVYaA="; hash = "sha256-EXgyYmpbN/zzr6KeSpvFEB+FS7gDIZFinNMv70v5boY=";
# populate values that require us to use git. By doing this in postFetch we # populate values that require us to use git. By doing this in postFetch we
# can delete .git afterwards and maintain better reproducibility of the src. # can delete .git afterwards and maintain better reproducibility of the src.
leaveDotGit = true; leaveDotGit = true;
@@ -35,7 +35,7 @@ buildGoModule (finalAttrs: {
vendorHash = "sha256-XiDYlw/byu8CWvg4KSPC7m8PGCZXtp08Y1velx4BR8U="; vendorHash = "sha256-XiDYlw/byu8CWvg4KSPC7m8PGCZXtp08Y1velx4BR8U=";
passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; }; passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
passthru.npmDepsHash = "sha256-4VH9jJ1Ae16p8kUubZBrIwwqw/X8I+wDg378G82WCtU="; passthru.npmDepsHash = "sha256-Fs7+JKE8YBp2Xj8bVBlwmT+UwuD642VeUHiPx+fv94c=";
nativeBuildInputs = [ nativeBuildInputs = [
versionCheckHook versionCheckHook