chore: bump pi-coding-agent to 0.78.1, reduce llama-swap ctx size, add builder host key
- packages/pi-coding-agent: 0.78.0 → 0.78.1 (new gondolin extension deps)
- llama-swap: reduce qwen3.6-35b-dual macros.ctx 215000 → 131072
- nix: add publicHostKey for remote builder
This commit is contained in:
@@ -38,6 +38,7 @@ in
|
||||
sshUser = "evanreichard";
|
||||
protocol = "ssh";
|
||||
sshKey = config.sops.secrets.builder_ssh_key.path;
|
||||
publicHostKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGlpC0rorATKzK8mLMKgCYqM5N2N/GgS2t4M13cwnAOS";
|
||||
supportedFeatures = [
|
||||
"benchmark"
|
||||
"big-parallel"
|
||||
|
||||
@@ -682,7 +682,10 @@ in
|
||||
# https://huggingface.co/unsloth/Qwen3.6-35B-A3B-MTP-GGUF/tree/main
|
||||
"qwen3.6-35b-dual" = {
|
||||
name = "Qwen3.6 35B (Dual GPU, UD-Q6)";
|
||||
macros.ctx = "215000";
|
||||
# macros.ctx = "215000";
|
||||
# -ctk q8_0 \
|
||||
# -ctv q8_0 \
|
||||
macros.ctx = "131072";
|
||||
cmd = ''
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
@@ -694,8 +697,6 @@ in
|
||||
--top-k 20 \
|
||||
--min-p 0.00 \
|
||||
--presence-penalty 0.0 \
|
||||
-ctk q8_0 \
|
||||
-ctv q8_0 \
|
||||
--spec-type draft-mtp \
|
||||
--spec-draft-n-max 3 \
|
||||
-dev CUDA0,CUDA1 \
|
||||
|
||||
Reference in New Issue
Block a user