diff --git a/.agents/skills/update-vllm-3090-configs/SKILL.md b/.agents/skills/update-vllm-3090-configs/SKILL.md index 5266bbc..a7323cd 100644 --- a/.agents/skills/update-vllm-3090-configs/SKILL.md +++ b/.agents/skills/update-vllm-3090-configs/SKILL.md @@ -19,6 +19,15 @@ Local config keys: - `vllm-qwen3.6-27b-long-text` - `vllm-qwen3.6-27b-long-vision` +## Hash Tracking + +Each config entry stores an upstream commit hash comment: + `# Upstream: club-3090 <hash> (<date>) - <file>` + +When comparing, first extract stored hashes. If a config's hash matches +upstream HEAD, skip it (report "already synced"). Only run a full diff on configs +whose hash differs. Update the hash comment when edits are applied. + ## Upstream References Compare against `club-3090` master: @@ -37,25 +46,26 @@ git clone https://github.com/noonghunna/club-3090 _scratch/club-3090 2>/dev/null ## Required Workflow 1. Fetch/update upstream refs under `_scratch/club-3090` or fetch the raw files. -2. Compare upstream compose files to the three local llama-swap entries. Translate docker-compose semantics into the existing `docker run`/llama-swap format. -3. Compare upstream `scripts/setup.sh` Genesis pin to local `GENESIS_PIN` in `setup-qwen36-vllm.sh`. -4. Check upstream compose volumes/entrypoint for sidecar patches. If patches are added, removed, renamed, or invoked differently, update both: +2. Extract stored upstream hashes from `# Upstream: club-3090 ...` comments in config.nix. Skip any config whose hash matches upstream HEAD (report "already synced"). +3. Compare upstream compose files to the remaining local llama-swap entries. Translate docker-compose semantics into the existing `docker run`/llama-swap format. +4. Compare upstream `scripts/setup.sh` Genesis pin to local `GENESIS_PIN` in `setup-qwen36-vllm.sh`. +5. Check upstream compose volumes/entrypoint for sidecar patches. 
If patches are added, removed, renamed, or invoked differently, update both: - runtime mounts and `python3 /patches/...` calls in `config.nix` - download/install logic and summary in `setup-qwen36-vllm.sh` -5. Ignore these diffs unless the user explicitly asks otherwise: +6. Ignore these diffs unless the user explicitly asks otherwise: - `shm_size` / shm-related compose settings - local timing patch `patch_timings_07351e088.py` and its mount/invocation - model served-name differences caused by llama-swap `${MODEL_ID}` - `HUGGING_FACE_HUB_TOKEN`; keep local CUDA device/env choices - upstream relative paths vs local `/mnt/ssd/vLLM/...` paths - docker-compose format vs local llama-swap/Nix format -6. Before editing, present: +7. Before editing, present: - upstream files/commit checked - meaningful diffs found - ignored diffs - exact planned local changes Then wait for explicit user approval. -7. After approval, edit minimally and validate: +8. After approval, edit minimally and update the `# Upstream: club-3090 ...` hash comments. Validate: - `bash -n modules/nixos/services/llama-swap/setup-qwen36-vllm.sh` - `nix-instantiate --parse modules/nixos/services/llama-swap/config.nix` -8. Summarize changed files and any remaining upstream differences. +9. Summarize changed files and any remaining upstream differences. 
diff --git a/modules/nixos/services/llama-swap/config.nix b/modules/nixos/services/llama-swap/config.nix index 7c05487..5dea8ef 100644 --- a/modules/nixos/services/llama-swap/config.nix +++ b/modules/nixos/services/llama-swap/config.nix @@ -129,7 +129,7 @@ in }; # https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm - # Synced from: club-3090 e1137d6 (2026-05-09) - single/long-text.yml + # Upstream: club-3090 83bf73d (2026-05-10) - single/long-text.yml # Long-text variant - 180K context, text-only (no vision) # TurboQuant 3-bit KV + MTP n=3 + Genesis v7.69 + Cliff 2 closure recipe "vllm-qwen3.6-27b-long-text" = { @@ -157,6 +157,7 @@ in --language-model-only --trust-remote-code --reasoning-parser qwen3 + --default-chat-template-kwargs '{"enable_thinking": false}' --enable-auto-tool-choice --tool-call-parser qwen3_coder --enable-prefix-caching @@ -268,7 +269,7 @@ in }; # https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm - # Synced from: club-3090 e1137d6 (2026-05-09) - single/long-vision.yml + # Upstream: club-3090 83bf73d (2026-05-10) - single/long-vision.yml # Long-vision variant - 145K context with vision tower active # TurboQuant 3-bit KV + MTP n=3 + Genesis v7.69 + Cliff 2 env vars (mem-util kept at 0.95) "vllm-qwen3.6-27b-long-vision" = { @@ -295,6 +296,7 @@ in --kv-cache-dtype turboquant_3bit_nc --trust-remote-code --reasoning-parser qwen3 + --default-chat-template-kwargs '{"enable_thinking": false}' --enable-auto-tool-choice --tool-call-parser qwen3_coder --enable-prefix-caching @@ -401,7 +403,7 @@ in }; # https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm - # Synced from: club-3090 ae4846f (2026-05-02) - docker-compose.tools-text.yml + # Upstream: club-3090 83bf73d (2026-05-10) - single/tools-text.yml # Tools-text variant - 75K context, text-only (no vision) # fp8_e5m2 KV + MTP n=3. IDE agents (Cline, Cursor, OpenCode, etc.) 
"vllm-qwen3.6-27b-tools-text" = { @@ -429,6 +431,7 @@ in --language-model-only --trust-remote-code --reasoning-parser qwen3 + --default-chat-template-kwargs '{"enable_thinking": false}' --enable-auto-tool-choice --tool-call-parser qwen3_coder --enable-prefix-caching