Compare commits

..

69 Commits

Author SHA1 Message Date
379fe5c290 feat(open-proxy): configure shared token file 2026-06-16 15:03:36 -04:00
ea36629461 fix: webbrowser.open 2026-06-16 14:42:21 -04:00
0f85723755 feat: open-proxy 2026-06-16 14:29:17 -04:00
4db8c17f70 chore(nix): update builder public host key 2026-06-11 16:07:53 -04:00
36f2abc1a9 chore(thinkpad): reorganize packages and drop ACPI GPE mask
Move mosh/unzip into common home packages, add orca-slicer to the thinkpad home, and remove the now-redundant system package list and kernel param.
2026-06-11 16:07:53 -04:00
4cd5d1ba62 chore: bump pi-coding-agent to 0.78.1, reduce llama-swap ctx size, add builder host key
- packages/pi-coding-agent: 0.78.0 → 0.78.1 (new gondolin extension deps)
- llama-swap: reduce qwen3.6-35b-dual macros.ctx 215000 → 131072
- nix: add publicHostKey for remote builder
2026-06-04 22:44:01 -04:00
73b2bbc973 fix: nvidia driver & llama cpp update 2026-06-04 06:27:38 -04:00
bc95e479d6 fix(asahi): reload wifi driver after resume 2026-06-03 21:57:58 -04:00
d15e610337 fix(firefox): restore declarative extensions 2026-06-03 21:57:39 -04:00
f15765d5a7 fix: nvim diag toggle 2026-06-03 08:17:58 -04:00
881f8fe816 fix: nvim update config 2026-06-02 21:21:05 -04:00
9d8ec6d7d2 chore: rename swww to awww 2026-06-02 21:11:20 -04:00
fd9695a754 chore: hyprland migrate config 2026-06-02 21:09:40 -04:00
5cf4b93072 chore: hyprland updates 2026-06-02 20:55:06 -04:00
c359a26d3f chore: update 25.11 -> 26.05 2026-06-02 20:02:57 -04:00
fbeb040914 pi-coding-agent: 0.77.0 → 0.78.0 2026-05-31 19:46:32 -04:00
e18aee716a docs(pi): constrain git commit bodies 2026-05-30 09:00:06 -04:00
b4e1afd012 fix(pi-coding-agent): include browser tools in wrapper 2026-05-30 08:58:51 -04:00
6dd5e38d9b feat(tuxguitar): add source-built package 2026-05-30 08:57:51 -04:00
f4a213de8e chore: llama cpp tweaks 2026-05-29 22:32:20 -04:00
68cb7ea3d5 feat(nvim): exrc dynamic LSP 2026-05-29 07:43:36 -04:00
7842c9cd96 chore(pi-coding-agent): bump to 0.77.0 2026-05-29 07:36:24 -04:00
285fe99461 feat: create skill variable helper 2026-05-28 17:45:37 -04:00
544ab6f781 chore: update pi-coding-agent 2026-05-28 17:16:01 -04:00
332713f8fc feat(pi): adopt pi-web extension for web browsing
Replaces the local web-glimpse skill with the pi-web package, configured
via a sops template so the kagi token can be embedded alongside the
searxng base URL.
2026-05-25 23:19:00 -04:00
b4fffef1d7 docs: add asking questions and rephrase-over-append guidelines to AGENTS.md 2026-05-24 07:53:16 -04:00
a3e080b2f2 build(pi-coding-agent): bump version 0.75.4 → 0.75.5 2026-05-24 07:53:12 -04:00
1b43af76b1 chore: update pi 2026-05-21 07:35:24 -04:00
a2298fcfae fix: restart llama swap on config change 2026-05-20 16:46:40 -04:00
79884e8a77 fix(llama-swap): reduce qwen3.6 dual context 2026-05-20 00:11:08 -04:00
a7941db240 chore(packages): bump llama-cpp to HEAD 20260519
Update llama-cpp from tagged release b9196 to HEAD build dated
2026-05-19 (rev b28a2f37). Switch to date-based versioning for HEAD
builds and update source/webui hashes.

Also clarify update-package-hashes skill: always use lib.fakeHash
instead of literal sha256 placeholder strings.
2026-05-19 20:44:30 -04:00
964b0dd2a1 feat(llama-swap): add qwen3.6-27b IQ4_KS config for CUDA0 2026-05-19 08:00:13 -04:00
81ffe67cce refactor(llama-swap): replace --parallel with -np and add -kvu flag
Switch llama-server invocations from --parallel to -np with -kvu
(kv-cache unified) across Qwen3.6 model configs. Also reduce
context for qwen3.6-27b-cuda0 from 150k to 140k.
2026-05-19 06:22:08 -04:00
5b2111c7e8 chore(packages): bump pi-coding-agent to v0.75.3
Update bundled package-lock.json and refresh npmDepsHash.
2026-05-18 13:28:16 -04:00
62f3ddc218 chore(packages): bump qwen-code to 0.16.0-preview.0 2026-05-17 21:02:34 -04:00
0f9513ec26 chore(packages): bump llama-swap 208 → 216 2026-05-17 20:41:23 -04:00
1623f8d14a chore(packages): bump pi-coding-agent to v0.75.0
Update src hash, npmDepsHash, and package-lock.json for v0.75.0.
Fix preBuild substitute pattern to match new generate-image-models step.
2026-05-17 16:45:41 -04:00
719f2798e5 chore(packages): bump llama-cpp b9180 → b9196 2026-05-17 13:09:49 -04:00
24dd67a0de refactor(llama-swap): standardize model ids 2026-05-17 11:38:25 -04:00
89e2161ff4 build(reichard.llama-cpp): bump to b9180
Update version from b9159 to b9180. Adapt to upstream renames
(tools/server/webui -> tools/ui) and updated vite output paths.
Drop MTP patch (PR #22673) which has been merged upstream.
2026-05-16 13:58:15 -04:00
3b4f54b2b1 build(conduit): update to HEAD 8dfb14f 2026-05-15 13:28:44 -04:00
4e2d03ae89 build(packages): bump llama-cpp to b9159, add WebUI derivation, fix spec-type
- Bump llama-cpp from b9048 to b9159
- Add WebUI build derivation to work around HF bucket fetch in Nix sandbox
- Switch MTP patch from .patch to .diff (squashed unified diff applies cleanly)
- Refactor default.nix with let bindings for cleaner structure
- Add AGENTS.md documenting version/postFetch pitfalls
- Add qwen3.6-27b-vllm-50k single-GPU config to llama-swap
- Fix --spec-type from "mtp" to "draft-mtp" in llama.cpp configs
- Update update-package-hashes skill with fetchpatch/.diff guidance
2026-05-15 11:14:57 -04:00
eaf307db23 docs(pi/agents): expand principles and comment style guidance
Add explicit priority order (correctness > maintainability > polish),
tighten comment style to default-no-comment with why-only rationale,
and document splitting skill workflow from reference sub-docs.
2026-05-14 18:22:01 -04:00
b16d816a18 build(pi-coding-agent): bump to 0.74.0 with enriched lockfile workflow
Upstream v0.74.0 lockfile omits resolved/integrity metadata needed by
buildNpmPackage's offline NPM cache. Add a package-local enriched lockfile,
a script to regenerate it from the npm registry, and a prePatch step to
copy it into the build sandbox.
2026-05-12 17:13:43 -04:00
18e8a39ee3 feat(pi): add scout subagent for codebase reconnaissance 2026-05-12 16:51:15 -04:00
328bb6e1db feat(llama-swap): add ik-llama-cpp package and Qwen3.6-27B MTP config
Add ikawrakow/ik_llama.cpp as a new package with CUDA/Vulkan support,
enabling MTP (Multi-Token Prediction) and IQ4_KS quantization. Wire it
into llama-swap with a new 'ik-qwen3.6-27b-iq4ks-thinking' model config
and 'iq36' alias. Also add a chat template download to the vLLM setup
script and include the binary on lin-va-desktop.
2026-05-12 16:19:34 -04:00
a01f9e34ee chore: tweak ctx 2026-05-12 09:27:42 -04:00
9824728ccb feat(pi): add pi-subagents extension 2026-05-12 08:41:06 -04:00
9ec2d61fcc chore(llama-swap): bump llama-cpp to b9048 and swap in UD-Q4/Q6 MTP configs
Replace qwen3.6-27b-thinking and qwen3.6-27b-mtp-thinking with
qwen3.6-27b-udq4-thinking (single GPU) and qwen3.6-27b-udq6-thinking
(dual GPU). Update aliases and concurrent set accordingly.
2026-05-11 15:26:39 -04:00
1879e98ebc chore(lin-va-desktop): bump nvidia power limit from 250 to 290 2026-05-11 11:00:44 -04:00
4df32ad273 fix(llama-swap): allow qwen thinking by default 2026-05-11 09:51:01 -04:00
ecad94aab3 fix(llama-swap): update vllm timings patch 2026-05-11 09:40:13 -04:00
187c717383 fix(pi extension): simplify regex for replacing 'pi' with 'claude code' 2026-05-11 09:06:30 -04:00
352e99c732 feat(llama-swap): add gemma-4-26b-vision model config 2026-05-10 16:59:35 -04:00
6fff658f9d feat(llama-swap): add --default-chat-template-kwargs to vLLM 3090 configs
Sync all three Qwen3.6 27B vLLM configs (tools-text, long-text,
long-vision) with club-3090 83bf73d. Adds disable-thinking flag
and introduces upstream hash tracking comments for future syncs.

Update update-vllm-3090-configs skill to use hash-based skip logic.
2026-05-10 16:57:17 -04:00
885fe8517d chore: remove fish 2026-05-10 09:27:48 -04:00
2133786985 chore: update flake.lock inputs 2026-05-10 08:36:23 -04:00
b41e9f2a84 docs(pi): add agent knowledge capture guidance 2026-05-09 10:18:13 -04:00
b25a933dd0 docs(pi): tighten agent guidance 2026-05-09 10:16:34 -04:00
37b0fae7e2 fix(llama-swap): sync qwen vllm 3090 configs 2026-05-09 10:16:32 -04:00
02410568dc docs(skills): add vllm 3090 update workflow 2026-05-09 10:16:29 -04:00
2f29008a2a chore: remove Handy speech-to-text dependency 2026-05-09 09:07:36 -04:00
87cdabbef4 feat(home): add Handy speech-to-text module 2026-05-07 22:36:30 -04:00
18a4e46bcb chore(lin-va-thinkpad): enable nix-ld 2026-05-07 20:37:56 -04:00
f3cc67b17d chore(llama-swap): tune presence penalty to 1.5 and remove repeat penalty 2026-05-07 20:37:54 -04:00
d142c5ff7e build(pi-coding-agent): bump to 0.73.1 and migrate to earendil-works org 2026-05-07 20:37:53 -04:00
fea5cc887d feat(llama-swap): add Qwen3.6-27B MTP thinking model and bump llama-cpp to b9045
Add qwen3.6-27b-mtp-thinking model config with 150K context, MTP
speculative decoding, and thinking mode support. Bump llama-cpp
from b9009 to b9045 and apply MTP patch from upstream PR #22673.
2026-05-06 12:49:49 -04:00
f6f17831bf feat(lin-va-terminal): enable nix-ld 2026-05-05 23:50:48 -04:00
5ded87b24a chore(packages): bump pi-coding-agent to 0.73.0 2026-05-05 19:14:32 -04:00
76 changed files with 8680 additions and 1191 deletions

View File

@@ -12,7 +12,7 @@ If the user provides only a **package name** (no version), look up the latest ve
## Hard Rules — Read First
1. **Never run `nix build .#<pkg>`** or `.#packages.<system>.<pkg>`. That compiles the package. Only realise **FOD sub-attributes** (`.src`, `.goModules`, `.npmDeps`, `.cargoDeps`) — those are pure downloads, not builds.
2. **Never** use `nix-prefetch-git`, `nix-prefetch-url`, `nix hash path`, `git clone` + manual hashing, `builtins.fetchGit`, or any other ad-hoc method to compute hashes. They produce hashes in formats that don't match what `fetchgit`/`fetchFromGitHub`/etc. expect, and you will waste time chasing mismatches.
2. **Never** use `nix-prefetch-git`, `nix-prefetch-github`, `nix-prefetch-url`, `nix hash path`, `nix hash file` (on a raw patch/tarball), `git clone` + manual hashing, `builtins.fetchGit`, or any other ad-hoc method to compute hashes. They produce hashes in formats that don't match what `fetchgit`/`fetchFromGitHub`/`fetchpatch` expect (notably: `fetchFromGitHub { leaveDotGit = true; }` is non-deterministic across machines, and `fetchpatch` normalizes patches — strips `index abc..def`, `From <sha>`, signatures — so its hash ≠ `nix hash file` of the raw `.patch`).
3. There are exactly **two** correct ways to get a hash, both listed below. If neither fits, stop and ask the user — don't improvise.
## The Only Two Methods
@@ -29,17 +29,27 @@ Copy the `hash = "sha256-..."` line from the output into the package's `src` blo
### Method B — FOD mismatch trick (for everything else)
For `vendorHash`, `npmDepsHash`, `cargoHash`, `cargoLock.outputHashes.<crate>`, or any `src` using a custom fetcher (`leaveDotGit`, `postFetch`, `fetchSubmodules`, etc. — applies to `llama-cpp` and `llama-swap`), realise the **specific FOD sub-attribute** and read the `got:` line from the error.
For `vendorHash`, `npmDepsHash`, `cargoHash`, `cargoLock.outputHashes.<crate>`, `fetchpatch` hashes, or any `src` using a custom fetcher (`leaveDotGit`, `postFetch`, `fetchSubmodules`, etc. — applies to `llama-cpp` and `llama-swap`), realise the **specific FOD sub-attribute** and read the `got:` line from the error.
```bash
nix build .#<name>.src --no-link 2>&1 | tee /tmp/hash.log # for src
nix build .#<name>.goModules --no-link 2>&1 | tee /tmp/hash.log # for vendorHash
nix build .#<name>.npmDeps --no-link 2>&1 | tee /tmp/hash.log # for npmDepsHash
nix build .#<name>.cargoDeps --no-link 2>&1 | tee /tmp/hash.log # for cargoHash
nix build .#<name> --no-link 2>&1 | tee /tmp/hash.log # for fetchpatch / other input FODs (see note)
grep -E '^[[:space:]]*got:' /tmp/hash.log | tail -1 | awk '{print $2}'
```
Setting the hash to `lib.fakeHash` (preferred when `lib` is in scope), `sha256-AAAA...` (44 A's), or leaving the old one in place all work — the build will fail at the FOD with `got: sha256-...` which is the correct value.
**`fetchpatch` note:** patches don't have a dedicated sub-attribute, so you must target the package itself — this is the one exception to Hard Rule 1. It is safe *only* while the patch hash is wrong (e.g. `lib.fakeHash`): Nix realizes the patch FOD before compilation starts, so a hash mismatch aborts with `0 built (1 failed)` and zero compile work. If every FOD hash is already correct, `nix build .#<name>` will start compiling. To guard against this: always start patch hashes as `lib.fakeHash`, run the build, copy the `got:` value, paste it in, and only then re-verify with `.src` / sub-attribute builds (never re-run `.#<name>` to confirm).
**GitHub PR patches — `.patch` vs `.diff`:** When fetching a patch from a GitHub pull request, prefer the `.diff` endpoint over `.patch`.
- `https://github.com/<owner>/<repo>/pull/<N>.patch` — a `git format-patch` **mbox** containing each commit in the PR separately. Neither `fetchpatch` (which normalizes the file with patchutils) nor the Nix `patchPhase` (which runs GNU `patch`) replays commit history the way `git am` would; hunks are applied against the working tree. PRs that create a file in one commit and delete/rename it in a later commit will fail with errors like `The next patch would delete the file X, which does not exist`.
- `https://github.com/<owner>/<repo>/pull/<N>.diff` — a **squashed** unified diff of the PR's net change. Applies cleanly against any base the PR is mergeable against.
Default to `.diff`. Only fall back to `.patch` if you specifically need authorship metadata (rare for Nix patching). If a previously-working `.patch` URL suddenly fails to apply, switching to `.diff` is the first thing to try.
Always use `lib.fakeHash` (or `pkgs.lib.fakeHash` if only `pkgs` is in scope). This is the only reliable way to set a bogus hash — never write a literal `sha256-...` placeholder string. The build will fail at the FOD with `got: sha256-...` which is the correct value.
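**Note:** `.src`, `.goModules`, etc. are sub-attributes of the derivation. They download but do not compile. `nix build .#<name>` (without the `.src` suffix) compiles — never do that, except for the `fetchpatch` case above, where the patch hash is deliberately `lib.fakeHash` so the build aborts before any compilation.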

View File

@@ -0,0 +1,71 @@
---
name: update-vllm-3090-configs
description: Update only the qwen3.6-27b vLLM 3090 llama-swap configs from club-3090 refs; compare diffs, present a plan, and require approval before editing.
---
# Update vLLM 3090 Configs
## Scope
Use only for Qwen3.6 27B vLLM 3090 configs in `modules/nixos/services/llama-swap/`.
Do not use this skill for other models, other Qwen sizes, non-vLLM configs, or package bumps.
Local files:
- `modules/nixos/services/llama-swap/config.nix`
- `modules/nixos/services/llama-swap/setup-qwen36-vllm.sh`
Local config keys:
- `vllm-qwen3.6-27b-tools-text`
- `vllm-qwen3.6-27b-long-text`
- `vllm-qwen3.6-27b-long-vision`
## Hash Tracking
Each config entry stores an upstream commit hash comment:
`# Upstream: club-3090 <hash> (<date>) - <compose-file>`
When comparing, first extract the stored hashes. If a config's hash matches
upstream HEAD, skip it (report "already synced"). Run a full diff only for
configs whose hash differs. Update the hash comment when edits are applied.
## Upstream References
Compare against `club-3090` master:
- `models/qwen3.6-27b/vllm/compose/single/tools-text.yml`
- `models/qwen3.6-27b/vllm/compose/single/long-text.yml`
- `models/qwen3.6-27b/vllm/compose/single/long-vision.yml`
- `scripts/setup.sh` for the current `GENESIS_PIN="${GENESIS_PIN:-...}"`
Use raw URLs or a temp clone under `_scratch/club-3090`. Prefer a temp clone when checking broad changes:
```bash
mkdir -p _scratch
git clone https://github.com/noonghunna/club-3090 _scratch/club-3090 2>/dev/null || git -C _scratch/club-3090 pull --ff-only
```
## Required Workflow
1. Fetch/update upstream refs under `_scratch/club-3090` or fetch the raw files.
2. Extract stored upstream hashes from `# Upstream: club-3090 ...` comments in config.nix. Skip any config whose hash matches upstream HEAD (report "already synced").
3. Compare upstream compose files to the remaining local llama-swap entries. Translate docker-compose semantics into the existing `docker run`/llama-swap format.
4. Compare upstream `scripts/setup.sh` Genesis pin to local `GENESIS_PIN` in `setup-qwen36-vllm.sh`.
5. Check upstream compose volumes/entrypoint for sidecar patches. If patches are added, removed, renamed, or invoked differently, update both:
- runtime mounts and `python3 /patches/...` calls in `config.nix`
- download/install logic and summary in `setup-qwen36-vllm.sh`
6. Ignore these diffs unless the user explicitly asks otherwise:
- `shm_size` / shm-related compose settings
- local timing patch `patch_timings_07351e088.py` and its mount/invocation
- model served-name differences caused by llama-swap `${MODEL_ID}`
- `HUGGING_FACE_HUB_TOKEN` differences and CUDA device/env differences (keep the local choices)
- upstream relative paths vs local `/mnt/ssd/vLLM/...` paths
- docker-compose format vs local llama-swap/Nix format
7. Before editing, present:
- upstream files/commit checked
- meaningful diffs found
- ignored diffs
- exact planned local changes
Then wait for explicit user approval.
8. After approval, edit minimally and update the `# Upstream: club-3090 ...` hash comments. Validate:
- `bash -n modules/nixos/services/llama-swap/setup-qwen36-vllm.sh`
- `nix-instantiate --parse modules/nixos/services/llama-swap/config.nix`
9. Summarize changed files and any remaining upstream differences.

110
flake.lock generated
View File

@@ -8,11 +8,11 @@
]
},
"locked": {
"lastModified": 1776147994,
"narHash": "sha256-c5F8jYiB0fjWsP4j/yeszqszA3laflzDj6/pmoJTeG4=",
"lastModified": 1778234684,
"narHash": "sha256-usIHfvSt7aXvMvRGtcbsue3rA13Z+9TW/7I3WBzLqFY=",
"owner": "nix-community",
"repo": "nixos-apple-silicon",
"rev": "81439a7fb8067ab43641efd79c84607701da1ccd",
"rev": "3d7fe422ef6162154830209b9e50bf69e150cff7",
"type": "github"
},
"original": {
@@ -28,16 +28,16 @@
]
},
"locked": {
"lastModified": 1772129556,
"narHash": "sha256-Utk0zd8STPsUJPyjabhzPc5BpPodLTXrwkpXBHYnpeg=",
"lastModified": 1779036909,
"narHash": "sha256-zXcwYQGCT6pzinK+1dBB2ekTVtfxGZAapb3Evdcu4fY=",
"owner": "nix-darwin",
"repo": "nix-darwin",
"rev": "ebec37af18215214173c98cf6356d0aca24a2585",
"rev": "56c666e108467d87d13508936aade6d567f2a501",
"type": "github"
},
"original": {
"owner": "nix-darwin",
"ref": "nix-darwin-25.11",
"ref": "nix-darwin-26.05",
"repo": "nix-darwin",
"type": "github"
}
@@ -51,11 +51,11 @@
"nixpkgs": "nixpkgs_2"
},
"locked": {
"lastModified": 1775584659,
"narHash": "sha256-NA5oZRunqxD+4LNdU7ZKJHqwuazKyAmBjO4OHXL14X4=",
"lastModified": 1778179392,
"narHash": "sha256-W6zorvjBYbzMNvqKIqCdpDF4rq3gj50Xximl56YM9/I=",
"owner": "determinatesystems",
"repo": "determinate",
"rev": "21dcaa011d3d35cf42a04e988eaac9b28c97a707",
"rev": "efd54faa68be8cd777b5c28cab11e638998a0853",
"type": "github"
},
"original": {
@@ -67,37 +67,37 @@
"determinate-nixd-aarch64-darwin": {
"flake": false,
"locked": {
"narHash": "sha256-qLWfYk9qkb21wKCDWnhMfqBFjcdBBJkNUKBlvdHSLgA=",
"narHash": "sha256-z4mCqKI3Qd6weuHrlfzGccJG0giym/VJhKv20ijRSs0=",
"type": "file",
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.17.3/macOS"
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.20.0/macOS"
},
"original": {
"type": "file",
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.17.3/macOS"
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.20.0/macOS"
}
},
"determinate-nixd-aarch64-linux": {
"flake": false,
"locked": {
"narHash": "sha256-0BmprPIRTopvJ2QdImOMP+TujAPVgRdl0bUL3vhqGIY=",
"narHash": "sha256-yW+VNepSRytzfanSssPMJPvwioCcmlZYaBX8++UFkAk=",
"type": "file",
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.17.3/aarch64-linux"
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.20.0/aarch64-linux"
},
"original": {
"type": "file",
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.17.3/aarch64-linux"
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.20.0/aarch64-linux"
}
},
"determinate-nixd-x86_64-linux": {
"flake": false,
"locked": {
"narHash": "sha256-+Q85cySxr0FB/cr97hk/WWYgeJY+iC4OH+FjGYygIbU=",
"narHash": "sha256-+L102C3Hhkd1GlXmRm2eLTLsZKBxEvooiQZFqQRlBf0=",
"type": "file",
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.17.3/x86_64-linux"
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.20.0/x86_64-linux"
},
"original": {
"type": "file",
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.17.3/x86_64-linux"
"url": "https://install.determinate.systems/determinate-nixd/tag/v3.20.0/x86_64-linux"
}
},
"disko": {
@@ -105,11 +105,11 @@
"nixpkgs": "nixpkgs_3"
},
"locked": {
"lastModified": 1773889306,
"narHash": "sha256-PAqwnsBSI9SVC2QugvQ3xeYCB0otOwCacB1ueQj2tgw=",
"lastModified": 1777713215,
"narHash": "sha256-8GzXDOXckDWwST8TY5DbwYFjdvQLlP7K9CLSVx6iTTo=",
"owner": "nix-community",
"repo": "disko",
"rev": "5ad85c82cc52264f4beddc934ba57f3789f28347",
"rev": "63b4e7e6cf75307c1d26ac3762b886b5b0247267",
"type": "github"
},
"original": {
@@ -126,11 +126,11 @@
},
"locked": {
"dir": "pkgs/firefox-addons",
"lastModified": 1776199335,
"narHash": "sha256-ImihxU7ReZZuNdrASq8qzOmmO/UQtkuqQ9V9KKb1dD0=",
"lastModified": 1778385775,
"narHash": "sha256-n0MUvWA2SML/qBB4hpShQ7i+i961MX4oPtaQfYo0+uU=",
"owner": "rycee",
"repo": "nur-expressions",
"rev": "95066e56aaa948f170747f57a20c99511a953eed",
"rev": "268324916742a48cd03b94fd63f2822d6b66d519",
"type": "gitlab"
},
"original": {
@@ -278,16 +278,16 @@
]
},
"locked": {
"lastModified": 1775425411,
"narHash": "sha256-KY6HsebJHEe5nHOWP7ur09mb0drGxYSzE3rQxy62rJo=",
"lastModified": 1780361225,
"narHash": "sha256-wnV9ttf4fPWNonBIQmvlrSlNpQYgx5HgWWd007mwIFA=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "0d02ec1d0a05f88ef9e74b516842900c41f0f2fe",
"rev": "e28654b71096e08c019d4861ca26acb646f583d8",
"type": "github"
},
"original": {
"owner": "nix-community",
"ref": "release-25.11",
"ref": "release-26.05",
"repo": "home-manager",
"type": "github"
}
@@ -301,12 +301,12 @@
"nixpkgs-regression": "nixpkgs-regression"
},
"locked": {
"lastModified": 1775583600,
"narHash": "sha256-/shs/3GA4R3rxhhqpPbEMnDZKbCvf3VpwnHB75nkTcI=",
"rev": "e9b4735be7b90cf49767faf5c36f770ac1bdc586",
"revCount": 24880,
"lastModified": 1778177425,
"narHash": "sha256-oyHvP5HDRe59opmjTrq2ED9lh+R9FrHyaCGPPNfBqWM=",
"rev": "f0ccb960d3ad5bff28acd9cabf8bdef885b5d52f",
"revCount": 25858,
"type": "tarball",
"url": "https://api.flakehub.com/f/pinned/DeterminateSystems/nix-src/3.17.3/019d6913-e8c2-7128-ba76-3dc4f6b58158/source.tar.gz"
"url": "https://api.flakehub.com/f/pinned/DeterminateSystems/nix-src/3.20.0/019e03bc-3f83-7833-aba3-b691ef4956c7/source.tar.gz"
},
"original": {
"type": "tarball",
@@ -351,16 +351,16 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1761597516,
"narHash": "sha256-wxX7u6D2rpkJLWkZ2E932SIvDJW8+ON/0Yy8+a5vsDU=",
"rev": "daf6dc47aa4b44791372d6139ab7b25269184d55",
"revCount": 811874,
"lastModified": 1773222311,
"narHash": "sha256-BHoB/XpbqoZkVYZCfXJXfkR+GXFqwb/4zbWnOr2cRcU=",
"rev": "0590cd39f728e129122770c029970378a79d076a",
"revCount": 909248,
"type": "tarball",
"url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.2505.811874%2Brev-daf6dc47aa4b44791372d6139ab7b25269184d55/019a3494-3498-707e-9086-1fb81badc7fe/source.tar.gz"
"url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.2511.909248%2Brev-0590cd39f728e129122770c029970378a79d076a/019ce32b-8ace-7339-b129-cceaa8dd10c6/source.tar.gz"
},
"original": {
"type": "tarball",
"url": "https://flakehub.com/f/NixOS/nixpkgs/0.2505"
"url": "https://flakehub.com/f/NixOS/nixpkgs/0.2511"
}
},
"nixpkgs-23-11": {
@@ -397,11 +397,11 @@
},
"nixpkgs-unstable": {
"locked": {
"lastModified": 1775710090,
"narHash": "sha256-ar3rofg+awPB8QXDaFJhJ2jJhu+KqN/PRCXeyuXR76E=",
"lastModified": 1777954456,
"narHash": "sha256-hGdgeU2Nk87RAuZyYjyDjFL6LK7dAZN5RE9+hrDTkDU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "4c1018dae018162ec878d42fec712642d214fdfa",
"rev": "549bd84d6279f9852cae6225e372cc67fb91a4c1",
"type": "github"
},
"original": {
@@ -413,12 +413,12 @@
},
"nixpkgs_2": {
"locked": {
"lastModified": 1775464765,
"narHash": "sha256-nex6TL2x1/sVHCyDWcvl1t/dbTedb9bAGC4DLf/pmYk=",
"rev": "83e29f2b8791f6dec20804382fcd9a666d744c07",
"revCount": 975711,
"lastModified": 1777826146,
"narHash": "sha256-wQ/iN5Zp5VIa3ebBibijPnLyKhor+xEbDy4d0goa9Zs=",
"rev": "73c703c22422b8951895a960959dbbaca7296492",
"revCount": 991389,
"type": "tarball",
"url": "https://api.flakehub.com/f/pinned/DeterminateSystems/nixpkgs-weekly/0.1.975711%2Brev-83e29f2b8791f6dec20804382fcd9a666d744c07/019d6689-cde2-7061-b044-e0ef61ade488/source.tar.gz"
"url": "https://api.flakehub.com/f/pinned/DeterminateSystems/nixpkgs-weekly/0.1.991389%2Brev-73c703c22422b8951895a960959dbbaca7296492/019df6c8-934b-7d40-b402-027bb5def30f/source.tar.gz"
},
"original": {
"type": "tarball",
@@ -443,16 +443,16 @@
},
"nixpkgs_4": {
"locked": {
"lastModified": 1776067740,
"narHash": "sha256-B35lpsqnSZwn1Lmz06BpwF7atPgFmUgw1l8KAV3zpVQ=",
"lastModified": 1780203844,
"narHash": "sha256-K5sT4jTpGs15ADhviMKNBH38REpPf5Q6mM1+N6cArVE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "7e495b747b51f95ae15e74377c5ce1fe69c1765f",
"rev": "b51242d7d43689db2f3be91bd05d5b24fbb469c4",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.11",
"ref": "nixos-26.05",
"repo": "nixpkgs",
"type": "github"
}
@@ -501,11 +501,11 @@
]
},
"locked": {
"lastModified": 1776119890,
"narHash": "sha256-Zm6bxLNnEOYuS/SzrAGsYuXSwk3cbkRQZY0fJnk8a5M=",
"lastModified": 1777944972,
"narHash": "sha256-VfGRo1qTBKOe3s2gOv8LSoA6Fk19PvBlwQ1ECN0Evn8=",
"owner": "Mic92",
"repo": "sops-nix",
"rev": "d4971dd58c6627bfee52a1ad4237637c0a2fb0cd",
"rev": "c591bf665727040c6cc5cb409079acb22dcce33c",
"type": "github"
},
"original": {

View File

@@ -2,7 +2,7 @@
description = "NixOS Hosts";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-26.05";
nixpkgs-unstable.url = "github:NixOS/nixpkgs/nixos-unstable";
disko.url = "github:nix-community/disko";
determinate.url = "github:determinatesystems/determinate";
@@ -11,7 +11,7 @@
inputs.nixpkgs.follows = "nixpkgs";
};
home-manager = {
url = "github:nix-community/home-manager/release-25.11";
url = "github:nix-community/home-manager/release-26.05";
inputs.nixpkgs.follows = "nixpkgs";
};
apple-silicon = {
@@ -31,7 +31,7 @@
inputs.nixpkgs.follows = "nixpkgs";
};
darwin = {
url = "github:nix-darwin/nix-darwin/nix-darwin-25.11";
url = "github:nix-darwin/nix-darwin/nix-darwin-26.05";
inputs.nixpkgs.follows = "nixpkgs";
};
};

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -8,7 +8,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -41,6 +41,7 @@ in
services = {
sketchybar = enabled;
open-proxy.server = enabled;
};
security = {

View File

@@ -33,6 +33,16 @@ else
echo " [✓] VM SOCKS Proxy Already Running"
fi
# Reverse tunnel so the VM's `open`/`xdg-open` reach open-proxy serve on this host.
if ! pgrep -f "ssh -N -R 7777:127.0.0.1:7777 adios-cs" > /dev/null; then
echo " [*] VM Open Proxy Starting..."
ssh -N -R 7777:127.0.0.1:7777 adios-cs &> /dev/null &
disown
echo " [✓] VM Open Proxy Started"
else
echo " [✓] VM Open Proxy Already Running"
fi
echo -e " [*] Connecting..."
# Connect to VM

View File

@@ -9,7 +9,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -20,7 +20,7 @@ in
services = {
ssh-agent = enabled;
fusuma = enabled;
swww = enabled;
awww = enabled;
};
security = {
@@ -58,6 +58,7 @@ in
home.packages = with pkgs; [
orca-slicer
reichard.tuxguitar
];
dconf = {

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -17,6 +17,7 @@ in
services = {
ssh-agent = enabled;
open-proxy.client = enabled;
};
security = {
@@ -30,6 +31,7 @@ in
enable = true;
customFastFetchLogo = ./prophet.txt;
};
conduit = enabled;
btop = enabled;
claude-code = enabled;
direnv = enabled;

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -8,7 +8,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -9,7 +9,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -20,7 +20,7 @@ in
services = {
ssh-agent = enabled;
fusuma = enabled;
swww = enabled;
awww = enabled;
poweralertd = enabled;
};
@@ -68,6 +68,10 @@ in
};
};
home.packages = with pkgs; [
orca-slicer
];
home.pointerCursor = {
gtk.enable = true;
name = "catppuccin-macchiato-mauve-cursors";

View File

@@ -20,7 +20,7 @@ in
services = {
ssh-agent = enabled;
fusuma = enabled;
swww = enabled;
awww = enabled;
};
security = {

View File

@@ -2,11 +2,13 @@
{
home.packages = with pkgs; [
sqlite-interactive
jnv
jq
mosh
ncdu
ripgrep
reichard.codexis
ripgrep
sqlite-interactive
unzip
];
}

View File

@@ -1,4 +1,9 @@
{ config, lib, pkgs, namespace, ... }:
{ config
, lib
, pkgs
, namespace
, ...
}:
let
inherit (lib)
types
@@ -62,40 +67,47 @@ in
extensions.packages = mkOpt (with lib.types; listOf package)
(with pkgs.firefox-addons; [
bitwarden
pkgs.firefox-addons."ctrl-number-to-switch-tabs"
darkreader
gruvbox-dark-theme
kagi-search
sponsorblock
ublock-origin
# bypass-paywalls-clean
]) "Extensions to install";
};
config = mkIf cfg.enable {
programs.firefox = {
enable = true;
configPath = ".mozilla/firefox";
inherit (cfg) policies;
profiles = {
${config.${namespace}.user.name} = {
inherit (cfg) extraConfig extensions;
inherit (cfg) extraConfig;
inherit (config.${namespace}.user) name;
extensions = {
packages = cfg.extensions.packages;
force = true;
};
id = 0;
settings = mkMerge [
cfg.settings
{
"browser.aboutConfig.showWarning" = false;
"extensions.autoDisableScopes" = 0;
"extensions.activeThemeID" = "{eb8c4a94-e603-49ef-8e81-73d3c4cc04ff}";
"browser.aboutwelcome.enabled" = false;
"browser.sessionstore.warnOnQuit" = true;
"browser.newtabpage.activity-stream.showSponsoredTopSites" = false;
"browser.shell.checkDefaultBrowser" = false;
"general.smoothScroll.msdPhysics.enabled" = true;
"intl.accept_languages" = "en-US,en";
"ui.key.accelKey" = "224";
"ui.key.accelKey" = 91;
# "devtools.chrome.enabled" = true;
# "xpinstall.signatures.required" = false;

View File

@@ -1,154 +0,0 @@
exec-once = uwsm app -- waybar
exec-once = uwsm app -- $terminal
exec-once = uwsm app -- firefox
general {
gaps_in = 5
gaps_out = 12
border_size = 2
# https://wiki.hyprland.org/Configuring/Variables/#variable-types for info about colors
col.active_border = rgba(33ccffee) rgba(00ff99ee) 45deg
col.inactive_border = rgba(595959aa)
# Set to true enable resizing windows by clicking and dragging on borders and gaps
resize_on_border = false
# Please see https://wiki.hyprland.org/Configuring/Tearing/ before you turn this on
allow_tearing = false
layout = dwindle
}
# https://wiki.hyprland.org/Configuring/Variables/#decoration
decoration {
rounding = 10
active_opacity = 1.0
inactive_opacity = 1.0
shadow {
enabled = true
range = 4
render_power = 3
color = rgba(1a1a1aee)
}
blur {
enabled = true
size = 3
passes = 1
vibrancy = 0.1696
}
}
# https://wiki.hyprland.org/Configuring/Variables/#animations
animations {
enabled = yes, please :)
bezier = easeOutQuint,0.23,1,0.32,1
bezier = easeInOutCubic,0.65,0.05,0.36,1
bezier = linear,0,0,1,1
bezier = almostLinear,0.5,0.5,0.75,1.0
bezier = quick,0.15,0,0.1,1
animation = global, 1, 10, default
animation = border, 1, 5.39, easeOutQuint
animation = windows, 1, 4.79, easeOutQuint
animation = windowsIn, 1, 4.1, easeOutQuint, popin 87%
animation = windowsOut, 1, 1.49, linear, popin 87%
animation = fadeIn, 1, 1.73, almostLinear
animation = fadeOut, 1, 1.46, almostLinear
animation = fade, 1, 3.03, quick
animation = layers, 1, 3.81, easeOutQuint
animation = layersIn, 1, 4, easeOutQuint, fade
animation = layersOut, 1, 1.5, linear, fade
animation = fadeLayersIn, 1, 1.79, almostLinear
animation = fadeLayersOut, 1, 1.39, almostLinear
animation = workspaces, 1, 1.94, almostLinear, fade
animation = workspacesIn, 1, 1.21, almostLinear, fade
animation = workspacesOut, 1, 1.94, almostLinear, fade
}
# Ref https://wiki.hyprland.org/Configuring/Workspace-Rules/
# "Smart gaps" / "No gaps when only"
# uncomment all if you wish to use that.
# workspace = w[tv1], gapsout:0, gapsin:0
# workspace = f[1], gapsout:0, gapsin:0
# windowrulev2 = bordersize 0, floating:0, onworkspace:w[tv1]
# windowrulev2 = rounding 0, floating:0, onworkspace:w[tv1]
# windowrulev2 = bordersize 0, floating:0, onworkspace:f[1]
# windowrulev2 = rounding 0, floating:0, onworkspace:f[1]
# See https://wiki.hyprland.org/Configuring/Dwindle-Layout/ for more
dwindle {
pseudotile = true # Master switch for pseudotiling. Enabling is bound to mainMod + P in the keybinds section below
preserve_split = true # You probably want this
}
# See https://wiki.hyprland.org/Configuring/Master-Layout/ for more
master {
new_status = master
}
# https://wiki.hyprland.org/Configuring/Variables/#misc
misc {
force_default_wallpaper = -1 # Set to 0 or 1 to disable the anime mascot wallpapers
disable_hyprland_logo = false # If true disables the random hyprland logo / anime girl background. :(
}
#############
### INPUT ###
#############
# https://wiki.hyprland.org/Configuring/Variables/#input
input {
kb_layout = us
kb_variant =
kb_model =
kb_options =
kb_rules =
follow_mouse = 1
sensitivity = 0.0 # -1.0 - 1.0, 0 means no modification.
touchpad {
scroll_factor = 0.5
disable_while_typing = true
natural_scroll = true
clickfinger_behavior = true
tap-to-click = false
}
}
# https://wiki.hyprland.org/Configuring/Variables/#gestures
gesture = 4, horizontal, workspace, invert
# Thinkpad Trackpoint
device {
name = tpps/2-elan-trackpoint
sensitivity = -0.3
}
##############################
### WINDOWS AND WORKSPACES ###
##############################
# See https://wiki.hyprland.org/Configuring/Window-Rules/ for more
# See https://wiki.hyprland.org/Configuring/Workspace-Rules/ for workspace rules
# Example windowrule v1
# windowrule = float, ^(kitty)$
# Example windowrule v2
# windowrulev2 = float,class:^(kitty)$,title:^(kitty)$
# Ignore maximize requests from apps. You'll probably like this.
windowrulev2 = suppressevent maximize, class:.*
# Fix some dragging issues with XWayland
windowrulev2 = nofocus,class:^$,title:^$,xwayland:1,floating:1,fullscreen:0,pinned:0

View File

@@ -0,0 +1,217 @@
-- Hyprland config (lua backend, Hyprland 0.55+).
-- `mainMod`, `menuMod`, and the monitor(s) are injected by Nix above this file.
-- See https://wiki.hypr.land/Configuring/Start/
local terminal = "ghostty"
local menu = "wofi --show drun"
-------------------
---- AUTOSTART ----
-------------------
hl.on("hyprland.start", function()
hl.exec_cmd("uwsm app -- waybar")
hl.exec_cmd("uwsm app -- " .. terminal)
hl.exec_cmd("uwsm app -- firefox")
end)
-----------------------
---- LOOK AND FEEL ----
-----------------------
hl.config({
general = {
gaps_in = 5,
gaps_out = 12,
border_size = 2,
col = {
active_border = { colors = { "rgba(33ccffee)", "rgba(00ff99ee)" }, angle = 45 },
inactive_border = "rgba(595959aa)",
},
resize_on_border = false,
allow_tearing = false,
layout = "dwindle",
},
decoration = {
rounding = 10,
active_opacity = 1.0,
inactive_opacity = 1.0,
shadow = {
enabled = true,
range = 4,
render_power = 3,
color = 0xee1a1a1a,
},
blur = {
enabled = true,
size = 3,
passes = 1,
vibrancy = 0.1696,
},
},
animations = {
enabled = true,
},
dwindle = {
preserve_split = true,
},
master = {
new_status = "master",
},
misc = {
force_default_wallpaper = -1,
disable_hyprland_logo = false,
},
})
----------------------
---- ANIMATIONS ------
----------------------
hl.curve("easeOutQuint", { type = "bezier", points = { { 0.23, 1 }, { 0.32, 1 } } })
hl.curve("easeInOutCubic", { type = "bezier", points = { { 0.65, 0.05 }, { 0.36, 1 } } })
hl.curve("linear", { type = "bezier", points = { { 0, 0 }, { 1, 1 } } })
hl.curve("almostLinear", { type = "bezier", points = { { 0.5, 0.5 }, { 0.75, 1 } } })
hl.curve("quick", { type = "bezier", points = { { 0.15, 0 }, { 0.1, 1 } } })
hl.animation({ leaf = "global", enabled = true, speed = 10, bezier = "default" })
hl.animation({ leaf = "border", enabled = true, speed = 5.39, bezier = "easeOutQuint" })
hl.animation({ leaf = "windows", enabled = true, speed = 4.79, bezier = "easeOutQuint" })
hl.animation({ leaf = "windowsIn", enabled = true, speed = 4.1, bezier = "easeOutQuint", style = "popin 87%" })
hl.animation({ leaf = "windowsOut", enabled = true, speed = 1.49, bezier = "linear", style = "popin 87%" })
hl.animation({ leaf = "fadeIn", enabled = true, speed = 1.73, bezier = "almostLinear" })
hl.animation({ leaf = "fadeOut", enabled = true, speed = 1.46, bezier = "almostLinear" })
hl.animation({ leaf = "fade", enabled = true, speed = 3.03, bezier = "quick" })
hl.animation({ leaf = "layers", enabled = true, speed = 3.81, bezier = "easeOutQuint" })
hl.animation({ leaf = "layersIn", enabled = true, speed = 4, bezier = "easeOutQuint", style = "fade" })
hl.animation({ leaf = "layersOut", enabled = true, speed = 1.5, bezier = "linear", style = "fade" })
hl.animation({ leaf = "fadeLayersIn", enabled = true, speed = 1.79, bezier = "almostLinear" })
hl.animation({ leaf = "fadeLayersOut", enabled = true, speed = 1.39, bezier = "almostLinear" })
hl.animation({ leaf = "workspaces", enabled = true, speed = 1.94, bezier = "almostLinear", style = "fade" })
hl.animation({ leaf = "workspacesIn", enabled = true, speed = 1.21, bezier = "almostLinear", style = "fade" })
hl.animation({ leaf = "workspacesOut", enabled = true, speed = 1.94, bezier = "almostLinear", style = "fade" })
---------------
---- INPUT ----
---------------
hl.config({
input = {
kb_layout = "us",
kb_variant = "",
kb_model = "",
kb_options = "",
kb_rules = "",
follow_mouse = 1,
sensitivity = 0.0,
touchpad = {
scroll_factor = 0.5,
disable_while_typing = true,
natural_scroll = true,
clickfinger_behavior = true,
tap_to_click = false,
},
},
})
-- 4-finger horizontal swipe to switch workspaces. The old `invert` modifier was
-- removed in the 0.51 gesture rework; flip the physical swipe direction if needed.
hl.gesture({ fingers = 4, direction = "horizontal", action = "workspace" })
-- Thinkpad Trackpoint
hl.device({ name = "tpps/2-elan-trackpoint", sensitivity = -0.3 })
---------------------
---- KEYBINDINGS ----
---------------------
-- Menu Mod Bindings (macOS Transition - Spotlight & Screenshots)
hl.bind(menuMod .. " + SPACE", hl.dsp.exec_cmd(menu))
hl.bind(menuMod .. " + SHIFT + 1", hl.dsp.exec_cmd("hyprshot -m output"))
hl.bind(menuMod .. " + SHIFT + 2", hl.dsp.exec_cmd("hyprshot -m window"))
hl.bind(menuMod .. " + SHIFT + 3", hl.dsp.exec_cmd("hyprshot -m region"))
hl.bind(menuMod .. " + Q", hl.dsp.window.close())
-- Primary Bindings
hl.bind(mainMod .. " + RETURN", hl.dsp.exec_cmd(terminal))
hl.bind(mainMod .. " + M", hl.dsp.exec_cmd("uwsm stop"))
hl.bind(mainMod .. " + V", hl.dsp.window.float({ action = "toggle" }))
hl.bind(mainMod .. " + P", hl.dsp.window.pin())
hl.bind(mainMod .. " + J", hl.dsp.layout("togglesplit"))
hl.bind(mainMod .. " + S", hl.dsp.workspace.toggle_special("magic"))
hl.bind(mainMod .. " + SHIFT + S", hl.dsp.window.move({ workspace = "special:magic" }))
-- Window Focus
hl.bind(mainMod .. " + left", hl.dsp.focus({ direction = "left" }))
hl.bind(mainMod .. " + right", hl.dsp.focus({ direction = "right" }))
hl.bind(mainMod .. " + up", hl.dsp.focus({ direction = "up" }))
hl.bind(mainMod .. " + down", hl.dsp.focus({ direction = "down" }))
-- Workspace switch + move active window to workspace (1-9, 0 -> 10)
for i = 1, 10 do
local key = i % 10
hl.bind(mainMod .. " + " .. key, hl.dsp.focus({ workspace = i }))
hl.bind(mainMod .. " + SHIFT + " .. key, hl.dsp.window.move({ workspace = i }))
end
hl.bind(mainMod .. " + SHIFT + right", hl.dsp.focus({ workspace = "+1" }))
hl.bind(mainMod .. " + SHIFT + left", hl.dsp.focus({ workspace = "-1" }))
-- Window move/resize with mouse
hl.bind(mainMod .. " + mouse:272", hl.dsp.window.drag(), { mouse = true })
hl.bind(mainMod .. " + mouse:273", hl.dsp.window.resize(), { mouse = true })
-- Multimedia & Brightness Keys
hl.bind("XF86AudioRaiseVolume", hl.dsp.exec_cmd("wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%+"), { locked = true, repeating = true })
hl.bind("XF86AudioLowerVolume", hl.dsp.exec_cmd("wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%-"), { locked = true, repeating = true })
hl.bind("XF86AudioMute", hl.dsp.exec_cmd("wpctl set-mute @DEFAULT_AUDIO_SINK@ toggle"), { locked = true, repeating = true })
hl.bind("XF86AudioMicMute", hl.dsp.exec_cmd("wpctl set-mute @DEFAULT_AUDIO_SOURCE@ toggle"), { locked = true, repeating = true })
hl.bind("XF86MonBrightnessUp", hl.dsp.exec_cmd("brightnessctl s 4%+"), { locked = true, repeating = true })
hl.bind("XF86MonBrightnessDown", hl.dsp.exec_cmd("brightnessctl s 5%-"), { locked = true, repeating = true })
-- macOS Keyboard Brightness
hl.bind(menuMod .. " + XF86MonBrightnessUp", hl.dsp.exec_cmd("brightnessctl -d kbd_backlight s 10%+"), { locked = true, repeating = true })
hl.bind(menuMod .. " + XF86MonBrightnessDown", hl.dsp.exec_cmd("brightnessctl -d kbd_backlight s 10%-"), { locked = true, repeating = true })
-- Player Controls
hl.bind("XF86AudioNext", hl.dsp.exec_cmd("playerctl next"), { locked = true })
hl.bind("XF86AudioPause", hl.dsp.exec_cmd("playerctl play-pause"), { locked = true })
hl.bind("XF86AudioPlay", hl.dsp.exec_cmd("playerctl play-pause"), { locked = true })
hl.bind("XF86AudioPrev", hl.dsp.exec_cmd("playerctl previous"), { locked = true })
--------------------------------
---- WINDOWS AND WORKSPACES ----
--------------------------------
hl.window_rule({
name = "suppress-maximize-events",
match = { class = ".*" },
suppress_event = "maximize",
})
hl.window_rule({
name = "fix-xwayland-drags",
match = {
class = "^$",
title = "^$",
xwayland = true,
float = true,
fullscreen = false,
pin = false,
},
no_focus = true,
})

View File

@@ -24,90 +24,27 @@ in
wayland.windowManager.hyprland = {
enable = true;
extraConfig = builtins.readFile ./config/hyprland.conf;
settings = {
"$mainMod" = cfg.mainMod;
"$menuMod" = cfg.menuMod;
"$terminal" = "ghostty";
"$menu" = "wofi --show drun";
# Lua Backend - Hyprland 0.55 deprecated hyprlang and home-manager 26.05 defaults configType to "lua".
configType = "lua";
extraConfig =
let
# Quote unless the value is numeric, so scale can be `2` or `"auto"`.
luaScalar = v: if builtins.match "[0-9]+(\\.[0-9]+)?" v != null then v else ''"${v}"'';
mkMonitor =
s:
let
parts = map lib.trim (lib.splitString "," s);
field = i: if builtins.length parts > i then builtins.elemAt parts i else "";
in
''hl.monitor({ output = "${field 0}", mode = "${field 1}", position = "${field 2}", scale = ${luaScalar (field 3)} })'';
in
''
local mainMod = "${cfg.mainMod}"
local menuMod = "${cfg.menuMod}"
monitor = cfg.monitors;
bind = [
# Menu Mod Bindings (macOS Transition - Spotlight & Screenshots)
"$menuMod, SPACE, exec, $menu"
"$menuMod SHIFT, 1, exec, hyprshot -m output"
"$menuMod SHIFT, 2, exec, hyprshot -m window"
"$menuMod SHIFT, 3, exec, hyprshot -m region"
"$menuMod, Q, killactive"
# Primary Bindings
"$mainMod, RETURN, exec, $terminal"
"$mainMod, M, exit"
"$mainMod, V, togglefloating"
"$mainMod, P, pin"
"$mainMod, J, togglesplit"
"$mainMod, S, togglespecialworkspace, magic"
"$mainMod SHIFT, S, movetoworkspace, special:magic"
# Window Focus
"$mainMod, left, movefocus, l"
"$mainMod, right, movefocus, r"
"$mainMod, up, movefocus, u"
"$mainMod, down, movefocus, d"
# Workspace Switch
"$mainMod, 1, workspace, 1"
"$mainMod, 2, workspace, 2"
"$mainMod, 3, workspace, 3"
"$mainMod, 4, workspace, 4"
"$mainMod, 5, workspace, 5"
"$mainMod, 6, workspace, 6"
"$mainMod, 7, workspace, 7"
"$mainMod, 8, workspace, 8"
"$mainMod, 9, workspace, 9"
"$mainMod, 0, workspace, 10"
# Window Workspace Move
"$mainMod SHIFT, 1, movetoworkspace, 1"
"$mainMod SHIFT, 2, movetoworkspace, 2"
"$mainMod SHIFT, 3, movetoworkspace, 3"
"$mainMod SHIFT, 4, movetoworkspace, 4"
"$mainMod SHIFT, 5, movetoworkspace, 5"
"$mainMod SHIFT, 6, movetoworkspace, 6"
"$mainMod SHIFT, 7, movetoworkspace, 7"
"$mainMod SHIFT, 8, movetoworkspace, 8"
"$mainMod SHIFT, 9, movetoworkspace, 9"
"$mainMod SHIFT, 0, movetoworkspace, 10"
"$mainMod SHIFT, right, workspace, +1"
"$mainMod SHIFT, left, workspace, -1"
];
bindm = [
# Window Resizing
"$mainMod, mouse:272, movewindow"
"$mainMod, mouse:273, resizewindow"
];
bindel = [
# Multimedia & Brightness Keys
",XF86AudioRaiseVolume, exec, wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%+"
",XF86AudioLowerVolume, exec, wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%-"
",XF86AudioMute, exec, wpctl set-mute @DEFAULT_AUDIO_SINK@ toggle"
",XF86AudioMicMute, exec, wpctl set-mute @DEFAULT_AUDIO_SOURCE@ toggle"
",XF86MonBrightnessUp, exec, brightnessctl s 4%+"
",XF86MonBrightnessDown, exec, brightnessctl s 5%-"
# macOS Keyboard Brightness
"$menuMod, XF86MonBrightnessUp, exec, brightnessctl -d kbd_backlight s 10%+"
"$menuMod, XF86MonBrightnessDown, exec, brightnessctl -d kbd_backlight s 10%-"
];
bindl = [
# Player Controls
", XF86AudioNext, exec, playerctl next"
", XF86AudioPause, exec, playerctl play-pause"
", XF86AudioPlay, exec, playerctl play-pause"
", XF86AudioPrev, exec, playerctl previous"
];
};
${lib.concatMapStringsSep "\n" mkMonitor cfg.monitors}
''
+ builtins.readFile ./config/hyprland.lua;
};
programs.waybar = {
@@ -117,9 +54,9 @@ in
{
layer = "top";
position = "top";
mod = "dock";
mode = "dock";
exclusive = true;
passtrough = false;
passthrough = false;
gtk-layer-shell = true;
height = 0;
modules-left = [

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
MODEL="vllm-qwen3.6-27b-long-text"
MODEL="qwen3.6-27b-vllm-180k-cuda0"
SYSTEM_PROMPT="You are a shell command expert. Given a natural language query, generate a single shell command that accomplishes the task."
# Colors

View File

@@ -2,3 +2,5 @@ _scratch
.direnv
.envrc
.agents
.pi
.nvim.lua

View File

@@ -2,11 +2,7 @@
local diagnostics_active = true
local toggle_diagnostics = function()
diagnostics_active = not diagnostics_active
if diagnostics_active then
vim.diagnostic.enable()
else
vim.diagnostic.disable()
end
vim.diagnostic.enable(diagnostics_active)
end
local diagnostics_loclist_active = false

View File

@@ -1,6 +1,6 @@
local llm_endpoint = "https://llm-api.va.reichard.io"
local llm_assistant_model = "vllm-qwen3.6-27b-tools-text "
local llm_infill_model = "qwen3.5-4b-thinking"
local llm_assistant_model = "qwen3.6-27b-vllm-75k-cuda0"
local llm_infill_model = "qwen3.5-4b-cuda1"
local current_fim = "llama"
-- Copilot Configuration

View File

@@ -241,7 +241,12 @@ setup_lsp("gopls", {
})
end,
filetypes = { "go" },
cmd = { "gopls", "-remote=auto" },
-- Spawn gopls through vim.lsp.rpc.start instead of a plain command list so the
-- process can be given spawn options: the resolved root_dir as its working
-- directory and GOMEMLIMIT (the Go runtime's soft memory limit) set to 6GiB.
cmd = function(dispatchers, config)
return vim.lsp.rpc.start({ "gopls", "-remote=auto" }, dispatchers, {
cwd = config.root_dir,
env = { GOMEMLIMIT = "6GiB" },
})
end,
settings = {
gopls = {
buildFlags = { "-tags=e2e" },
@@ -304,3 +309,30 @@ none_ls.setup({
end
end,
})
------------------------------------------------------
---------------------- EXRC LSP ----------------------
------------------------------------------------------
vim.o.exrc = true -- native path: <cwd>/.nvim.lua on startup
local loaded = {} -- absolute path -> true
--- Load the nearest `.nvim.lua` found at or above a buffer's directory.
---
--- The search walks upward from the directory of the buffer's file. Each
--- config path is executed at most once per session (tracked in `loaded`),
--- and only after `vim.secure.read` has returned its contents.
---
---@param buf integer Buffer handle whose file path anchors the upward search.
local function load_project_config(buf)
  local fname = vim.api.nvim_buf_get_name(buf)
  if fname == "" then return end

  local found = vim.fs.find(".nvim.lua", {
    upward = true,
    path = vim.fs.dirname(fname),
  })[1]
  if not found or loaded[found] then return end

  local content = vim.secure.read(found)
  if type(content) ~= "string" then return end
  loaded[found] = true

  -- Compile Trusted Text - Run the exact bytes vim.secure.read returned rather than re-reading the path, so the file cannot change between the trust check and execution.
  local chunk, compile_err = loadstring(content, "@" .. found)
  if not chunk then
    vim.notify("project config: " .. compile_err, vim.log.levels.ERROR)
    return
  end

  -- Contain Runtime Errors - A broken project config should be reported, not raised out of the autocmd callback.
  local ok, run_err = pcall(chunk)
  if not ok then
    vim.notify("project config: " .. tostring(run_err), vim.log.levels.ERROR)
  end
end
vim.api.nvim_create_autocmd({ "BufReadPost", "BufNewFile" }, {
callback = function(args) load_project_config(args.buf) end,
})

View File

@@ -49,7 +49,7 @@ local function pr_status()
end
require("lualine").setup({
options = { theme = "catppuccin" },
options = { theme = "catppuccin-mocha" },
sections = {
lualine_c = { { pr_status } },
-- lualine_z = { require("opencode").statusline }

View File

@@ -1,4 +1,7 @@
require("nvim-treesitter.configs").setup({
highlight = { enable = true, additional_vim_regex_highlighting = false },
})
vim.treesitter.language.register("markdown", "octo")
-- On every FileType event, try to start treesitter for that buffer. The call
-- goes through pcall so any error raised by vim.treesitter.start is discarded
-- (presumably filetypes without an installed parser — TODO confirm).
vim.api.nvim_create_autocmd("FileType", {
callback = function(args)
pcall(vim.treesitter.start, args.buf)
end,
})

View File

@@ -135,18 +135,18 @@ in
golangci-lint-langserver
lua-language-server
nil
nodePackages.eslint
nodePackages.svelte-language-server
nodePackages.typescript
nodePackages.typescript-language-server
nodePackages.vscode-langservers-extracted
eslint
svelte-language-server
typescript
typescript-language-server
vscode-langservers-extracted
pyright
python312Packages.autopep8
# Formatters
luaformatter
nixpkgs-fmt
nodePackages.prettier
prettier
stylua
sql-formatter
@@ -177,9 +177,9 @@ in
clangd = "${pkgs.clang-tools}/bin/clangd",
golintls = "${pkgs.golangci-lint-langserver}/bin/golangci-lint-langserver",
luals = "${pkgs.lua-language-server}/bin/lua-language-server",
sveltels = "${pkgs.nodePackages.svelte-language-server}/bin/svelteserver",
tsls = "${pkgs.nodePackages.typescript-language-server}/bin/typescript-language-server",
vscls = "${pkgs.nodePackages.vscode-langservers-extracted}",
sveltels = "${pkgs.svelte-language-server}/bin/svelteserver",
tsls = "${pkgs.typescript-language-server}/bin/typescript-language-server",
vscls = "${pkgs.vscode-langservers-extracted}",
sqls = "${pkgs.sqls}/bin/sqls",
}
return nix_vars

View File

@@ -1,82 +1,81 @@
# AI Agent Guidelines
## Important Rules
Be cognizant of context use; this file is loaded for all LLMs. Keep guidance concise and high-signal.
1. **Timeout for bash tool**: The `bash` tool MUST have a timeout specified. Without a timeout, the tool will hang indefinitely and cause the task to fail.
## Critical Rules
2. **File writing**: Do NOT use `cat` with heredocs to write files. Use the `write` tool instead (or `edit` for modifications).
1. **Bash timeouts**: Every `bash` tool call MUST specify a timeout.
```bash
bash(command="some command", timeout=30)
```
3. **Ephemeral files**: Put temporary scripts, plans, notes, and other scratch artifacts in `_scratch/`. It is gitignored, and reusable exploration/testing scripts should be iterated there instead of recreated repeatedly.
2. **File writing**: Do NOT use `cat` with heredocs to write files. Use `write` for new/rewritten files and `edit` for targeted modifications.
## Example of Correct Usage
3. **Scratch files**: Put temporary scripts, plans, notes, and reusable exploration artifacts in `_scratch/`. It is gitignored.
### Incorrect (will hang):
4. **Missing commands**: If a tool is not installed, prefer `nix run` instead of installing it.
```bash
nix run nixpkgs#python3 -- script.py
```
```bash
bash(command="some long-running command")
```
## Asking Questions
### Correct (with timeout):
If a task is ambiguous, underspecified, or you foresee a non-obvious tradeoff during implementation, **surface it before coding** rather than guessing and producing rework. Treat this as always-on; an explicit "any questions?" is never required.
```bash
bash(command="some command", timeout=30)
```
## Context Discipline
### Incorrect (file writing):
Prefer a **search → targeted read** pattern:
```bash
bash(command="cat > file.txt << 'EOF'\ncontent\nEOF")
```
1. Search with `rg -n` / `grep -n` to find relevant line numbers.
2. Read only the needed range with `read(path, offset, limit)`.
### Correct (file writing):
```bash
write(path="file.txt", content="content")
```
## Reading Files
Prefer a **search → targeted read** pattern to minimize context usage:
1. **Search** with `grep -n` / `rg -n` to find relevant line numbers.
2. **Read** only the needed range using `read(path, offset, limit)` or `sed -n 'X,Yp'`.
```bash
# Find the relevant lines
bash(command="rg -n 'functionName' src/", timeout=10)
# Read just that region (e.g. lines 42-70)
read(path="src/foo.go", offset=42, limit=29)
```
Full-file reads are fine when genuinely needed (small files, needing full picture), but avoid them as the default reflex.
Full-file reads are fine when genuinely needed, but avoid them as the default reflex.
## Principles
1. **KISS / YAGNI** - Keep solutions simple and straightforward. Don't introduce abstractions, generics, or indirection unless there is a concrete, immediate need. Prefer obvious code over clever code.
1. **Priority order**: When goals conflict, optimize in this order:
1. **Correctness** — solve the actual use case, including the realistic failure modes (not just the happy path).
2. **Maintainability / readability** — non-negotiable. Code is read far more than it is written; clarity wins over cleverness.
3. **Abstraction & polish** — only after the above are solid, and only when a concrete need justifies it.
2. **Maintain AGENTS.md** - If the project has an `AGENTS.md`, keep it up to date as conventions or architecture evolve. However, follow the **BLUF** (Bottom Line Up Front) principle: keep it concise, actionable, and context-size conscious. Don't overload it with information that belongs in code comments or external docs.
All three matter, but never sacrifice (2) for (3). Prefer obvious, boring code over slick code that requires a paragraph to explain.
2. **KISS / YAGNI**: Avoid abstractions, generics, or indirection unless there is a concrete, present need. Speculative flexibility is a maintainability tax.
3. **Maintain AGENTS.md**: Keep project guidance up to date, but BLUF: concise, actionable, and context-size conscious.
4. **Rephrase over append**: When extending existing content (docs, comments, prose, code), prefer rephrasing to capture the new intent over tacking on more verbosity.
5. **Positive framing over prohibition**: State what _to_ do, not what _not_ to do. Default to omitting an instruction entirely rather than adding a "don't do X" rule — omission costs less context and avoids the failure mode where deleting a prohibition gets inverted into a mandate. Reserve explicit prohibitions for cases where the wrong behavior is a likely default that positive guidance alone can't redirect.
6. **Knowledge Capture Check**: Before the final response, ask whether the task revealed a non-obvious convention, pitfall, repeatable workflow, or missing helper. If yes, briefly recommend exactly where to capture it: package/project AGENTS.md, global AGENTS.md, a skill, or a helper script. Skip this note when there is nothing meaningful.
## Style
### Comment Style
A logical "block" of code (doesn't have to be a scope, but a cohesive group of statements responsible for something) should have a comment above it with a short "title". The title must be in **Title Case**. For example:
Default to **no comment**. Code should be self-explanatory through naming and structure. Only add a comment when it earns its place by explaining something the code cannot.
Write a comment when, and only when:
1. The _why_ is non-obvious (intent, constraint, workaround, surprising invariant).
2. A reader familiar with the language/codebase would otherwise stop and ask "why?".
Do not narrate _what_ the code does. Do not add Title Case section headers over logical blocks just to label them.
When a comment _is_ warranted, use a short Title Case label, a dash, and the _why_:
```go
// Map Component Results
// Map Component Results - Downstream consumers expect a name-keyed lookup.
for _, comp := range components {
results[comp.Name] = comp.Result
}
```
If the block is more complicated or non-obvious, explain _why_ it does what it does after the title:
Rules for the explanation after the dash:
```go
// Map Component Results - This is needed because downstream consumers
// expect a name-keyed lookup. Without it, the renderer would fall back
// to O(n) scans on every frame.
for _, comp := range components {
results[comp.Name] = comp.Result
}
```
- Keep it to **2–3 sentences max**. Never a paragraph.
- State the _why_ directly. Do not restate what the code does, recap prior context, or hedge.
- Do **not** hard-wrap comments at 80 columns. Up to ~120 is fine.
If a block is complex enough that it needs a heading just to be navigable, that is usually a signal to extract a well-named function instead.

View File

@@ -8,11 +8,10 @@ export default function replacePiWithClaudeCodeExtension(pi: ExtensionAPI) {
return undefined;
}
// Replace "pi" With "claude code" - Exclude Literal ".pi" (e.g. Paths)
// And "pi-coding-agent" (Package Name)
// Replace "pi" With "claude code"
const transformedSystemPrompt = event.systemPrompt.replace(
/(?<!\.)pi(?!-coding-agent)/gi,
"claude code",
/(^|\s)pi(?![\w-])/gi,
"$1claude code",
);
if (transformedSystemPrompt === event.systemPrompt) {

View File

@@ -14,6 +14,7 @@ Scaffold a new skill directory with a `SKILL.md` and optional helper scripts und
### 1. Gather Requirements
Ask the user:
- **What does the skill do?** (trigger conditions, purpose)
- **Are there repeatable commands?** (if yes, these become scripts)
@@ -26,23 +27,29 @@ Create `skills/<skill-name>/SKILL.md` with this structure:
```markdown
---
name: <skill-name>
description: '<One-liner: what it does and when to trigger. Keep under ~200 chars.>'
description: "<One-liner: what it does and when to trigger. Keep under ~200 chars.>"
---
# <Skill Title>
## Overview
[1-2 sentences on purpose and scope]
## Workflow
[Numbered steps the agent follows]
```
**Guidelines:**
- **Be concise.** Skills are injected into agent context — every line costs tokens. Aim for the minimum needed to reliably guide the agent.
- **Use scripts for repeatable logic.** If a step involves a multi-line shell command, `jq` pipeline, or API call that won't change between runs, put it in a `.sh` file next to `SKILL.md` and reference it from the workflow. See `address-gh-review/` for an example.
- **Needs configurable values (paths, identifiers, etc.; not secrets — values are stored as plaintext files)?** Copy `assets/variable.sh` into the new skill's `scripts/` dir as-is. Callers use `variable.sh --get NAME [--require-exec RELPATH]`; the helper prints self-explaining `--set` instructions on "unset" or "set-but-invalid" and exits non-zero, so callers just propagate. The helper self-ignores its `.vars/` store on first `--set`, so no `.gitignore` setup is needed.
- **Frontmatter is required.** `name` and `description` fields. The description is what the agent uses to decide whether to load the skill, so make it specific about trigger conditions.
- **Don't over-specify.** Trust the agent to fill gaps. Document the _what_ and _when_, not every micro-step.
- **Frame positively; omit rather than prohibit.** Write what the agent _should_ do. Prefer leaving a rule out over adding "don't do X" (see AGENTS.md principle: _Positive framing over prohibition_).
- **Split workflow from reference when the reference surface grows.** If a skill accumulates lookup tables, mapping rules, or capability references that the workflow consults, move them into a sibling `<skill>/<category>/` directory (e.g. `mappings/`, `references/`) with one sub-doc per category and an index `README.md`. Keep `SKILL.md` focused on the hot path — workflow, hard rules, and a short table pointing at the sub-docs. Include a brief style guide in the index README covering (a) defer to authoritative sources (stubs, schemas, generated docs) whenever possible, (b) row/entry formatting conventions, (c) when to create a new sub-doc vs. extend an existing one.
### 3. Present for Review

View File

@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# Skill-local variable store. Values live in <skill-dir>/.vars/<NAME>.
#
# Usage:
#   variable.sh --get NAME         # prints value to stdout, exits 0
#                                  # or prints a self-explaining hint to
#                                  # stderr and exits 2 if unset.
#   variable.sh --set NAME VALUE   # writes value, exits 0.
#
# NAME must match [A-Z][A-Z0-9_]* for both --get and --set, so a name can
# never address a path outside the store or the store's own .gitignore.
#
# Callers should treat a non-zero exit as fatal; the stderr message tells
# the caller (agent or user) exactly how to populate the missing value.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(dirname "$SCRIPT_DIR")"
STORE="$SKILL_DIR/.vars"
SELF="$0"

# Print usage to stderr and exit 2.
usage() {
  cat >&2 <<EOF
Usage:
$SELF --get NAME
$SELF --set NAME VALUE
EOF
  exit 2
}

# Exit 2 with a message unless $1 is a plain upper-case identifier.
require_valid_name() {
  [[ "$1" =~ ^[A-Z][A-Z0-9_]*$ ]] || {
    echo >&2 "$SELF: invalid name '$1' (must match [A-Z][A-Z0-9_]*)"
    exit 2
  }
}

case "${1:-}" in
  --get)
    [[ $# -eq 2 ]] || usage
    name="$2"
    # Validate before building the path: --get must not read files that
    # --set could never have written (e.g. "../x" or ".gitignore").
    require_valid_name "$name"
    file="$STORE/$name"
    if [[ ! -f "$file" ]]; then
      cat >&2 <<EOF
$SELF: $name is not set.
Ask the user for the value, then set it:
$SELF --set $name <value>
EOF
      exit 2
    fi
    cat "$file"
    ;;
  --set)
    [[ $# -eq 3 ]] || usage
    name="$2"; value="$3"
    require_valid_name "$name"
    mkdir -p "$STORE"
    # Self-ignore the store so values never get committed, even if the
    # skill root lacks a .gitignore entry for .vars/.
    [[ -f "$STORE/.gitignore" ]] || printf '*\n' > "$STORE/.gitignore"
    printf '%s' "$value" > "$STORE/$name"
    ;;
  *)
    usage
    ;;
esac

View File

@@ -126,6 +126,8 @@ EOF
- Imperative mood: "fix bug" not "fixes bug"
- Reference issues: `Closes #123`, `Refs #456`
- Keep description under 72 characters
- Keep commit bodies short: 4 sentences max.
- If you are unsure whether a body is useful, omit it entirely.
## Git Safety Protocol

View File

@@ -1,112 +0,0 @@
---
name: web-glimpse
description: 'Search the web, read pages, extract content, run JavaScript, or capture screenshots using the `glimpse` headless browser tool. Use when the user asks to search the web, look something up online, read/fetch a page, inspect dynamic content, or capture visual state. Does not replace curl for simple HTTP/API requests.'
---
# Web Browsing With Glimpse
`glimpse` runs headless Firefox via WebDriver. Use it for web search, reading rendered pages, running JavaScript, and screenshots. Prefer `curl` for simple APIs, static files, and direct downloads.
## Commands
| Command | Purpose |
| ------- | ------- |
| `reader <url>` | Extract page content as Markdown (Reader View → raw fallback) |
| `exec <url>` | Run JavaScript on a page, return the result |
| `screenshot <url>` | Save a PNG screenshot |
| `search <query>` | Search the web (Kagi) and return results |
| `serve` | Start a persistent browser for faster repeat commands |
## Persistent Server
For multi-command sessions, start a persistent browser server first. All subsequent commands auto-discover it via Unix socket — no extra flags needed.
```bash
# Start persistent server (keeps geckodriver + Firefox alive)
glimpse serve &
# All commands now reuse the running browser (~300ms vs ~2-3s each)
glimpse reader https://example.com
glimpse reader https://other.com
glimpse exec https://example.com --js='return document.title'
# Check server status
glimpse serve --status
# Stop when done
glimpse serve --stop
```
State (cookies, localStorage) persists between commands — this is intentional for sticky sessions. Without a running server, commands work normally with ad-hoc browser startup.
## Quick Reference
```bash
# Read a page (tries Reader View, falls back to raw Turndown)
glimpse reader https://example.com --timeout=15
# Read without Reader View (raw HTML → Markdown via Turndown)
glimpse reader https://example.com --no-reader --timeout=15
# Get structured JSON instead of Markdown (includes method: "reader"|"raw")
glimpse reader https://example.com --format=json
# Save extracted content to a file
glimpse reader https://example.com --output=page.md
# Run JavaScript and return a value
glimpse exec https://example.com --js='return document.title'
# Extract specific data with JavaScript
glimpse exec https://example.com --wait-until=complete --js='return {
title: document.title,
text: document.body.innerText.slice(0, 4000)
}'
# Wait for dynamic content before extracting
glimpse reader https://example.com \
--wait-js='return document.querySelector(".content")?.innerText?.length > 100' \
--timeout=30
# Capture a screenshot
glimpse screenshot https://example.com --output=page.png
# Search the web
glimpse search "query terms" --timeout=15
# Search and get JSON instead of Markdown
glimpse search "query terms" --format=json
```
## Common Options
| Option | Default | Purpose |
| ------ | ------- | ------- |
| `--timeout=<s>` | `10` | Max wait time in seconds; increase for slow/JS-heavy pages |
| `--wait-until=<state>` | `none` | Wait for `none`, `interactive`, or `complete` |
| `--wait-js=<code>` | — | Poll JS expression until truthy |
| `--js=<code>` | — | Run inline JS before command logic |
| `--script=<file>` | — | Run JS file before command logic |
| `--no-headless` | — | Show the browser window |
| `--format=<fmt>` | varies | Output format (reader: `markdown`/`html`/`text`/`json`; search: `markdown`/`json`) |
| `--output=<file>` | — | Write output to file (reader, screenshot) |
| `--no-reader` | — | Skip Reader View, use raw page extraction |
## Workflow
1. **Search first** when the user asks an open-ended question. Pick authoritative results to read.
2. **Read pages with `reader`** — it tries Firefox Reader View for clean article extraction, then falls back to converting the raw page HTML to Markdown via Turndown. Most pages work without extra options.
3. **Add `--wait-until=complete`** for JS-heavy pages, SPAs, or pages that load content dynamically.
4. **Use `exec`** when you need targeted data extraction via JavaScript rather than full page content.
5. **Use `screenshot`** when visual layout, charts, or rendering state matters.
6. **Increase timeouts** — start at `15`, go to `30` for slow sites. The default `10` is often too tight for real-world pages.
7. **Cite URLs** when summarizing web research. Distinguish search snippets from verified page content.
## Error Handling
| Error | Fix |
| ----- | --- |
| `TIMEOUT` | Increase `--timeout` (in seconds), add `--wait-until=complete`, or use `--wait-js` |
| `USAGE_ERROR` | Check arg order: `glimpse <command> <url>`, search is `glimpse search "query"` |
| Thin/empty content | Try `--wait-until=complete`, `--no-reader`, or targeted `exec` |
| Search auth errors | Kagi token is configured via `~/.config/glimpse/config.json` or `KAGI_TOKEN` env |

View File

@@ -0,0 +1,10 @@
---
name: scout
description: Fast codebase reconnaissance. Reads and searches files, then returns concise findings with paths.
approved_tools:
- read
- bash
---
You are a focused codebase scout running as a subagent with isolated context.
Find the information requested by the task and return a concise report. Prefer exact file paths, symbol names, and line numbers. Do not modify files.

View File

@@ -1,9 +1,8 @@
{
lib,
pkgs,
config,
namespace,
...
{ lib
, pkgs
, config
, namespace
, ...
}:
let
inherit (lib) mkIf;
@@ -18,6 +17,8 @@ let
# writing other fields (current model, etc.) without us clobbering them.
piPackages = [
"https://gitea.va.reichard.io/evan/pi-lsp.git@main"
"https://gitea.va.reichard.io/evan/pi-web.git@main"
"https://gitea.va.reichard.io/evan/pi-subagents.git@main"
"https://gitea.va.reichard.io/evan/pi-statusline.git@main"
];
@@ -35,15 +36,19 @@ let
];
piAuthJqRawfiles = lib.concatStringsSep " \\\n " (
map (
map
(
auth: ''--rawfile ${auth.jqVar} "${config.sops.secrets.${auth.secretName}.path}"''
) piAuthApiKeys
)
piAuthApiKeys
);
piAuthJqFilter = lib.concatStringsSep " | " (
map (
map
(
auth: ''.["${auth.provider}"] = { type: "api_key", key: ($'' + auth.jqVar + ''| rtrimstr("\n")) }''
) piAuthApiKeys
)
piAuthApiKeys
);
piAuthMergeScript = pkgs.writeShellScript "pi-auth-merge" ''
@@ -89,7 +94,6 @@ in
# Define Pi Configuration
home.file = {
".pi/agent/AGENTS.md" = {
source = ./config/AGENTS.md;
};
@@ -97,6 +101,10 @@ in
source = ./config/skills;
recursive = true;
};
".pi/agent/subagents" = {
source = ./config/subagents;
recursive = true;
};
".pi/agent/prompts" = {
source = ./config/prompts;
recursive = true;
@@ -116,11 +124,24 @@ in
};
}
// lib.listToAttrs (
map (auth: {
map
(auth: {
name = auth.secretName;
value.sopsFile = auth.sopsFile;
}) piAuthApiKeys
})
piAuthApiKeys
);
# Pi Web Config - Sops template so the kagi token (declared by the
# glimpse module, which pi enables above) can be embedded alongside
# the non-secret searxng base URL.
templates."pi-web.json" = {
path = "${config.home.homeDirectory}/.pi/pi-web/config.json";
content = builtins.toJSON {
provider = "searxng";
kagi.token = "${config.sops.placeholder.kagi_token}";
searxng.baseUrl = "https://search.va.reichard.io";
};
};
templates."pi-models.json" = {
path = "${config.home.homeDirectory}/.pi/agent/models.json";
content = builtins.toJSON {

View File

@@ -5,23 +5,23 @@
, ...
}:
let
cfg = config.${namespace}.services.swww;
cfg = config.${namespace}.services.awww;
in
{
options.${namespace}.services.swww = {
enable = lib.mkEnableOption "swww wallpaper service";
options.${namespace}.services.awww = {
enable = lib.mkEnableOption "awww wallpaper service";
};
config = lib.mkIf cfg.enable {
home.packages = with pkgs; [
swww
awww
];
systemd.user = {
services = {
swww-daemon = {
awww-daemon = {
Unit = {
Description = "SWWW Wallpaper Daemon";
Description = "AWWW Wallpaper Daemon";
After = [ "graphical-session.target" ];
};
@@ -31,7 +31,7 @@ in
Service = {
Type = "simple";
ExecStart = "${pkgs.swww}/bin/swww-daemon";
ExecStart = "${pkgs.awww}/bin/awww-daemon";
Restart = "on-failure";
RestartSec = 5;
};
@@ -39,28 +39,28 @@ in
change-wallpaper = {
Unit = {
Description = "SWWW Wallpaper Changer";
After = [ "swww-daemon.service" ];
Requires = [ "swww-daemon.service" ];
Description = "AWWW Wallpaper Changer";
After = [ "awww-daemon.service" ];
Requires = [ "awww-daemon.service" ];
};
Install = {
WantedBy = [ "swww-daemon.service" ];
WantedBy = [ "awww-daemon.service" ];
};
Service = {
Type = "oneshot";
ExecStart = "${pkgs.writeShellScript "change-wallpaper-script" ''
WALLPAPER=$(${pkgs.findutils}/bin/find $HOME/Wallpapers -type f | ${pkgs.coreutils}/bin/shuf -n 1)
${pkgs.swww}/bin/swww img "$WALLPAPER" --transition-type random
${pkgs.awww}/bin/awww img "$WALLPAPER" --transition-type random
''}";
};
};
};
timers.swww-schedule = {
timers.awww-schedule = {
Unit = {
Description = "SWWW Wallpaper Schedule";
Description = "AWWW Wallpaper Schedule";
};
Install = {

View File

@@ -0,0 +1,76 @@
# open-proxy module: declares a macOS-only "server" (launchd agent running
# `open-proxy serve`) and a Linux-only "client" (open/xdg-open shims placed in
# ~/.local/bin). Both sides read the same sops-managed token file.
{ config
, lib
, pkgs
, namespace
, ...
}:
let
  inherit (lib) mkEnableOption mkIf mkMerge;

  cfg = config.${namespace}.services.open-proxy;

  # One binary serves every role; the shims below are just alternate names for it.
  openProxyBin = "${pkgs.reichard.open-proxy}/bin/open-proxy";

  # Shared token secret and the path sops exposes it at.
  tokenSecret = "open_proxy_token";
  tokenFile = config.sops.secrets.${tokenSecret}.path;

  # Directory holding the launchd agent's stdout/stderr logs.
  logDir = "${config.home.homeDirectory}/Library/Logs/open-proxy";

  # Names under which the client shadows the system openers.
  shimPaths = [
    ".local/bin/open"
    ".local/bin/xdg-open"
  ];
in
{
  options.${namespace}.services.open-proxy = {
    server.enable = mkEnableOption "open-proxy host server (opens forwarded URLs/files on this machine)";
    client.enable = mkEnableOption "open-proxy client (shadows open/xdg-open to forward to the host)";
  };

  config = mkMerge [
    # Either role needs the token, so declare the secret whenever one is enabled.
    (mkIf (cfg.server.enable || cfg.client.enable) {
      sops.secrets.${tokenSecret}.sopsFile =
        lib.snowfall.fs.get-file "secrets/common/evanreichard.yaml";
    })

    # Host side: keep `open-proxy serve` running as a launchd agent.
    (mkIf cfg.server.enable {
      assertions = [
        {
          assertion = pkgs.stdenv.isDarwin;
          message = "reichard.services.open-proxy.server is only supported on macOS (Darwin).";
        }
      ];

      launchd.agents.open-proxy = {
        enable = true;
        config = {
          Label = "io.reichard.open-proxy";
          ProgramArguments = [ openProxyBin "serve" ];
          RunAtLoad = true;
          KeepAlive = true;
          EnvironmentVariables = {
            OPEN_PROXY_TOKEN_FILE = tokenFile;
            # open(1) lives in /usr/bin; launchd agents don't inherit a login PATH.
            PATH = "/usr/bin:/bin:/usr/sbin:/sbin";
          };
          StandardOutPath = "${logDir}/open-proxy.out.log";
          StandardErrorPath = "${logDir}/open-proxy.err.log";
        };
      };
    })

    # Client side: shadow the openers and point them at the token.
    (mkIf cfg.client.enable {
      assertions = [
        {
          assertion = pkgs.stdenv.isLinux;
          message = "reichard.services.open-proxy.client is only supported on Linux.";
        }
      ];

      # Shadow the openers via ~/.local/bin (prepended to PATH below). open-proxy
      # keys off argv[0], so these symlinks run in client mode and fall back to
      # any real opener further down PATH when the host is unreachable.
      home.file = lib.genAttrs shimPaths (_: { source = openProxyBin; });

      home.sessionPath = [ "$HOME/.local/bin" ];

      home.sessionVariables = {
        BROWSER = "open";
        OPEN_PROXY_TOKEN_FILE = tokenFile;
      };
    })
  ];
}

View File

@@ -16,6 +16,12 @@ in
enable32Bit = mkBoolOpt false "enable 32-bit";
enableIntel = mkBoolOpt false "support for intel";
enableNvidia = mkBoolOpt false "support for nvidia";
nvidiaPackage = lib.mkOption {
  type = lib.types.package;
  # Follows the active kernel package set unless a host pins another driver.
  default = config.boot.kernelPackages.nvidiaPackages.stable;
  # literalExpression makes the generated option docs render this as a Nix
  # expression instead of a quoted string literal.
  defaultText = lib.literalExpression "config.boot.kernelPackages.nvidiaPackages.stable";
  description = "nvidia driver package; pin to legacy_580 for Pascal (GTX 10xx) and older";
};
};
config = mkIf cfg.enable {
@@ -32,7 +38,7 @@ in
# Enable Nvidia Hardware
hardware.nvidia = mkIf cfg.enableNvidia {
package = config.boot.kernelPackages.nvidiaPackages.stable;
package = cfg.nvidiaPackage;
modesetting.enable = true;
powerManagement.enable = true;
open = false;

View File

@@ -38,6 +38,7 @@ in
sshUser = "evanreichard";
protocol = "ssh";
sshKey = config.sops.secrets.builder_ssh_key.path;
publicHostKey = "c3NoLWVkMjU1MTkgQUFBQUMzTnphQzFsWkRJMU5URTVBQUFBSUdscEMwcm9yQVRLeks4bUxNS2dDWXFNNU4yTi9HZ1MydDRNMTNjd25BT1M=";
supportedFeatures = [
"benchmark"
"big-parallel"

View File

@@ -1,8 +1,12 @@
# llama-swap Module — Agent Guide
## Model ID Convention
Use `<family>-<size>[-backend/variant][-context][-vl]-<placement>`. Omit `thinking` from IDs, use `vl` for vision-language models, and keep placement as the final suffix (`cuda0`, `cuda1`, or `dual`). Keep quantization and richer behavior details in the display `name` unless they are needed to distinguish two active configs for the same family/placement.
## Syncing vLLM Configs from club-3090
The three vLLM model configs in `config.nix` (`vllm-qwen3.6-27b-long-text`, `vllm-qwen3.6-27b-long-vision`, `vllm-qwen3.6-27b-tools-text`) are derived from the club-3090 repo's Docker Compose files. Each config block has a `Synced from:` comment with the commit hash it was last aligned to.
The three vLLM model configs in `config.nix` (`qwen3.6-27b-vllm-180k-cuda0`, `qwen3.6-27b-vllm-145k-vl-cuda0`, `qwen3.6-27b-vllm-75k-cuda0`) are derived from the club-3090 repo's Docker Compose files. Each config block has a `Synced from:` comment with the commit hash it was last aligned to.
### Source Files
@@ -10,9 +14,9 @@ The upstream compose files live at https://github.com/noonghunna/club-3090 under
| config.nix model ID | Compose file |
|------------------------------------|-------------------------------------|
| `vllm-qwen3.6-27b-long-text` | `docker-compose.long-text.yml` |
| `vllm-qwen3.6-27b-long-vision` | `docker-compose.long-vision.yml` |
| `vllm-qwen3.6-27b-tools-text` | `docker-compose.tools-text.yml` |
| `qwen3.6-27b-vllm-180k-cuda0` | `docker-compose.long-text.yml` |
| `qwen3.6-27b-vllm-145k-vl-cuda0` | `docker-compose.long-vision.yml` |
| `qwen3.6-27b-vllm-75k-cuda0` | `docker-compose.tools-text.yml` |
### Sync Process
@@ -28,7 +32,7 @@ The upstream compose files live at https://github.com/noonghunna/club-3090 under
- Genesis env vars — the full set grows frequently; add new ones, remove deprecated ones
- Sidecar patches — old patches get absorbed into Genesis; drop them from entrypoint + volume mounts
- Docker image tag — update when the compose files move to a new nightly
4. **Keep `patch_timings_07351e088.py`** — this is our own patch, not from club-3090. Always retain it in the entrypoint and volume mounts.
4. **Keep `patch_timings_1acd67a.py`** — this is our own patch, not from club-3090. Always retain it in the entrypoint and volume mounts.
5. **Update the `Synced from:` comment** on each config block with the new commit hash and date.
6. **Update `setup-qwen36-vllm.sh`** if the upstream `patches/` directory changed (new patches added, old ones removed). The setup script downloads sidecar patches and creates cache directories.
7. **Verify syntax**: `nix-instantiate --parse config.nix`

View File

@@ -1,6 +1,7 @@
{ pkgs }:
let
llama-cpp = pkgs.reichard.llama-cpp;
ik-llama-cpp = pkgs.reichard.ik-llama-cpp;
stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
cudaSupport = true;
};
@@ -13,8 +14,8 @@ in
# ---------------------------------------
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
"gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking";
"gpt-oss-20b-cuda0" = {
name = "GPT OSS 20B (CUDA0)";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
@@ -32,20 +33,24 @@ in
};
# https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main
"qwen3.6-35b-thinking" = {
name = "Qwen3.6 (35B) - Thinking";
"qwen3.6-35b-cuda0" = {
name = "Qwen3.6 35B (CUDA0, UD-IQ4)";
macros.ctx = "262144";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-35B-A3B-UD-IQ4_XS.gguf \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-35B-A3B-UD-IQ4_NL.gguf \
-c ''${ctx} \
-np 2 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.0 \
--presence-penalty 0.0 \
--repeat-penalty 1.0 \
-ctk q8_0 \
-ctv q8_0 \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
@@ -58,37 +63,126 @@ in
};
};
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF/tree/main
"qwen3.6-27b-thinking" = {
name = "Qwen3.6 (27B) - Thinking";
macros.ctx = "196608";
# https://huggingface.co/ubergarm/Qwen3.6-27B-GGUF/tree/main
"qwen3.6-27b-ik-cuda0" = {
  # Display name follows the same "<family> <size> (<placement>, <quant>)"
  # shape as the other llama-server entries in this file.
  name = "Qwen3.6 27B (CUDA0, IQ4_KS)";
  macros.ctx = "156000";
  # This entry selects the GPU through the environment rather than a
  # `-dev CUDA0` flag as the llama-cpp entries do.
  env = [ "CUDA_VISIBLE_DEVICES=0" ];
  # Served by the ik-llama-cpp package instead of the llama-cpp one.
  cmd = ''
    ${ik-llama-cpp}/bin/llama-server \
    --port ''${PORT} \
    -m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-MTP-IQ4_KS.gguf \
    -c ''${ctx} -ctk q8_0 -ctv q8_0 -ngl 99 \
    -mtp --draft-max 4 --draft-p-min 0.75 \
    -muge -mqkv -cram 32768 --ctx-checkpoints 32 \
    --jinja --chat-template-kwargs '{"preserve_thinking":true}'
  '';
  metadata = {
    type = [
      "text-generation"
      "coding"
    ];
  };
};
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
"qwen3.6-27b-cuda0" = {
name = "Qwen3.6 27B (CUDA0, UD-Q4)";
macros.ctx = "110000";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-IQ4_XS.gguf \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-UD-Q4_K_XL.gguf \
-c ''${ctx} \
--parallel 2 \
-np 2 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--presence-penalty 1.5 \
--presence-penalty 0.0 \
-ctk q8_0 \
-ctv q8_0 \
--keep 3000 \
--batch-size 4096 \
--ubatch-size 1024 \
--spec-type ngram-mod \
--spec-ngram-mod-n-match 24 \
--spec-draft-n-min 16 \
--spec-draft-n-max 64 \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
# --chat-template-kwargs "{\"enable_thinking\": false}"
# --spec-draft-n-min 16 \
# --spec-draft-n-max 32 \
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/tree/main
"gemma-4-26b-vl-cuda0" = {
name = "Gemma 4 26B (VL, CUDA0)";
macros.ctx = "196608";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Gemma/gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Gemma/mmproj-BF16_gemma-4-26B-A4B-it-UD-Q4_K_XL.gguf \
-c ''${ctx} \
--parallel 1 \
--spec-type ngram-mod \
--spec-ngram-mod-n-match 24 \
--spec-ngram-mod-n-min 48 \
--spec-ngram-mod-n-max 64 \
--temp 1.0 \
--top-k 64 \
--top-p 0.95 \
--no-warmup \
--jinja \
-fit off \
-dev CUDA0
'';
metadata = {
type = [
"text-generation"
"vision"
];
};
};
# https://huggingface.co/Lorbus/Qwen3.6-27B-int4-AutoRound
"qwen3.6-27b-vllm-50k-cuda0" = {
name = "Qwen3.6 27B (vLLM, 50K, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "50000";
proxy = "http://127.0.0.1:\${PORT}";
cmd = ''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-p ''${PORT}:8000 \
vllm/vllm-openai:latest \
/root/.cache/huggingface/qwen3.6-27b-autoround-int4 \
--served-model-name ''${MODEL_ID} \
--quantization auto_round \
--dtype float16 \
--tensor-parallel-size 1 \
--gpu-memory-utilization 0.97 \
--max-model-len ''${ctx} \
--max-num-seqs 1 \
--max-num-batched-tokens 4128 \
--kv-cache-dtype fp8_e5m2 \
--enable-chunked-prefill \
--enable-prefix-caching \
--speculative-config '{"method":"mtp","num_speculative_tokens":3}' \
--enable-auto-tool-choice \
--tool-call-parser qwen3_coder \
--trust-remote-code \
--default-chat-template-kwargs '{"enable_thinking": false}' \
--host 0.0.0.0 \
--port 8000
'';
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
@@ -98,11 +192,240 @@ in
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
# Synced from: club-3090 f6613c8 (2026-05-02) — docker-compose.long-text.yml
# Long-text variant - 180K context, text-only (no vision)
# TurboQuant 3-bit KV + MTP n=3 + Genesis v7.69 + Cliff 2 closure recipe
"vllm-qwen3.6-27b-long-text" = {
name = "vLLM Qwen3.6 (27B) - Long Text";
"qwen3.6-27b-vllm-75k-cuda0" = {
name = "Qwen3.6 27B (vLLM, 75K, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "75000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.97
--max-num-seqs 1
--max-num-batched-tokens 2048
--kv-cache-dtype fp8_e5m2
--language-model-only
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--chat-template /templates/chat_template.jinja
--enable-prefix-caching
--enable-chunked-prefill
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-v /mnt/ssd/vLLM/Templates/chat_template-v11.jinja:/templates/chat_template.jinja \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
"qwen3.6-27b-vllm-145k-vl-cuda0" = {
name = "Qwen3.6 27B (vLLM, 145K, VL, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "145000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.95
--max-num-seqs 1
--max-num-batched-tokens 4128
--kv-cache-dtype turboquant_3bit_nc
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--enable-prefix-caching
--enable-chunked-prefill
--no-scheduler-reserve-full-isl
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_BUFFER_MODE=shared \
-e GENESIS_ENABLE_P100=1 \
-e GENESIS_ENABLE_P101=1 \
-e GENESIS_ENABLE_P103=1 \
-e GENESIS_ENABLE_P15B_FA_VARLEN_CLAMP=1 \
-e GENESIS_ENABLE_P38B_COMPILE_SAFE=1 \
-e GENESIS_ENABLE_P4=1 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P60B_TRITON_KERNEL=1 \
-e GENESIS_ENABLE_P60_GDN_NGRAM_FIX=1 \
-e GENESIS_ENABLE_P61B_STREAMING_OVERLAP=1 \
-e GENESIS_ENABLE_P61_QWEN3_MULTI_TOOL=1 \
-e GENESIS_ENABLE_P62_STRUCT_OUT_SPEC_TIMING=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P67_TQ_MULTI_QUERY_KERNEL=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P78_TOLIST_CAPTURE_GUARD=0 \
-e GENESIS_ENABLE_P81_FP8_BLOCK_SCALED_M_LE_8=0 \
-e GENESIS_ENABLE_P82=0 \
-e GENESIS_ENABLE_P83=1 \
-e GENESIS_ENABLE_P87=1 \
-e GENESIS_ENABLE_P91=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_P98=1 \
-e GENESIS_ENABLE_P99=1 \
-e GENESIS_ENABLE_PN11_GDN_AB_CONTIGUOUS=1 \
-e GENESIS_ENABLE_PN12_FFN_INTERMEDIATE_POOL=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN22_LOCAL_ARGMAX_TP=1 \
-e GENESIS_ENABLE_PN25_SILU_INDUCTOR_SAFE=1 \
-e GENESIS_ENABLE_PN26_SPARSE_V=1 \
-e GENESIS_ENABLE_PN30_DS_LAYOUT_SPEC_DECODE=1 \
-e GENESIS_ENABLE_PN34_WORKSPACE_LOCK_RELAX=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_ENABLE_PN9_INDEPENDENT_DRAFTER_ATTN=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_P82_THRESHOLD_SINGLE=0.3 \
-e GENESIS_PN26_SPARSE_V_BLOCK_KV=8 \
-e GENESIS_PN26_SPARSE_V_NUM_WARPS=4 \
-e GENESIS_PN26_SPARSE_V_THRESHOLD=0.01 \
-e GENESIS_PREALLOC_TOKEN_BUDGET=4128 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_SSM_CONV_STATE_LAYOUT=DS \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_USE_FUSED_MOE_GROUPED_TOPK=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
"vision"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
"qwen3.6-27b-vllm-180k-cuda0" = {
name = "Qwen3.6 27B (vLLM, 180K, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "180000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
@@ -110,8 +433,8 @@ in
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_07351e088.py;
exec vllm serve
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
@@ -157,7 +480,6 @@ in
-e GENESIS_ENABLE_P61_QWEN3_MULTI_TOOL=1 \
-e GENESIS_ENABLE_P62_STRUCT_OUT_SPEC_TIMING=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P65_TURBOQUANT_SPEC_CG_DOWNGRADE=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P67_TQ_MULTI_QUERY_KERNEL=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
@@ -183,6 +505,7 @@ in
-e GENESIS_ENABLE_PN25_SILU_INDUCTOR_SAFE=1 \
-e GENESIS_ENABLE_PN26_SPARSE_V=1 \
-e GENESIS_ENABLE_PN30_DS_LAYOUT_SPEC_DECODE=1 \
-e GENESIS_ENABLE_PN31_FA_VARLEN_PERSISTENT_OUT=1 \
-e GENESIS_ENABLE_PN32_GDN_CHUNKED_PREFILL=1 \
-e GENESIS_ENABLE_PN34_WORKSPACE_LOCK_RELAX=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
@@ -203,7 +526,6 @@ in
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e TRITON_CACHE_DIR=/root/.triton/cache \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
@@ -213,244 +535,22 @@ in
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_USE_FUSED_MOE_GROUPED_TOPK=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
-v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_07351e088.py:/patches/patch_timings_07351e088.py:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-01d4d1ad375dc5854779c593eee093bcebb0cada \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
# Synced from: club-3090 f6613c8 (2026-05-02) — docker-compose.long-vision.yml
# Long-vision variant - 145K context with vision tower active
# TurboQuant 3-bit KV + MTP n=3 + Genesis v7.69 + Cliff 2 env vars (mem-util kept at 0.95)
"vllm-qwen3.6-27b-long-vision" = {
name = "vLLM Qwen3.6 (27B) - Long Vision";
macros.ctx = "145000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_07351e088.py;
exec vllm serve
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.95
--max-num-seqs 1
--max-num-batched-tokens 4128
--kv-cache-dtype turboquant_3bit_nc
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--enable-prefix-caching
--enable-chunked-prefill
--no-scheduler-reserve-full-isl
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_BUFFER_MODE=shared \
-e GENESIS_ENABLE_P100=1 \
-e GENESIS_ENABLE_P101=1 \
-e GENESIS_ENABLE_P103=1 \
-e GENESIS_ENABLE_P15B_FA_VARLEN_CLAMP=1 \
-e GENESIS_ENABLE_P38B_COMPILE_SAFE=1 \
-e GENESIS_ENABLE_P4=1 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P60B_TRITON_KERNEL=1 \
-e GENESIS_ENABLE_P60_GDN_NGRAM_FIX=1 \
-e GENESIS_ENABLE_P61B_STREAMING_OVERLAP=1 \
-e GENESIS_ENABLE_P61_QWEN3_MULTI_TOOL=1 \
-e GENESIS_ENABLE_P62_STRUCT_OUT_SPEC_TIMING=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P65_TURBOQUANT_SPEC_CG_DOWNGRADE=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P67_TQ_MULTI_QUERY_KERNEL=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P78_TOLIST_CAPTURE_GUARD=0 \
-e GENESIS_ENABLE_P81_FP8_BLOCK_SCALED_M_LE_8=0 \
-e GENESIS_ENABLE_P82=0 \
-e GENESIS_ENABLE_P83=1 \
-e GENESIS_ENABLE_P87=1 \
-e GENESIS_ENABLE_P91=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_P98=1 \
-e GENESIS_ENABLE_P99=1 \
-e GENESIS_ENABLE_PN11_GDN_AB_CONTIGUOUS=1 \
-e GENESIS_ENABLE_PN12_FFN_INTERMEDIATE_POOL=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN22_LOCAL_ARGMAX_TP=1 \
-e GENESIS_ENABLE_PN25_SILU_INDUCTOR_SAFE=1 \
-e GENESIS_ENABLE_PN26_SPARSE_V=1 \
-e GENESIS_ENABLE_PN30_DS_LAYOUT_SPEC_DECODE=1 \
-e GENESIS_ENABLE_PN32_GDN_CHUNKED_PREFILL=1 \
-e GENESIS_ENABLE_PN34_WORKSPACE_LOCK_RELAX=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_ENABLE_PN9_INDEPENDENT_DRAFTER_ATTN=1 \
-e GENESIS_FLA_FWD_H_MAX_T=16384 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_P82_THRESHOLD_SINGLE=0.3 \
-e GENESIS_PN26_SPARSE_V_BLOCK_KV=8 \
-e GENESIS_PN26_SPARSE_V_NUM_WARPS=4 \
-e GENESIS_PN26_SPARSE_V_THRESHOLD=0.01 \
-e GENESIS_PN32_GDN_CHUNK_SIZE=8192 \
-e GENESIS_PN32_GDN_CHUNK_THRESHOLD=16384 \
-e GENESIS_PREALLOC_TOKEN_BUDGET=4128 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e TRITON_CACHE_DIR=/root/.triton/cache \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_SSM_CONV_STATE_LAYOUT=DS \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_USE_FUSED_MOE_GROUPED_TOPK=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
-v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_07351e088.py:/patches/patch_timings_07351e088.py:ro \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-01d4d1ad375dc5854779c593eee093bcebb0cada \
-c "${vllmCmdFlat}"
'';
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
metadata = {
type = [
"text-generation"
"coding"
"vision"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
# Synced from: club-3090 ae4846f (2026-05-02) — docker-compose.tools-text.yml
# Tools-text variant - 75K context, text-only (no vision)
# fp8_e5m2 KV + MTP n=3. IDE agents (Cline, Cursor, OpenCode, etc.)
"vllm-qwen3.6-27b-tools-text" = {
name = "vLLM Qwen3.6 (27B) - Tools Text";
macros.ctx = "75000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_07351e088.py;
exec vllm serve
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.97
--max-num-seqs 1
--max-num-batched-tokens 2048
--kv-cache-dtype fp8_e5m2
--language-model-only
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--enable-prefix-caching
--enable-chunked-prefill
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e TRITON_CACHE_DIR=/root/.triton/cache \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
-v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_07351e088.py:/patches/patch_timings_07351e088.py:ro \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-01d4d1ad375dc5854779c593eee093bcebb0cada \
-c "${vllmCmdFlat}"
'';
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
@@ -465,37 +565,9 @@ in
# ------------- GTX 1080 Ti -------------
# ---------------------------------------
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
"qwen3-8b-vision" = {
name = "Qwen3 Vision (8B) - Thinking";
macros.ctx = "60000";
env = [ "CUDA_VISIBLE_DEVICES=1" ];
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
-c ''${ctx} \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [
"text-generation"
"vision"
];
};
};
# https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/tree/main
"qwen3.5-9b-thinking" = {
name = "Qwen3.5 (9B) - Thinking";
"qwen3.5-9b-vl-cuda1" = {
name = "Qwen3.5 9B (VL, CUDA1)";
macros.ctx = "131072";
env = [ "CUDA_VISIBLE_DEVICES=1" ];
cmd = ''
@@ -521,8 +593,8 @@ in
};
# https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/tree/main
"qwen3.5-4b-thinking" = {
name = "Qwen3.5 (4B) - Thinking";
"qwen3.5-4b-cuda1" = {
name = "Qwen3.5 4B (CUDA1)";
macros.ctx = "131072";
env = [ "CUDA_VISIBLE_DEVICES=1" ];
cmd = ''
@@ -540,6 +612,7 @@ in
metadata = {
type = [
"text-generation"
"coding"
];
};
};
@@ -549,8 +622,8 @@ in
# ---------------------------------------
# https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/tree/main
"qwen3-coder-next-80b-instruct" = {
name = "Qwen3 Coder Next (80B) - Instruct";
"qwen3-coder-next-80b-dual" = {
name = "Qwen3 Coder Next 80B (Dual GPU)";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
@@ -574,13 +647,79 @@ in
};
};
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
# Qwen3.6 27B (UD-Q6_K_XL GGUF) served by llama-server across both GPUs.
# The command selects devices CUDA0 and CUDA1 (-dev) with a 73,27 split (-ts),
# requests q8_0 for both cache types (-ctk / -ctv) and enables the draft-mtp
# speculative type with at most 3 draft tokens.
# NOTE(review): no `env` is set on this entry, so it presumably relies on both
# GPUs being visible to the service - confirm.
"qwen3.6-27b-dual" = {
name = "Qwen3.6 27B (Dual GPU, UD-Q6)";
# Context size; referenced as the ctx macro by `-c` in cmd.
macros.ctx = "120000";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-UD-Q6_K_XL.gguf \
-c ''${ctx} \
-np 4 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--presence-penalty 0.0 \
-ctk q8_0 \
-ctv q8_0 \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0,CUDA1 \
-ts 73,27 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
# Capability tags for this model.
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://huggingface.co/unsloth/Qwen3.6-35B-A3B-MTP-GGUF/tree/main
# Qwen3.6 35B-A3B (UD-Q6_K GGUF) served by llama-server across both GPUs.
# The command selects devices CUDA0 and CUDA1 (-dev) with a 72,28 split (-ts)
# and enables the draft-mtp speculative type with at most 3 draft tokens.
# Unlike the 27B dual entry, no -ctk / -ctv cache-type flags are passed.
"qwen3.6-35b-dual" = {
name = "Qwen3.6 35B (Dual GPU, UD-Q6)";
# Disabled settings kept for reference: the earlier, larger context size and
# the q8_0 cache-type flags that are no longer passed in cmd.
# macros.ctx = "215000";
# -ctk q8_0 \
# -ctv q8_0 \
# Context size; referenced as the ctx macro by `-c` in cmd.
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-35B-A3B-UD-Q6_K.gguf \
-c ''${ctx} \
-np 4 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--presence-penalty 0.0 \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0,CUDA1 \
-fit off \
-ts 72,28 \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
# Capability tags for this model.
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# ---------------------------------------
# ---------- Stable Diffussion ----------
# ---------- Stable Diffusion ----------
# ---------------------------------------
"z-image-turbo" = {
"z-image-turbo-cuda0" = {
name = "Z-Image-Turbo";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -597,9 +736,10 @@ in
};
};
"qwen-image-edit-2511" = {
"qwen-image-edit-2511-cuda0" = {
name = "Qwen Image Edit 2511";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -623,9 +763,10 @@ in
};
};
"qwen-image-2512" = {
"qwen-image-2512-cuda0" = {
name = "Qwen Image 2512";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -645,9 +786,10 @@ in
};
};
"chroma-radiance" = {
"chroma-radiance-cuda0" = {
name = "Chroma Radiance";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -667,36 +809,31 @@ in
# Concurrent Model Matrix
#
# CUDA0 models can run alongside CUDA1 models (one each). Models not
# listed in any set (dual-GPU models using -ts) run alone and evict
# everything.
# listed in any set (dual-GPU models) run alone and evict everything.
matrix = {
vars = {
# --- RTX 3090 Models ---
vlt = "vllm-qwen3.6-27b-long-text";
vtt = "vllm-qwen3.6-27b-tools-text";
vlv = "vllm-qwen3.6-27b-long-vision";
go = "gpt-oss-20b-thinking";
q36a = "qwen3.6-35b-thinking";
q36b = "qwen3.6-27b-thinking";
zi = "z-image-turbo";
qie = "qwen-image-edit-2511";
qi = "qwen-image-2512";
cr = "chroma-radiance";
v180 = "qwen3.6-27b-vllm-180k-cuda0";
v145 = "qwen3.6-27b-vllm-145k-vl-cuda0";
v75 = "qwen3.6-27b-vllm-75k-cuda0";
v50 = "qwen3.6-27b-vllm-50k-cuda0";
go = "gpt-oss-20b-cuda0";
g4 = "gemma-4-26b-vl-cuda0";
q36a = "qwen3.6-35b-cuda0";
q36b = "qwen3.6-27b-cuda0";
q36ik = "qwen3.6-27b-ik-cuda0";
zi = "z-image-turbo-cuda0";
qie = "qwen-image-edit-2511-cuda0";
qi = "qwen-image-2512-cuda0";
cr = "chroma-radiance-cuda0";
# --- GTX 1080 Ti Models ---
qv = "qwen3-8b-vision";
q4 = "qwen3.5-4b-thinking";
q9 = "qwen3.5-9b-thinking";
};
evict_costs = {
vlt = 50;
vtt = 50;
vlv = 50;
q4 = "qwen3.5-4b-cuda1";
q9 = "qwen3.5-9b-vl-cuda1";
};
sets = {
concurrent = "(go | q36a | q36b | vlt | vtt | vlv | zi | qie | qi | cr) & (qv | q4 | q9)";
concurrent = "(go | g4 | q36a | q36b | q36ik | v180 | v145 | v75 | v50 | zi | qie | qi | cr) & (q4 | q9)";
};
};
}

View File

@@ -11,6 +11,31 @@ let
cfg = config.${namespace}.services.llama-swap;
llama-swap = pkgs.reichard.llama-swap;
# Subset of the llama-swap model definitions whose command launches
# llama-server, reduced to the fields that are exported as presets.
llamaCppPresets =
let
# All model definitions from the sibling config.nix.
models = (import ./config.nix { inherit pkgs; }).models;
# Keep only models whose cmd contains "/bin/llama-server". A model without a
# cmd attribute is treated as "" and is therefore filtered out.
llamaCppModels = lib.filterAttrs (_: model: lib.hasInfix "/bin/llama-server" (model.cmd or "")) models;
in
# Preserve the attribute names and project each model to cmd, name and env,
# defaulting name to "" and env to [ ] when the model does not set them.
builtins.mapAttrs (_: model: {
inherit (model) cmd;
name = model.name or "";
env = model.env or [ ];
}) llamaCppModels;
# The presets serialised as JSON into a store file named llama-cpp-presets.json.
llamaCppPresetFile = pkgs.writeText "llama-cpp-presets.json" (builtins.toJSON llamaCppPresets);
# Shell application built from ./scripts/llama-cpp-bisect-context. The script
# text is read at evaluation time and every "__LLAMA_CPP_PRESETS__" placeholder
# in it is replaced with the path of llamaCppPresetFile.
llama-cpp-bisect-context = pkgs.writeShellApplication {
name = "llama-cpp-bisect-context";
# Packages made available to the script at runtime.
runtimeInputs = with pkgs; [
coreutils
curl
gnused
python3
util-linux
];
# Script body with the presets placeholder substituted.
text = builtins.replaceStrings
[ "__LLAMA_CPP_PRESETS__" ]
[ "${llamaCppPresetFile}" ]
(builtins.readFile ./scripts/llama-cpp-bisect-context);
};
in
{
options.${namespace}.services.llama-swap = {
@@ -37,7 +62,6 @@ in
description = "Model swapping for LLaMA C++ Server (or any local OpenAPI compatible server)";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "exec";
ExecStart = "${lib.getExe llama-swap} --listen :8080 --config ${
@@ -97,6 +121,7 @@ in
};
}) apiKeys);
templates."llama-swap.json" = {
restartUnits = [ "llama-swap.service" ];
owner = "llama-swap";
group = "llama-swap";
mode = "0400";
@@ -108,6 +133,8 @@ in
};
};
environment.systemPackages = [ llama-cpp-bisect-context ];
networking.firewall.allowedTCPPorts = [ 8080 ];
};
}

View File

@@ -1,22 +1,54 @@
# vLLM Timings Patch
This scratch directory contains two ways to patch vLLM so its OpenAI-compatible responses include llama.cpp-compatible `timings` data. llama-swap already parses this `timings` object to populate cached tokens, prompt processing speed, and generation speed.
This directory contains the custom timings patch for the current vLLM Docker image used by the llama-swap module:
```text
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657
```
The patch adds a top-level llama.cpp-compatible `timings` object to OpenAI-compatible responses so llama-swap can populate cached tokens, prompt processing speed, and generation speed.
## Files
- `patch_timings_07351e088.py` — disk-edit patch script for running inside the vLLM Docker container before `vllm serve`.
- `vllm-timings-07351e088.patch` — standard unified git patch against `vllm/vllm-openai:nightly-07351e0883470724dd5a7e9730ed10e01fc99d08`.
- `patch_timings_1acd67a.py` — idempotent boot-time disk-edit patch script for the vLLM Docker container.
- `vllm-timings-1acd67a.patch` — equivalent standard unified git patch against the current image's vLLM source.
## What The Patch Adds
## Runtime Script
The patch adds a top-level `timings` object to:
Deploy the script under `/mnt/ssd/vLLM/Patches/` and mount it into the container:
- `/v1/chat/completions` non-streaming responses
- `/v1/chat/completions` streaming final usage chunk
- `/v1/completions` non-streaming responses
- `/v1/completions` streaming final usage chunk
```nix
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
```
The object matches llama.cpp's fields:
Run it before `exec vllm serve`:
```bash
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ...
```
The script is idempotent. Re-running it skips files that already contain `# [patch_timings]`.
## Standard Patch
For a source checkout at commit `1acd67a795ebccdf9b9db7697ae9082058301657`:
```bash
git apply --check /path/to/vllm-timings-1acd67a.patch
git apply /path/to/vllm-timings-1acd67a.patch
```
At container runtime, applying the `.patch` directly is possible if the image has `patch` or `git` installed:
```bash
cd /usr/local/lib/python3.12/dist-packages
patch -p1 < /patches/vllm-timings-1acd67a.patch
```
The Python script remains the safer boot-time option because it is idempotent and does not depend on external patch tools being present in the Docker image.
## Timings Fields
```json
{
@@ -35,78 +67,3 @@ Data comes from vLLM's existing internal `RequestStateStats` and `RequestOutput.
- prompt/prefill time: `first_token_ts - scheduled_ts`
- generation/decode time: `last_token_ts - first_token_ts`
- cached tokens: `num_cached_tokens`
## Option 1: Runtime Docker Patch Script
Copy the script into the deployed patch directory:
```bash
cp _scratch/patch_timings_07351e088.py /mnt/ssd/vLLM/Patches/patch_timings_07351e088.py
```
Add the Docker mount in `/etc/nixos/modules/nixos/services/llama-swap/config.nix`:
```nix
-v /mnt/ssd/vLLM/Patches/patch_timings_07351e088.py:/patches/patch_timings_07351e088.py:ro \
```
Run it before `exec vllm serve` in `vllmCmd`:
```bash
python3 /patches/patch_timings_07351e088.py;
exec vllm serve ...
```
The script is idempotent. Re-running it skips files that already contain `# [patch_timings]`.
## Option 2: Standard Patch File
Use this for a source checkout or future vLLM updates where conflicts can be resolved normally.
From a vLLM checkout at commit `07351e0883470724dd5a7e9730ed10e01fc99d08`:
```bash
git apply /path/to/_scratch/vllm-timings-07351e088.patch
```
Or with `patch`:
```bash
patch -p1 < /path/to/_scratch/vllm-timings-07351e088.patch
```
For future vLLM versions, try:
```bash
git apply --check /path/to/_scratch/vllm-timings-07351e088.patch
```
If it fails, apply manually or with rejects and resolve conflicts around the changed response-construction code.
## Verification Performed
The patch was checked against the Docker tag's pinned commit:
```text
vllm/vllm-openai:nightly-07351e0883470724dd5a7e9730ed10e01fc99d08
```
Validation done locally:
```bash
git apply --check _scratch/vllm-timings-07351e088.patch
git apply _scratch/vllm-timings-07351e088.patch
nix run nixpkgs#python3 -- -m py_compile \
vllm/entrypoints/openai/chat_completion/protocol.py \
vllm/entrypoints/openai/chat_completion/serving.py \
vllm/entrypoints/openai/completion/protocol.py \
vllm/entrypoints/openai/completion/serving.py
```
The runtime `patch_timings_07351e088.py` script was also tested against files extracted from the pinned commit and confirmed idempotent.
## Caveats
- Normal chat completion usage should be correct.
- `/v1/completions` with multiple prompts returns aggregate token counts, but the timing values come from the last completed request. Single-prompt completions are the expected use case.
- Streaming timings are attached only to the final usage chunk, so clients must request/include usage for streaming if they want timings in the stream.

View File

@@ -1,5 +1,5 @@
"""
Disk-edit patch for vLLM nightly-07351e0883470724dd5a7e9730ed10e01fc99d08:
Disk-edit patch for vLLM nightly-1acd67a795ebccdf9b9db7697ae9082058301657:
inject llama.cpp-compatible `timings` into chat/completion API responses.
Adds `timings` to:
@@ -13,7 +13,7 @@ The `timings` object matches llama.cpp fields consumed by llama-swap:
predicted_n, predicted_ms, predicted_per_second, cache_n
Usage, before `exec vllm serve`:
python3 /patches/patch_timings.py
python3 /patches/patch_timings_1acd67a.py
"""
import logging
@@ -85,70 +85,8 @@ def _write(path, content):
def _replace_once(content, old, new, label):
count = content.count(old)
if count == 1:
return content.replace(old, new, 1)
# vLLM v0.20 added system_fingerprint to response constructors. Preserve
# compatibility with the original dev205 anchors by retrying with that
# field inserted when the old anchor is not present.
variants = [
(
old.replace(
" usage=final_usage,\n )",
" usage=final_usage,\n system_fingerprint=self.system_fingerprint,\n )",
),
new.replace(
" usage=final_usage,\n )",
" usage=final_usage,\n system_fingerprint=self.system_fingerprint,\n )",
),
),
(
old.replace(
" usage=usage,\n prompt_logprobs=",
" usage=usage,\n system_fingerprint=self.system_fingerprint,\n prompt_logprobs=",
),
new.replace(
" usage=usage,\n prompt_logprobs=",
" usage=usage,\n system_fingerprint=self.system_fingerprint,\n prompt_logprobs=",
),
),
(
old.replace(
" usage=final_usage_info,\n )",
" usage=final_usage_info,\n system_fingerprint=self.system_fingerprint,\n )",
),
new.replace(
" usage=final_usage_info,\n )",
" usage=final_usage_info,\n system_fingerprint=self.system_fingerprint,\n )",
),
),
(
old.replace(
" usage=usage,\n kv_transfer_params=kv_transfer_params,",
" usage=usage,\n system_fingerprint=self.system_fingerprint,\n kv_transfer_params=kv_transfer_params,",
),
new.replace(
" usage=usage,\n kv_transfer_params=kv_transfer_params,",
" usage=usage,\n system_fingerprint=self.system_fingerprint,\n kv_transfer_params=kv_transfer_params,",
),
),
]
matches = [(variant_old, variant_new) for variant_old, variant_new in variants if content.count(variant_old) == 1]
if len(matches) == 1:
variant_old, variant_new = matches[0]
return content.replace(variant_old, variant_new, 1)
variant_counts = [content.count(variant_old) for variant_old, _ in variants]
raise RuntimeError(f"{label}: anchor matched {count} times; v0.20 variants matched {variant_counts}")
def _replace_once_any(content, replacements, label):
"""Replace exactly one of several version-specific anchors."""
matches = [(old, new) for old, new in replacements if content.count(old) == 1]
if len(matches) != 1:
counts = [content.count(old) for old, _ in replacements]
raise RuntimeError(f"{label}: versioned anchors matched {counts}")
old, new = matches[0]
if count != 1:
raise RuntimeError(f"{label}: anchor matched {count} times")
return content.replace(old, new, 1)
@@ -231,19 +169,19 @@ def _patch_chat_serving(vllm_dir):
label,
)
# Streaming Final Usage Chunk - pinned image has no system_fingerprint arg.
# Streaming Final Usage Chunk
content = _replace_once(
content,
''' final_usage_chunk = ChatCompletionStreamResponse(\n id=request_id,\n object=chunk_object_type,\n created=created_time,\n choices=[],\n model=model_name,\n usage=final_usage,\n )\n''',
f''' final_usage_chunk = ChatCompletionStreamResponse(\n id=request_id,\n object=chunk_object_type,\n created=created_time,\n choices=[],\n model=model_name,\n usage=final_usage,\n )\n # Inject Timings {PATCH_TAG}\n try:\n _s_cached = _last_stream_res.num_cached_tokens\n final_usage_chunk.timings = _compute_timings(\n _last_stream_res.metrics,\n num_prompt_tokens, completion_tokens, _s_cached,\n )\n except NameError:\n pass\n''',
''' final_usage_chunk = ChatCompletionStreamResponse(\n id=request_id,\n object=chunk_object_type,\n created=created_time,\n choices=[],\n model=model_name,\n usage=final_usage,\n system_fingerprint=self.system_fingerprint,\n )\n''',
f''' final_usage_chunk = ChatCompletionStreamResponse(\n id=request_id,\n object=chunk_object_type,\n created=created_time,\n choices=[],\n model=model_name,\n usage=final_usage,\n system_fingerprint=self.system_fingerprint,\n )\n # Inject Timings {PATCH_TAG}\n try:\n _s_cached = _last_stream_res.num_cached_tokens\n final_usage_chunk.timings = _compute_timings(\n _last_stream_res.metrics,\n num_prompt_tokens, completion_tokens, _s_cached,\n )\n except NameError:\n pass\n''',
label,
)
# Non-Streaming Response - pinned image has no system_fingerprint arg.
# Non-Streaming Response
content = _replace_once(
content,
''' response = ChatCompletionResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),\n prompt_token_ids=(\n final_res.prompt_token_ids if request.return_token_ids else None\n ),\n kv_transfer_params=final_res.kv_transfer_params,\n )\n''',
f''' response = ChatCompletionResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),\n prompt_token_ids=(\n final_res.prompt_token_ids if request.return_token_ids else None\n ),\n kv_transfer_params=final_res.kv_transfer_params,\n )\n\n # Inject Timings {PATCH_TAG}\n _cached = final_res.num_cached_tokens\n response.timings = _compute_timings(\n final_res.metrics, num_prompt_tokens, num_generated_tokens,\n _cached,\n )\n''',
''' response = ChatCompletionResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n system_fingerprint=self.system_fingerprint,\n prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),\n prompt_token_ids=(\n final_res.prompt_token_ids if request.return_token_ids else None\n ),\n kv_transfer_params=final_res.kv_transfer_params,\n prompt_routed_experts=prompt_routed_experts,\n )\n''',
f''' response = ChatCompletionResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n system_fingerprint=self.system_fingerprint,\n prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),\n prompt_token_ids=(\n final_res.prompt_token_ids if request.return_token_ids else None\n ),\n kv_transfer_params=final_res.kv_transfer_params,\n prompt_routed_experts=prompt_routed_experts,\n )\n\n # Inject Timings {PATCH_TAG}\n _cached = final_res.num_cached_tokens\n response.timings = _compute_timings(\n final_res.metrics, num_prompt_tokens, num_generated_tokens,\n _cached,\n )\n''',
label,
)
except RuntimeError as e:
@@ -284,19 +222,19 @@ def _patch_completion_serving(vllm_dir):
label,
)
# Streaming Final Usage Chunk - pinned image has no system_fingerprint arg.
# Streaming Final Usage Chunk
content = _replace_once(
content,
''' final_usage_chunk = CompletionStreamResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=[],\n usage=final_usage_info,\n )\n''',
f''' final_usage_chunk = CompletionStreamResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=[],\n usage=final_usage_info,\n )\n # Inject Timings {PATCH_TAG}\n try:\n _sc_cached = _last_comp_res.num_cached_tokens\n final_usage_chunk.timings = _compute_timings(\n _last_comp_res.metrics,\n total_prompt_tokens, total_completion_tokens,\n _sc_cached,\n )\n except NameError:\n pass\n''',
''' final_usage_chunk = CompletionStreamResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=[],\n usage=final_usage_info,\n system_fingerprint=self.system_fingerprint,\n )\n''',
f''' final_usage_chunk = CompletionStreamResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=[],\n usage=final_usage_info,\n system_fingerprint=self.system_fingerprint,\n )\n # Inject Timings {PATCH_TAG}\n try:\n _sc_cached = _last_comp_res.num_cached_tokens\n final_usage_chunk.timings = _compute_timings(\n _last_comp_res.metrics,\n total_prompt_tokens, total_completion_tokens,\n _sc_cached,\n )\n except NameError:\n pass\n''',
label,
)
# Non-Streaming Response - pinned image has no system_fingerprint arg.
# Non-Streaming Response
content = _replace_once(
content,
''' return CompletionResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n kv_transfer_params=kv_transfer_params,\n )\n''',
f''' _comp_response = CompletionResponse( {PATCH_TAG}\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n kv_transfer_params=kv_transfer_params,\n )\n # Inject Timings {PATCH_TAG}\n if last_final_res is not None:\n _comp_cached = last_final_res.num_cached_tokens\n _comp_response.timings = _compute_timings(\n last_final_res.metrics, num_prompt_tokens,\n num_generated_tokens, _comp_cached,\n )\n return _comp_response\n''',
''' return CompletionResponse(\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n system_fingerprint=self.system_fingerprint,\n kv_transfer_params=kv_transfer_params,\n prompt_routed_experts=prompt_routed_experts,\n )\n''',
f''' _comp_response = CompletionResponse( {PATCH_TAG}\n id=request_id,\n created=created_time,\n model=model_name,\n choices=choices,\n usage=usage,\n system_fingerprint=self.system_fingerprint,\n kv_transfer_params=kv_transfer_params,\n prompt_routed_experts=prompt_routed_experts,\n )\n # Inject Timings {PATCH_TAG}\n if last_final_res is not None:\n _comp_cached = last_final_res.num_cached_tokens\n _comp_response.timings = _compute_timings(\n last_final_res.metrics, num_prompt_tokens,\n num_generated_tokens, _comp_cached,\n )\n return _comp_response\n''',
label,
)
except RuntimeError as e:

View File

@@ -1,8 +1,8 @@
diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py
index aacac38..074ca45 100644
index 742f9cc..ade939f 100644
--- a/vllm/entrypoints/openai/chat_completion/protocol.py
+++ b/vllm/entrypoints/openai/chat_completion/protocol.py
@@ -111,6 +111,9 @@ class ChatCompletionResponse(OpenAIBaseModel):
@@ -115,6 +115,9 @@ class ChatCompletionResponse(OpenAIBaseModel):
default=None, description="KVTransfer parameters."
)
@@ -12,7 +12,7 @@ index aacac38..074ca45 100644
class ChatCompletionResponseStreamChoice(OpenAIBaseModel):
index: int
@@ -132,6 +135,9 @@ class ChatCompletionStreamResponse(OpenAIBaseModel):
@@ -139,6 +142,9 @@ class ChatCompletionStreamResponse(OpenAIBaseModel):
# not part of the OpenAI spec but for tracing the tokens
prompt_token_ids: list[int] | None = None
@@ -23,10 +23,10 @@ index aacac38..074ca45 100644
class ChatCompletionToolsParam(OpenAIBaseModel):
type: Literal["function"] = "function"
diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index 12dc2cd..c15fb6d 100644
index 1026e0a..a9c5708 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -83,6 +83,34 @@ if TYPE_CHECKING:
@@ -79,6 +79,34 @@ if TYPE_CHECKING:
logger = init_logger(__name__)
@@ -61,7 +61,7 @@ index 12dc2cd..c15fb6d 100644
class OpenAIServingChat(OpenAIServing):
def __init__(
self,
@@ -633,6 +661,7 @@ class OpenAIServingChat(OpenAIServing):
@@ -485,6 +513,7 @@ class OpenAIServingChat(OpenAIServing):
try:
async for res in result_generator:
@@ -69,9 +69,9 @@ index 12dc2cd..c15fb6d 100644
if res.prompt_token_ids is not None:
num_prompt_tokens = len(res.prompt_token_ids)
if res.encoder_prompt_token_ids is not None:
@@ -1230,6 +1259,15 @@ class OpenAIServingChat(OpenAIServing):
model=model_name,
@@ -947,6 +976,15 @@ class OpenAIServingChat(OpenAIServing):
usage=final_usage,
system_fingerprint=self.system_fingerprint,
)
+ # Inject Timings # [patch_timings]
+ try:
@@ -85,8 +85,8 @@ index 12dc2cd..c15fb6d 100644
final_usage_data = final_usage_chunk.model_dump_json(
exclude_unset=True, exclude_none=True
)
@@ -1644,6 +1682,13 @@ class OpenAIServingChat(OpenAIServing):
kv_transfer_params=final_res.kv_transfer_params,
@@ -1377,6 +1415,13 @@ class OpenAIServingChat(OpenAIServing):
prompt_routed_experts=prompt_routed_experts,
)
+ # Inject Timings # [patch_timings]
@@ -100,10 +100,10 @@ index 12dc2cd..c15fb6d 100644
if self.enable_log_outputs and self.request_logger:
for choice in choices:
diff --git a/vllm/entrypoints/openai/completion/protocol.py b/vllm/entrypoints/openai/completion/protocol.py
index c785d25..85928f4 100644
index 7bb3c8d..8487e93 100644
--- a/vllm/entrypoints/openai/completion/protocol.py
+++ b/vllm/entrypoints/openai/completion/protocol.py
@@ -485,6 +485,9 @@ class CompletionResponse(OpenAIBaseModel):
@@ -489,6 +489,9 @@ class CompletionResponse(OpenAIBaseModel):
default=None, description="KVTransfer parameters."
)
@@ -113,15 +113,18 @@ index c785d25..85928f4 100644
class CompletionResponseStreamChoice(OpenAIBaseModel):
index: int
@@ -512,3 +515,6 @@ class CompletionStreamResponse(OpenAIBaseModel):
@@ -516,6 +519,9 @@ class CompletionStreamResponse(OpenAIBaseModel):
model: str
choices: list[CompletionResponseStreamChoice]
usage: UsageInfo | None = Field(default=None)
+
+ # llama.cpp-compatible per-request timings # [patch_timings]
+ timings: dict[str, Any] | None = None
# Set only on the final chunk of a stream to mirror non-streaming responses
# without the per-chunk serialization overhead.
system_fingerprint: str | None = None
diff --git a/vllm/entrypoints/openai/completion/serving.py b/vllm/entrypoints/openai/completion/serving.py
index fb7f253..11a5350 100644
index ee4ca9f..8b27011 100644
--- a/vllm/entrypoints/openai/completion/serving.py
+++ b/vllm/entrypoints/openai/completion/serving.py
@@ -48,6 +48,34 @@ if TYPE_CHECKING:
@@ -159,7 +162,7 @@ index fb7f253..11a5350 100644
class OpenAIServingCompletion(OpenAIServing):
def __init__(
self,
@@ -290,6 +318,7 @@ class OpenAIServingCompletion(OpenAIServing):
@@ -291,6 +319,7 @@ class OpenAIServingCompletion(OpenAIServing):
try:
async for prompt_idx, res in result_generator:
@@ -167,9 +170,9 @@ index fb7f253..11a5350 100644
prompt_token_ids = res.prompt_token_ids
prompt_logprobs = res.prompt_logprobs
@@ -434,6 +463,16 @@ class OpenAIServingCompletion(OpenAIServing):
choices=[],
@@ -445,6 +474,16 @@ class OpenAIServingCompletion(OpenAIServing):
usage=final_usage_info,
system_fingerprint=self.system_fingerprint,
)
+ # Inject Timings # [patch_timings]
+ try:
@@ -184,18 +187,18 @@ index fb7f253..11a5350 100644
final_usage_data = final_usage_chunk.model_dump_json(
exclude_unset=False, exclude_none=True
)
@@ -556,7 +595,7 @@ class OpenAIServingCompletion(OpenAIServing):
request_metadata.final_usage_info = usage
if final_res_batch:
kv_transfer_params = final_res_batch[0].kv_transfer_params
@@ -577,7 +616,7 @@ class OpenAIServingCompletion(OpenAIServing):
if pre is not None:
prompt_routed_experts = pre.tolist()
- return CompletionResponse(
+ _comp_response = CompletionResponse( # [patch_timings]
id=request_id,
created=created_time,
model=model_name,
@@ -564,6 +603,14 @@ class OpenAIServingCompletion(OpenAIServing):
usage=usage,
@@ -587,6 +626,14 @@ class OpenAIServingCompletion(OpenAIServing):
kv_transfer_params=kv_transfer_params,
prompt_routed_experts=prompt_routed_experts,
)
+ # Inject Timings # [patch_timings]
+ if last_final_res is not None:

View File

@@ -0,0 +1,464 @@
#!/usr/bin/env bash
set -Eeuo pipefail
# Print the command-line help text to stdout.
# The quoted heredoc delimiter ('EOF') keeps ${PORT}/${CTX} in the examples
# literal instead of expanding them.
usage() {
cat <<'EOF'
Usage:
llama-cpp-bisect-context MODEL --low N --high N [options]
llama-cpp-bisect-context --cmd-template CMD --low N --high N [options]
llama-cpp-bisect-context --cmd-file FILE --low N --high N [options]
Bisect the largest llama.cpp llama-server context that can start and complete
a near-context prompt without OOMing. Startup-only mode is available for isolating the first cliff.
MODEL is a llama.cpp preset generated from the Nix llama-swap config.
Command templates are evaluated with these environment variables:
PORT random listen port for this trial
CTX candidate context size
Options:
--cmd-template CMD llama-server command, e.g. 'llama-server --port ${PORT} -c ${CTX} ...'
--cmd-file FILE executable or shell snippet using $PORT and $CTX
--preset-file FILE preset JSON file (default: Nix-generated presets)
--list-presets list available Nix-generated presets and exit
--low N known/assumed lower context bound
--high N upper context bound to test
--step N stop when high-low <= N (default: 1024)
--prompt-ratio PCT prompt fill target as percent of CTX (default: 90)
--chars-per-token N rough prompt sizing ratio (default: 4)
--prompt-turns N split the prompt across N user/assistant turns (default: 4)
--max-tokens N generated tokens for prompt test (default: 32)
--startup-timeout SEC seconds to wait for /health readiness (default: 300)
--request-timeout SEC seconds to wait for prompt response (default: 600)
--cooldown SEC seconds to sleep after stopping server (default: 5)
--startup-only only test server startup, not prompt/runtime OOM
--verbose print llama-server logs for each failed trial
--keep-logs keep trial logs after a successful run too
-h, --help show this help
Examples:
llama-cpp-bisect-context \
--cmd-template 'llama-server --port ${PORT} -m model.gguf -c ${CTX} -ngl 99' \
--low 32768 --high 196608
llama-cpp-bisect-context qwen3.6-27b-ik-cuda0 --low 32768 --high 180000
llama-cpp-bisect-context --cmd-file ./server-command.sh --low 32768 --high 196608
EOF
}
# Option defaults; each one can be overridden by the matching command-line flag.

# Command source: exactly one of these three must end up non-empty.
preset_model=""
# NOTE(review): placeholder presumably substituted with the Nix-generated
# preset JSON path at packaging time - confirm against the package definition.
preset_file="__LLAMA_CPP_PRESETS__"
list_presets=0
cmd_template=""
cmd_file=""
# Search bounds (required) and the interval width at which bisection stops.
low=""
high=""
step=1024
# Prompt sizing: fill prompt_ratio percent of CTX, estimated at
# chars_per_token characters per token, split across prompt_turns user turns.
prompt_ratio=90
chars_per_token=4
prompt_turns=4
max_tokens=32
# Timeouts and pauses, in seconds.
startup_timeout=300
request_timeout=600
cooldown=5
# Boolean flags (0 = off, 1 = on).
startup_only=0
verbose=0
keep_logs=0
# Parse command-line arguments into the option variables.
# A single bare (non --) argument is accepted as the preset MODEL name.
while [[ $# -gt 0 ]]; do
  # Options that consume a value must actually be followed by one. Without this
  # check `shift 2` fails when the option is the last argument and `set -e`
  # aborts the script silently, with no hint about what went wrong.
  case "$1" in
    --cmd-template|--cmd-file|--preset-file|--low|--high|--step|--prompt-ratio|--chars-per-token|--prompt-turns|--max-tokens|--startup-timeout|--request-timeout|--cooldown)
      if [[ $# -lt 2 ]]; then
        echo "missing value for $1" >&2
        usage >&2
        exit 2
      fi
      ;;
  esac

  case "$1" in
    --cmd-template) cmd_template="${2:-}"; shift 2 ;;
    --cmd-file) cmd_file="${2:-}"; shift 2 ;;
    --preset-file) preset_file="${2:-}"; shift 2 ;;
    --list-presets) list_presets=1; shift ;;
    --low) low="${2:-}"; shift 2 ;;
    --high) high="${2:-}"; shift 2 ;;
    --step) step="${2:-}"; shift 2 ;;
    --prompt-ratio) prompt_ratio="${2:-}"; shift 2 ;;
    --chars-per-token) chars_per_token="${2:-}"; shift 2 ;;
    --prompt-turns) prompt_turns="${2:-}"; shift 2 ;;
    --max-tokens) max_tokens="${2:-}"; shift 2 ;;
    --startup-timeout) startup_timeout="${2:-}"; shift 2 ;;
    --request-timeout) request_timeout="${2:-}"; shift 2 ;;
    --cooldown) cooldown="${2:-}"; shift 2 ;;
    --startup-only) startup_only=1; shift ;;
    --verbose) verbose=1; shift ;;
    --keep-logs) keep_logs=1; shift ;;
    -h|--help) usage; exit 0 ;;
    --*) echo "unknown argument: $1" >&2; usage >&2; exit 2 ;;
    *)
      # Only one positional argument (the preset MODEL) is allowed.
      if [[ -n "$preset_model" ]]; then
        echo "unexpected positional argument: $1" >&2
        usage >&2
        exit 2
      fi
      preset_model="$1"
      shift
      ;;
  esac
done
# Print each preset id from the preset JSON file in sorted order, followed by a
# tab and its "name" when one is set, then exit the script successfully.
list_presets_json() {
  python3 - "$preset_file" <<'PY'
import json
import sys

with open(sys.argv[1]) as handle:
    catalog = json.load(handle)

for preset_id in sorted(catalog):
    label = catalog[preset_id].get("name", "")
    if label:
        print(f"{preset_id}\t{label}")
    else:
        print(preset_id)
PY
  exit 0
}

# --list-presets short-circuits everything else.
if (( list_presets != 0 )); then
  list_presets_json
fi
# Turn the preset named by $preset_model into a shell snippet under $tmpdir and
# point $cmd_file at it. The snippet exports the preset's env entries and runs
# its command with ${ctx}/$ctx rewritten to ${CTX}.
# Exits with status 2 (listing the available presets) when the preset is unknown.
load_preset() {
  local command_file="$tmpdir/preset-command.sh"
  python3 - "$preset_file" "$preset_model" "$command_file" <<'PY'
import json
import shlex
import sys

preset_path, model_id, out_path = sys.argv[1:]

with open(preset_path) as handle:
    catalog = json.load(handle)

if model_id not in catalog:
    print(f"unknown preset: {model_id}", file=sys.stderr)
    print("available presets:", file=sys.stderr)
    for key in sorted(catalog):
        print(f"  {key}", file=sys.stderr)
    sys.exit(2)

entry = catalog[model_id]
command = entry["cmd"].replace("${ctx}", "${CTX}").replace("$ctx", "${CTX}")

script = ["set -e\n"]
for item in entry.get("env", []):
    key, sep, value = item.partition("=")
    # Entries that are not KEY=VALUE are ignored.
    if sep and key:
        script.append(f"export {key}={shlex.quote(value)}\n")
script.append(command if command.endswith("\n") else command + "\n")

with open(out_path, "w") as handle:
    handle.writelines(script)
PY
  cmd_file="$command_file"
}
# Validate that VALUE is a plain decimal non-negative integer, or exit 2.
#   $1  option name used in the error message (e.g. --low)
#   $2  value to validate
# Leading zeros are rejected: bash arithmetic treats "010" as octal 8 and
# fails outright on "08", so such values would silently skew the bisection
# bounds or break the later (( ... )) checks.
require_int() {
  local name="$1" value="$2"
  if [[ ! "$value" =~ ^(0|[1-9][0-9]*)$ ]]; then
    echo "$name must be a non-negative integer without leading zeros" >&2
    exit 2
  fi
}
# Exactly one command source may be given: preset MODEL, --cmd-template, or --cmd-file.
mode_count=0
for mode_value in "$preset_model" "$cmd_template" "$cmd_file"; do
  if [[ -n "$mode_value" ]]; then
    mode_count=$((mode_count + 1))
  fi
done
if (( mode_count != 1 )); then
  echo "use exactly one of MODEL, --cmd-template, or --cmd-file" >&2
  exit 2
fi

# Both search bounds are mandatory.
if [[ -z "$low" ]] || [[ -z "$high" ]]; then
  echo "missing --low or --high" >&2
  exit 2
fi

# Every numeric option must look like an integer before it reaches arithmetic.
require_int "--low" "$low"
require_int "--high" "$high"
require_int "--step" "$step"
require_int "--prompt-ratio" "$prompt_ratio"
require_int "--chars-per-token" "$chars_per_token"
require_int "--prompt-turns" "$prompt_turns"
require_int "--max-tokens" "$max_tokens"
require_int "--startup-timeout" "$startup_timeout"
require_int "--request-timeout" "$request_timeout"
require_int "--cooldown" "$cooldown"

# Range sanity: bounds must be ordered and the sizing knobs strictly positive.
if (( low <= 0 || high <= low || step <= 0 || prompt_ratio <= 0 || chars_per_token <= 0 || prompt_turns <= 0 )); then
  echo "invalid numeric bounds/options" >&2
  exit 2
fi

# A user-supplied command file has to exist.
if [[ -n "$cmd_file" ]]; then
  if [[ ! -f "$cmd_file" ]]; then
    echo "cmd file not found: $cmd_file" >&2
    exit 2
  fi
fi

# External tools used by the trials.
for dep in curl python3; do
  command -v "$dep" >/dev/null 2>&1 && continue
  echo "missing required command: $dep" >&2
  exit 2
done
# Scratch directory holding per-trial logs, prompt payloads and the generated
# preset command file.
tmpdir="$(mktemp -d)"
# PID of the currently running server; empty when no server is running.
server_pid=""
# Log path of the most recently started trial.
log_file=""
# Stop the server recorded in $server_pid, if any, and clear the variable.
# Sends the default kill signal to the process group and the process, polls for
# up to 30 one-second intervals, then falls back to SIGKILL. All kill/wait
# failures are ignored.
terminate_server() {
  [[ -n "${server_pid:-}" ]] || return 0

  # Graceful stop: negative PID targets the process group, then the PID itself.
  kill -- "-${server_pid}" >/dev/null 2>&1 || true
  kill "$server_pid" >/dev/null 2>&1 || true

  # Poll until the process is gone or the grace period runs out.
  local elapsed
  for (( elapsed = 0; elapsed < 30; elapsed++ )); do
    kill -0 "$server_pid" >/dev/null 2>&1 || break
    sleep 1
  done

  # Still alive: force it.
  if kill -0 "$server_pid" >/dev/null 2>&1; then
    kill -9 -- "-${server_pid}" >/dev/null 2>&1 || true
    kill -9 "$server_pid" >/dev/null 2>&1 || true
  fi

  # Reap the child so it does not linger as a zombie.
  wait "$server_pid" >/dev/null 2>&1 || true
  server_pid=""
}
# EXIT handler: stop any running server, then remove the scratch directory
# unless --keep-logs was given or the script is exiting with a failure status.
cleanup() {
  local rc=$?
  # Disarm all handlers so cleanup cannot be re-entered.
  trap - EXIT INT TERM HUP
  terminate_server
  if (( keep_logs == 0 && rc == 0 )); then
    rm -rf "$tmpdir"
    return
  fi
  echo "logs kept in: $tmpdir" >&2
}
# Signal handler for INT/TERM/HUP: report the interruption and exit with 130.
# The actual server shutdown happens in the EXIT trap (cleanup) that `exit`
# triggers.
interrupt() {
echo "interrupted; stopping llama-server" >&2
exit 130
}
# Install the handlers before anything that creates files in $tmpdir or
# starts a server.
trap cleanup EXIT
trap interrupt INT TERM HUP
# Preset mode: materialise the preset into a command file (sets $cmd_file).
if [[ -n "$preset_model" ]]; then
load_preset
fi
# Print a TCP port number obtained by binding 127.0.0.1 port 0 and reading back
# the port the OS assigned. The socket is closed before returning, so the port
# is only likely, not guaranteed, to still be free when the server binds it.
free_port() {
  python3 - <<'PY'
import socket

probe = socket.socket()
try:
    probe.bind(("127.0.0.1", 0))
    print(probe.getsockname()[1])
finally:
    probe.close()
PY
}
# Launch the server for one trial in the background.
#   $1  candidate context size
# Exports PORT and CTX for the command, writes a small header to a fresh
# per-context log file, starts the command under setsid with its output
# appended to that log, and records the PID in $server_pid.
start_server() {
  local ctx="$1"
  PORT="$(free_port)"
  CTX="$ctx"
  export PORT CTX
  log_file="$tmpdir/llama-server-${ctx}.log"

  # Log header: which context/port/command this trial used.
  local source_line
  if [[ -n "$cmd_file" ]]; then
    source_line="CMD_FILE=$cmd_file"
  else
    source_line="CMD_TEMPLATE=$cmd_template"
  fi
  printf '%s\n' "CTX=$CTX" "PORT=$PORT" "$source_line" '--- llama-server output ---' >"$log_file"

  # Command file is run as a script; a template is evaluated with `bash -c`.
  local -a launch
  if [[ -n "$cmd_file" ]]; then
    launch=(bash "$cmd_file")
  else
    launch=(bash -c "$cmd_template")
  fi
  setsid "${launch[@]}" >>"$log_file" 2>&1 &
  server_pid="$!"
}
# Stop the current server, then sleep for $cooldown seconds before returning.
stop_server() {
terminate_server
sleep "$cooldown"
}
# Report a failed trial on stderr.
#   $1  stage label shown in brackets
#   $2  context size that failed
# With --verbose, also dumps the first 220 lines of the trial log.
print_failure_log() {
  local label="$1" ctx="$2"
  printf '[%s] ctx=%s failed; log: %s\n' "$label" "$ctx" "$log_file" >&2
  if (( verbose == 0 )) || [[ ! -f "$log_file" ]]; then
    return 0
  fi
  sed -n '1,220p' "$log_file" >&2 || true
}
# Wait until the server answers on /health or /v1/models.
# Returns 0 as soon as either endpoint responds successfully; returns 1 if the
# server process has exited or $startup_timeout seconds have elapsed.
wait_ready() {
  local give_up_at=$((SECONDS + startup_timeout))
  local base_url="http://127.0.0.1:${PORT}"
  local endpoint

  until (( SECONDS >= give_up_at )); do
    # No point polling a server that has already died.
    if [[ -n "${server_pid:-}" ]] && ! kill -0 "$server_pid" >/dev/null 2>&1; then
      return 1
    fi
    for endpoint in /health /v1/models; do
      if curl -fsS --max-time 5 "${base_url}${endpoint}" >/dev/null 2>&1; then
        return 0
      fi
    done
    sleep 2
  done
  return 1
}
# Print a chat-completions request body sized for one trial.
#   $1  candidate context size
# The character budget is ctx * prompt_ratio / 100 * chars_per_token, split
# across $prompt_turns user messages of filler text with a short assistant
# reply between consecutive user turns.
make_prompt_json() {
  local ctx="$1"
  local approx_tokens=$(( ctx * prompt_ratio / 100 ))
  local chars=$(( approx_tokens * chars_per_token ))
  python3 - "$chars" "$max_tokens" "$prompt_turns" <<'PY'
import json
import sys

total_chars, max_tokens, prompt_turns = (int(arg) for arg in sys.argv[1:4])

FILLER = (
    "This is deterministic context filler for memory testing. "
    "It uses normal words so token estimates are closer to real prompts. "
)


def filler(length):
    """Return exactly `length` characters of repeated filler text."""
    repeats = length // len(FILLER) + 1
    return (FILLER * repeats)[:length]


messages = []
budget = total_chars
for index in range(prompt_turns):
    # Spread what is left evenly over the remaining turns (at least 1 char).
    size = max(1, budget // (prompt_turns - index))
    messages.append({"role": "user", "content": filler(size)})
    budget -= size
    if index < prompt_turns - 1:
        messages.append({"role": "assistant", "content": "Acknowledged."})

payload = {
    "messages": messages,
    "max_tokens": max_tokens,
    "temperature": 0,
    "stream": False,
}
print(json.dumps(payload))
PY
}
# Send the near-context prompt for one trial to the running server.
#   $1  candidate context size
# Writes the request body under $tmpdir and POSTs it to /v1/chat/completions;
# the response body is discarded and the function's status is curl's.
run_prompt() {
  local ctx="$1"
  local payload="$tmpdir/prompt-${ctx}.json"
  make_prompt_json "$ctx" >"$payload"

  local -a curl_args=(
    -fsS
    --max-time "$request_timeout"
    -H 'Content-Type: application/json'
    -d "@$payload"
  )
  curl "${curl_args[@]}" "http://127.0.0.1:${PORT}/v1/chat/completions" >/dev/null
}
# Trial function for --startup-only mode.
#   $1  candidate context size
# Returns 0 when the server becomes ready, 1 otherwise. The server is always
# stopped before returning.
test_startup() {
  local ctx="$1"
  echo "[startup] testing ctx=$ctx" >&2
  start_server "$ctx"

  if ! wait_ready; then
    print_failure_log startup "$ctx"
    stop_server
    return 1
  fi

  stop_server
  echo "[startup] ctx=$ctx PASS" >&2
  return 0
}
# Trial function for the default mode: the server must start AND answer the
# near-context prompt.
#   $1  candidate context size
# Returns 0 on success, 1 if either stage fails. The server is always stopped
# before returning.
test_qualified_context() {
  local ctx="$1"
  echo "[ctx] testing ctx=$ctx with prompt_ratio=${prompt_ratio}% prompt_turns=${prompt_turns}" >&2
  start_server "$ctx"

  # Record which stage failed, if any; the prompt is only sent once ready.
  local failed_stage=""
  if ! wait_ready; then
    failed_stage="ctx-startup"
  elif ! run_prompt "$ctx"; then
    failed_stage="ctx-prompt"
  fi

  if [[ -n "$failed_stage" ]]; then
    print_failure_log "$failed_stage" "$ctx"
    stop_server
    return 1
  fi

  stop_server
  echo "[ctx] ctx=$ctx PASS" >&2
  return 0
}
# Binary-search the boundary between passing and failing context sizes.
#   $1  label echoed back in the result
#   $2  context size treated as passing
#   $3  context size treated as failing (never tested by this function)
#   $4  name of the trial function; called with a candidate size
# Narrows the interval until it is no wider than $step, then prints
# "label:pass:fail" on stdout.
bisect_max() {
  local label="$1" pass="$2" fail="$3" fn="$4"
  local mid

  until (( fail - pass <= step )); do
    mid=$(( (pass + fail) / 2 ))
    if ! "$fn" "$mid"; then
      fail="$mid"
      continue
    fi
    pass="$mid"
  done

  printf '%s:%s:%s\n' "$label" "$pass" "$fail"
}
# Run a bisection in the MAIN shell and load its result into $pass / $fail.
#   $1  label passed to bisect_max
#   $2  trial function name
# bisect_max's "label:pass:fail" line is captured through a file rather than a
# command substitution. $(...) would run every trial in a subshell, where the
# INT/TERM/HUP/EXIT traps are reset and where the server_pid assignment never
# reaches the parent; an interrupt during a trial would then leave the
# setsid-detached server running because cleanup could not see its PID.
run_bisect() {
  local label="$1" fn="$2"
  local result_file="$tmpdir/bisect-${label}.result"
  bisect_max "$label" "$low" "$high" "$fn" >"$result_file"
  IFS=: read -r _ pass fail <"$result_file"
}

# Startup-only mode: find the largest context the server can start with.
if (( startup_only )); then
  # The lower bound must pass, otherwise there is nothing to bisect.
  if ! test_startup "$low"; then
    echo "low bound does not pass startup: $low" >&2
    exit 1
  fi
  run_bisect startup test_startup
  printf '\nResult:\n'
  printf ' startup max passing ctx: %s\n' "$pass"
  printf ' startup min failing ctx: %s\n' "$fail"
  # Machine-readable summary.
  python3 - "$pass" "$fail" <<'PY'
import json
import sys

max_passing, min_failing = map(int, sys.argv[1:])
print(json.dumps({"startup": {"maxPassingCtx": max_passing, "minFailingCtx": min_failing}}, indent=2))
PY
  exit 0
fi

# Default mode: the server must start and also complete a near-context prompt.
if ! test_qualified_context "$low"; then
  echo "low bound does not pass qualified context test: $low" >&2
  exit 1
fi
run_bisect context test_qualified_context
printf '\nResult:\n'
printf ' context max passing ctx: %s\n' "$pass"
printf ' context min failing ctx: %s\n' "$fail"
printf ' prompt ratio: %s%%\n' "$prompt_ratio"
printf ' prompt turns: %s\n' "$prompt_turns"
# Machine-readable summary.
python3 - "$pass" "$fail" "$prompt_ratio" "$prompt_turns" <<'PY'
import json
import sys

max_passing = int(sys.argv[1])
min_failing = int(sys.argv[2])
prompt_ratio = int(sys.argv[3])
prompt_turns = int(sys.argv[4])
print(json.dumps({
    "context": {
        "maxPassingCtx": max_passing,
        "minFailingCtx": min_failing,
        "promptRatio": prompt_ratio,
        "promptTurns": prompt_turns,
    }
}, indent=2))
PY

View File

@@ -1,10 +1,6 @@
#!/usr/bin/env bash
# Setup script for vLLM Qwen3.6-27B on a single 3090.
#
# Downloads the model, clones Genesis patches (pinned), applies setup-time
# source patches to the Genesis tree, and fetches all boot-time sidecar
# patches into place under /mnt/ssd/vLLM/.
#
# Idempotent - safe to re-run; skips steps already completed.
#
# Prerequisites: git (with git-lfs), docker
@@ -15,17 +11,18 @@ set -euo pipefail
MODEL_DIR="/mnt/ssd/vLLM/Models"
MODEL_SUBDIR="qwen3.6-27b-autoround-int4"
PATCHES_DIR="/mnt/ssd/vLLM/Patches"
TEMPLATES_DIR="/mnt/ssd/vLLM/Templates"
CACHE_DIR="/mnt/ssd/vLLM/Cache"
GENESIS_DIR="${PATCHES_DIR}/genesis"
GENESIS_PIN="${GENESIS_PIN:-7b9fd319}"
# 3090 Patches
BASE_3090_PATCH_URL="https://raw.githubusercontent.com/noonghunna/club-3090/v7.69-cliff2-test/models/qwen3.6-27b/vllm/patches"
INPUTS_EMBEDS_PATCH="${PATCHES_DIR}/patch_inputs_embeds_optional.py"
# Timings Patch
TIMINGS_PATCH="${PATCHES_DIR}/patch_timings_07351e088.py"
TIMINGS_PATCH_URL="${TIMINGS_PATCH_URL:-https://gitea.va.reichard.io/evan/nix/raw/branch/master/modules/nixos/services/llama-swap/patches/patch_timings_07351e088.py}"
TIMINGS_PATCH="${PATCHES_DIR}/patch_timings_1acd67a.py"
TIMINGS_PATCH_URL="${TIMINGS_PATCH_URL:-https://gitea.va.reichard.io/evan/nix/raw/branch/master/modules/nixos/services/llama-swap/patches/patch_timings_1acd67a.py}"
# Template
TEMPLATE="${TEMPLATES_DIR}/chat_template-v11.jinja"
TEMPLATE_URL="https://huggingface.co/froggeric/Qwen-Fixed-Chat-Templates/resolve/main/qwen3.6/chat_template-v11.jinja"
# ---------- Preflight Checks ----------
for cmd in git git-lfs curl; do
@@ -37,7 +34,7 @@ done
# ---------- Create Directories ----------
echo "Creating directories..."
mkdir -p "${MODEL_DIR}" "${PATCHES_DIR}" "${CACHE_DIR}/torch_compile" "${CACHE_DIR}/triton"
mkdir -p "${TEMPLATES_DIR}" "${MODEL_DIR}" "${PATCHES_DIR}" "${CACHE_DIR}/torch_compile" "${CACHE_DIR}/triton"
# ---------- Download Model ----------
if [ -d "${MODEL_DIR}/${MODEL_SUBDIR}/.git" ]; then
@@ -67,36 +64,31 @@ if [[ ! -d "${GENESIS_DIR}/vllm/_genesis" ]]; then
fi
echo "Genesis pinned to ${GENESIS_PIN} ($(cd "${GENESIS_DIR}" && git rev-parse --short HEAD))"
# ---------- Download Sidecar Patches ----------
download_patch() {
local dest="$1"
local filename
filename="$(basename "$dest")"
if [ -f "${dest}" ]; then
echo "Patch ${filename} already present, skipping."
# ---------- Download URL Patch ----------
install_via_url() {
local name="$1"
local url="$2"
local dest="$3"
local tmp_patch
tmp_patch="$(mktemp)"
echo "Downloading ${name}..."
curl -fsSL "${url}" -o "${tmp_patch}"
if [ -f "${dest}" ] && cmp -s "${tmp_patch}" "${dest}"; then
echo "${name} already current at ${dest}, skipping."
else
echo "Downloading ${filename}..."
curl -fsSL "${BASE_3090_PATCH_URL}/${filename}" -o "${dest}"
echo "Patch ${filename} written."
echo "Installing ${name} to ${dest}..."
install -m 0644 "${tmp_patch}" "${dest}"
echo "${name} installed."
fi
rm -f "${tmp_patch}"
}
download_patch "${INPUTS_EMBEDS_PATCH}"
# ---------- Download Timing Patch ----------
tmp_timings_patch="$(mktemp)"
trap 'rm -f "${tmp_timings_patch}"' EXIT
echo "Downloading patch_timings_07351e088.py from this repo..."
curl -fsSL "${TIMINGS_PATCH_URL}" -o "${tmp_timings_patch}"
if [ -f "${TIMINGS_PATCH}" ] && cmp -s "${tmp_timings_patch}" "${TIMINGS_PATCH}"; then
echo "Timing patch already current at ${TIMINGS_PATCH}, skipping."
else
echo "Installing timing patch to ${TIMINGS_PATCH}..."
install -m 0644 "${tmp_timings_patch}" "${TIMINGS_PATCH}"
echo "Timing patch installed."
fi
# ---------- Download Assets ----------
install_via_url "patch_timings_1acd67a.py" "${TIMINGS_PATCH_URL}" "${TIMINGS_PATCH}"
install_via_url "chat_template-v11.jinja" "${TEMPLATE_URL}" "${TEMPLATE}"
# ---------- Summary ----------
echo ""
@@ -109,11 +101,12 @@ echo "Expected layout:"
echo " /mnt/ssd/vLLM/"
echo " ├── Models/"
echo " │ └── qwen3.6-27b-autoround-int4/ (model weights)"
echo " ├── Templates/"
echo " │ └── chat_template-v11.jinja (chat template)"
echo " ├── Cache/"
echo " │ ├── torch_compile/ (torch.compile cache)"
echo " │ └── triton/ (Triton kernel cache)"
echo " └── Patches/"
echo " ├── genesis/ (Genesis @ ${GENESIS_PIN})"
echo " │ └── vllm/_genesis/ (mounted into container)"
echo " ── patch_inputs_embeds_optional.py (boot-time: vllm#35975 backport, text-only models)"
echo " └── patch_timings_07351e088.py (boot-time: llama.cpp-compatible timings)"
echo " ── patch_timings_1acd67a.py (boot-time: llama.cpp-compatible timings)"

View File

@@ -1,4 +1,4 @@
{ inputs, ... }:
final: _prev: {
firefox-addons = inputs.firefox-addons.packages.${final.system};
firefox-addons = inputs.firefox-addons.packages.${final.stdenv.hostPlatform.system};
}

View File

@@ -1,12 +0,0 @@
# Workaround for aarch64-darwin codesigning bug (nixpkgs#208951 / #507531):
# fish binaries from the binary cache occasionally have invalid ad-hoc
# signatures on Apple Silicon. Forcing a local rebuild ensures codesigning
# is applied on this machine with a valid signature.
{ inputs, ... }:
final: prev: {
fish = prev.fish.overrideAttrs (_old: {
# Bust the cache key so fish is always built locally rather than
# substituted from the binary cache where the signature may be stale.
NIX_FORCE_LOCAL_REBUILD = "darwin-codesign-fix";
});
}

View File

@@ -5,12 +5,12 @@
buildGoModule rec {
pname = "conduit";
version = "unstable-2026-05-03";
version = "unstable-2026-05-15";
src = fetchgit {
url = "https://gitea.va.reichard.io/evan/conduit.git";
rev = "9edea27148670b208c935c070ff3f58a416241b1";
hash = "sha256-s8/ghyoAyFOvAMhE7vzckEZ8OxIF116OyJ4Uj30s65A=";
rev = "8dfb14f1e7f952bee92cad29703dba55fb156f0c";
hash = "sha256-Fc0FHLCNBbEpOFFD0bHSDo1E5AsOzL2fJzHufleKBIo=";
};
vendorHash = "sha256-LOFT8eCNRm5Q2tVl7ifu4dB5cr828B/E2NJW5WiW0LI=";

View File

@@ -0,0 +1,43 @@
# ik_llama.cpp built from a pinned commit using the package expression shipped
# in the source tree (.devops/nix/package.nix), with CUDA, Vulkan and BLAS on.
{ pkgs }:
let
  src = pkgs.fetchFromGitHub {
    owner = "ikawrakow";
    repo = "ik_llama.cpp";
    rev = "f9a93c37e2fc021760c3c1aa99cf74c73b7591a7";
    hash = "sha256-vBVosqBi8FyrllWGJOYsOYaNYAKoTTq6bn+i0Y32pu4=";
    # Keep .git just long enough to record the short SHA, then remove it.
    leaveDotGit = true;
    postFetch = ''
      git -C "$out" rev-parse --short HEAD > $out/COMMIT
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  # Package definition as provided by the pinned source itself.
  base = pkgs.callPackage "${src}/.devops/nix/package.nix" {
    useCuda = true;
    useVulkan = true;
    useBlas = true;
    useRocm = false;
    useMetalKit = false;
  };
in
base.overrideAttrs (previous: {
  inherit src;

  # SPIR-V headers for the Vulkan backend: newer ggml needs
  # spirv/unified1/spirv.hpp, which vulkan-headers alone does not provide.
  buildInputs = (previous.buildInputs or [ ]) ++ [ pkgs.spirv-headers ];

  # Appended after the inherited flags so CMAKE_CUDA_ARCHITECTURES wins.
  cmakeFlags = (previous.cmakeFlags or [ ]) ++ [
    "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
    "-DCMAKE_CUDA_ARCHITECTURES=61;86" # GTX 1070 / GTX 1080ti / RTX 3090
  ];

  # Disable Nix's march=native stripping, then run any inherited preConfigure.
  preConfigure = ''
    export NIX_ENFORCE_NO_NATIVE=0
    ${previous.preConfigure or ""}
  '';
})

View File

@@ -0,0 +1,35 @@
# llama-cpp — Agent Notes
Override of `pkgs.llama-cpp` with CUDA + Vulkan + BLAS, custom CMake flags, and an optional fork pin.
## Pitfalls
### `version` must be numeric
Upstream `pkgs/by-name/ll/llama-cpp/package.nix` passes `version` straight through as a C integer via:
```nix
(cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
```
`build-info.cpp` then emits `int LLAMA_BUILD_NUMBER = <version>;`. A non-numeric `version` (e.g. `"mtp-clean-08b1474"`) breaks the build with:
```
error: '<value>' was not declared in this scope
int LLAMA_BUILD_NUMBER = <value>;
```
**Convention:**
- Upstream tag pins: use the bare build number, e.g. `version = "9048";` with `tag = "b${version}";`.
- Fork / arbitrary commit pins: use a `YYYYMMDD` date derived from the commit's committer date (`gh api repos/<owner>/<repo>/commits/<sha> --jq '.commit.committer.date'`).
### `leaveDotGit` + `postFetch`
We keep `.git` only long enough to record the short SHA into `$out/COMMIT`, then strip it. Preserve this pattern when changing `src` so downstream tooling that reads `COMMIT` keeps working.
## Refreshing the pinned commit (fork)
1. `git ls-remote https://github.com/<owner>/llama.cpp refs/heads/<branch>` → get the full SHA.
2. `nix run nixpkgs#nix-prefetch-github -- <owner> llama.cpp --rev <sha> --leave-dot-git` → get the hash.
3. Look up the commit date: `curl -s https://api.github.com/repos/<owner>/llama.cpp/commits/<sha> | jq -r '.commit.committer.date'`.
4. Update `src.{owner,rev,hash}` and set `version = "YYYYMMDD"`.

View File

@@ -1,4 +1,22 @@
{ pkgs }:
let
# Version MUST be an integer string.
# For tagged releases use the tag number (e.g. "9222").
# For HEAD builds use YYYYMMDD (e.g. "20260519").
version = "9496";
src = pkgs.fetchFromGitHub {
owner = "ggml-org";
repo = "llama.cpp";
rev = "94a220cd6745e6e3f8de62870b66fd5b9bc92700";
hash = "sha256-1jAowfGVzrrHDwWWzKESY7aV82whnuIg1N37fmtcgyw=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
find "$out" -name .git -print0 | xargs -0 rm -rf
'';
};
in
(pkgs.llama-cpp.override {
cudaSupport = true;
blasSupport = true;
@@ -6,19 +24,13 @@
metalSupport = false;
vulkanSupport = true;
}).overrideAttrs
(oldAttrs: rec {
version = "9009";
src = pkgs.fetchFromGitHub {
owner = "ggml-org";
repo = "llama.cpp";
tag = "b${version}";
hash = "sha256-rDnO+NIjXYtMqxOeYvecQsB7KgohVUuMV4jG6L0b4S4=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
find "$out" -name .git -print0 | xargs -0 rm -rf
'';
};
(oldAttrs: {
inherit version src;
# WebUI npm deps hash for our pinned src. Upstream nixpkgs builds the WebUI
# from tools/ui via `npm run build` in preConfigure (offline, using these
# deps), so no custom webui derivation / HF-bucket workaround is needed.
npmDepsHash = "sha256-1iM0LGeI9e+gZEHk46lkBe51DxIhiimfAm9o3Z3m9Ik=";
# Add SPIR-V Headers for Vulkan Backend
# Newer llama.cpp requires spirv/unified1/spirv.hpp which isn't

View File

@@ -13,13 +13,13 @@ let
in
buildGo126Module (finalAttrs: {
pname = "llama-swap";
version = "208";
version = "216";
src = fetchFromGitHub {
owner = "mostlygeek";
repo = "llama-swap";
tag = "v${finalAttrs.version}";
hash = "sha256-E+BqqQcCLlW/DWvjwC66ClV6yuQ5x7cAMkLPJkS3x5M=";
hash = "sha256-PHSY4z2h406xL+EcIYyrzr4s28txO7SCsWm8hrXf+2U=";
# populate values that require us to use git. By doing this in postFetch we
# can delete .git afterwards and maintain better reproducibility of the src.
leaveDotGit = true;
@@ -32,10 +32,10 @@ buildGo126Module (finalAttrs: {
'';
};
vendorHash = "sha256-tOOZgugiVcICYg9HyeTolyAg+YZWtxSJTvAuwfMazHQ=";
vendorHash = "sha256-QysQ7YdwJcLTziwL25j73n3tQVvzVQIFxN4GkTU8JZg=";
passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
passthru.npmDepsHash = "sha256-6D4F58sSBkr7FKKO34gDhnZ9uN/SfsyYn1xJjYsMeq4=";
passthru.npmDepsHash = "sha256-NJqEJ+XTdpPFtJJxP4CGu+JDUW7lKDcFgsixQJ3SXtQ=";
nativeBuildInputs = [
versionCheckHook

View File

@@ -0,0 +1,25 @@
# open-proxy: Go module built from a pinned commit of the Gitea repository.
{ lib, buildGoModule, fetchgit }:

buildGoModule {
  pname = "open-proxy";
  version = "unstable-2026-06-16";

  src = fetchgit {
    url = "https://gitea.va.reichard.io/evan/open-proxy.git";
    rev = "a589341214a1e035b6ce2b2d79870e591a25ccca";
    hash = "sha256-onfvxOl4TdeRrVLD1oJWcnhEDzKFYU/V0qxV1+NpQrg=";
  };

  # null: no vendored-dependency hash is supplied for this module.
  vendorHash = null;

  meta = {
    description = "Forward `open`/`xdg-open` from a remote VM to the host machine";
    homepage = "https://gitea.va.reichard.io/evan/open-proxy";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ evanreichard ];
    mainProgram = "open-proxy";
  };
}

View File

@@ -0,0 +1,23 @@
# pi-coding-agent Packaging Notes
`pi-coding-agent` is built from the `earendil-works/pi-mono` monorepo with `buildNpmPackage`.
## Lockfile Metadata
Upstream `package-lock.json` may omit `resolved` / `integrity` metadata that npm can recover online, but that Nix needs for its offline npm cache. Keep a package-local enriched lockfile at `packages/pi-coding-agent/package-lock.json` and copy it in during `prePatch`, before `npmConfigHook` validates/generates `npmDeps`.
After bumping `version` in `default.nix`, regenerate it with:
```bash
node packages/pi-coding-agent/update-lockfile.mjs
# or explicitly:
node packages/pi-coding-agent/update-lockfile.mjs 0.74.0
```
Then refresh `npmDepsHash` from the FOD mismatch:
```bash
nix build .#packages.aarch64-linux.pi-coding-agent.npmDeps --no-link
```
Remember: new files must be `git add`ed before the flake can see them.

View File

@@ -3,6 +3,8 @@
, fetchFromGitHub
, nodejs
, nodejs_22
, firefox
, geckodriver
, makeWrapper
, pkg-config
, pixman
@@ -16,19 +18,24 @@
buildNpmPackage rec {
pname = "pi-coding-agent";
version = "0.72.1";
version = "0.78.1";
src = fetchFromGitHub {
owner = "badlogic";
owner = "earendil-works";
repo = "pi-mono";
rev = "v${version}";
hash = "sha256-SqUxghc60P3HfmaFJGB/m23mvzw0cD7cDEUrNFOqo0Y=";
hash = "sha256-K5+reVdi9LPwUHxFgM1iFWojuj6M/m25ymhkDOQdBE4=";
};
npmDepsHash = "sha256-KUC1xQK6oJXtg962YeLOnO76uTdR10/VNa9iiCdT3VM=";
npmDepsHash = "sha256-PknwCOAr61Fq2Mhl6jd79Rdsje1OXFts2MDLM/gIEYE=";
nativeBuildInputs = [ pkg-config makeWrapper ];
# Restore NPM Metadata - upstream lockfile omits resolved/integrity entries needed by buildNpmPackage.
prePatch = ''
cp ${./package-lock.json} package-lock.json
'';
buildInputs = [
pixman
cairo
@@ -41,7 +48,7 @@ buildNpmPackage rec {
# Skip generate-models in ai package (models.generated.ts already in repo)
preBuild = ''
substituteInPlace packages/ai/package.json \
--replace-fail '"build": "npm run generate-models && tsgo -p tsconfig.build.json"' \
--replace-fail '"build": "npm run generate-models && npm run generate-image-models && tsgo -p tsconfig.build.json"' \
'"build": "tsgo -p tsconfig.build.json"'
'';
@@ -73,16 +80,21 @@ buildNpmPackage rec {
chmod +x $out/bin/pi
wrapProgram $out/bin/pi \
--prefix PATH : ${lib.makeBinPath [ nodejs_22 ]}
--prefix PATH : ${lib.makeBinPath [
nodejs_22
# evan/pi-web - Browser automation tools are needed for web-fetch support.
firefox
geckodriver
]}
runHook postInstall
'';
meta = {
description = "Coding agent CLI with read, bash, edit, write tools and session management";
homepage = "https://github.com/badlogic/pi-mono";
downloadPage = "https://www.npmjs.com/package/@mariozechner/pi-coding-agent";
changelog = "https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/CHANGELOG.md";
homepage = "https://github.com/earendil-works/pi-mono";
downloadPage = "https://www.npmjs.com/package/@earendil-works/pi-coding-agent";
changelog = "https://github.com/earendil-works/pi-mono/blob/main/packages/coding-agent/CHANGELOG.md";
license = lib.licenses.mit;
maintainers = with lib.maintainers; [ evanreichard ];
mainProgram = "pi";

6375
packages/pi-coding-agent/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env node
import fs from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
// Locations, all resolved relative to this script's own URL.
// Two levels up from this script's directory; used only to print a relative path.
const repoRoot = new URL('../..', import.meta.url);
// Directory containing this script.
const packageDir = new URL('.', import.meta.url);
// Source of the default version when none is given on the command line.
const defaultNixPath = new URL('default.nix', packageDir);
// Output file: the enriched lockfile.
const lockfilePath = new URL('package-lock.json', packageDir);
// Registry `dist` objects keyed by "name@version", so each package version is
// fetched at most once per run.
const registryCache = new Map();
// Version Selection
/**
 * Determine which version to generate a lockfile for.
 *
 * Uses the first command-line argument when present (a leading "v" is
 * stripped); otherwise reads the first `version = "...";` from default.nix.
 *
 * @returns {Promise<string>} version without a leading "v"
 * @throws {Error} when default.nix contains no version assignment
 */
async function getVersion() {
  const [, , cliVersion] = process.argv;
  if (cliVersion) {
    return cliVersion.replace(/^v/, '');
  }

  const nixSource = await fs.readFile(defaultNixPath, 'utf8');
  const found = /version\s*=\s*"([^"]+)";/.exec(nixSource);
  if (found === null) {
    throw new Error(`Unable to find version in ${defaultNixPath.pathname}`);
  }
  return found[1];
}
// Package Name Extraction
/**
 * Extract the package name from a lockfile `packages` key.
 *
 * The name is whatever follows the LAST `node_modules` segment, so nested
 * installs such as `node_modules/a/node_modules/@s/b` yield `@s/b`.
 *
 * @param {string} lockPath key from the lockfile's `packages` map
 * @returns {string|null} package name, or null when the path has no
 *   `node_modules` segment or the name after it is incomplete
 */
function packageNameFromLockPath(lockPath) {
  const parts = lockPath.split('/');
  const idx = parts.lastIndexOf('node_modules');
  if (idx < 0 || idx + 1 >= parts.length) return null;

  const first = parts[idx + 1];
  if (!first) return null;
  if (!first.startsWith('@')) return first;

  // Scoped package: the scope alone is not a name. Previously a missing
  // second segment produced the bogus name "@scope/undefined".
  const scopedName = parts[idx + 2];
  if (!scopedName) return null;
  return `${first}/${scopedName}`;
}
// Registry Fetching
/**
 * Build the npm registry metadata URL for one package version.
 * For scoped names the first "/" is encoded as %2f.
 *
 * @param {string} name package name, optionally scoped
 * @param {string} version exact version
 * @returns {string} registry URL
 */
function registryUrl(name, version) {
  let pathName = name;
  if (name.startsWith('@')) {
    pathName = name.replace('/', '%2f');
  }
  return 'https://registry.npmjs.org/' + pathName + '/' + version;
}
/**
 * Fetch the registry `dist` record (tarball URL and integrity hash) for one
 * package version, caching the result per "name@version".
 *
 * @param {string} name package name
 * @param {string} version exact version
 * @returns {Promise<object>} the `dist` object from the registry response
 * @throws {Error} on a non-OK HTTP status or when tarball/integrity is missing
 */
async function fetchPackageMetadata(name, version) {
  const cacheKey = `${name}@${version}`;
  if (registryCache.has(cacheKey)) {
    return registryCache.get(cacheKey);
  }

  const url = registryUrl(name, version);
  const reply = await fetch(url, { headers: { accept: 'application/json' } });
  if (!reply.ok) {
    throw new Error(`Failed to fetch ${cacheKey}: HTTP ${reply.status}`);
  }

  const { dist } = await reply.json();
  const complete = Boolean(dist?.tarball) && Boolean(dist?.integrity);
  if (!complete) {
    throw new Error(`Missing dist.tarball/dist.integrity for ${cacheKey}`);
  }

  registryCache.set(cacheKey, dist);
  return dist;
}
// Lockfile Enrichment
/**
 * Fill in missing `resolved` / `integrity` fields on lockfile entries, in place.
 *
 * Only entries under a `node_modules` path that have a version, are not links
 * and have no `resolved` value yet are looked up; lookups run one at a time.
 *
 * @param {object} lock parsed package-lock.json
 * @returns {Promise<number>} number of entries that were updated
 */
async function enrichLockfile(lock) {
  const needsMetadata = ([lockPath, entry]) =>
    Boolean(lockPath) &&
    !entry.link &&
    Boolean(entry.version) &&
    !entry.resolved &&
    lockPath.includes('node_modules');

  const candidates = Object.entries(lock.packages ?? {}).filter(needsMetadata);

  let restored = 0;
  for (const [lockPath, entry] of candidates) {
    const name = packageNameFromLockPath(lockPath);
    if (name) {
      const { tarball, integrity } = await fetchPackageMetadata(name, entry.version);
      entry.resolved = tarball;
      entry.integrity = integrity;
      restored += 1;
    }
  }
  return restored;
}
// Main
// Resolve the target version, download the upstream lockfile for that tag,
// restore registry metadata on it, and write the result next to this script.
const releaseVersion = await getVersion();
const upstreamLockUrl = `https://raw.githubusercontent.com/earendil-works/pi-mono/v${releaseVersion}/package-lock.json`;

const lockResponse = await fetch(upstreamLockUrl, {
  headers: { accept: 'application/json' },
});
if (!lockResponse.ok) {
  throw new Error(`Failed to fetch ${upstreamLockUrl}: HTTP ${lockResponse.status}`);
}

const lockfile = await lockResponse.json();
const restoredCount = await enrichLockfile(lockfile);

// Two-space indented JSON with a trailing newline.
const serialized = `${JSON.stringify(lockfile, null, 2)}\n`;
await fs.writeFile(lockfilePath, serialized);

// The summary goes to stderr.
const relativeLockfilePath = path.relative(repoRoot.pathname, lockfilePath.pathname);
console.error(`Wrote ${relativeLockfilePath} for v${releaseVersion}; restored metadata for ${restoredCount} entries.`);

View File

@@ -11,16 +11,18 @@
}:
buildNpmPackage (finalAttrs: {
pname = "qwen-code";
version = "0.4.0-nightly.20251209.a6a57233";
version = "0.16.0-preview.0";
src = fetchFromGitHub {
owner = "QwenLM";
repo = "qwen-code";
tag = "v${finalAttrs.version}";
hash = "sha256-s9m1IN6jDDbNPr/vI/UcrauYPiyQTDODarLP3EvnG3Y=";
hash = "sha256-UAJNw1RjHRoZqtgIWJ1dOTWnE9LoBpfJCAM0Jay+VPI=";
};
npmDepsHash = "sha256-ngAjCCoHLPZ+GgBRmAKbRYaF7l+RK3YGf1kEkwFbyQg=";
npmDepsHash = "sha256-uJtOeNnhbGE7EzTwkNbg2EHLonjHCbdPH5rcV2bgQUw=";
makeCacheWritable = true;
npmFlags = [ "--legacy-peer-deps" ];
nativeBuildInputs = [
jq
@@ -34,8 +36,13 @@ buildNpmPackage (finalAttrs: {
libsecret
];
postPatch = ''
prePatch = ''
${jq}/bin/jq '.dependencies."iconv-lite" = "^0.7.0"' \
packages/core/package.json > packages/core/package.json.tmp
mv packages/core/package.json.tmp packages/core/package.json
${jq}/bin/jq '
.packages."packages/core".dependencies."iconv-lite" = "^0.7.0" |
del(.packages."node_modules/node-pty") |
del(.packages."node_modules/@lydell/node-pty") |
del(.packages."node_modules/@lydell/node-pty-darwin-arm64") |
@@ -62,9 +69,26 @@ buildNpmPackage (finalAttrs: {
' package-lock.json > package-lock.json.tmp && mv package-lock.json.tmp package-lock.json
'';
preBuild = ''
mkdir -p node_modules/@lydell/node-pty
printf '%s\n' \
'export interface IPty {' \
' pid: number;' \
' onData(callback: (data: string) => void): void;' \
' onExit(callback: (event: { exitCode: number; signal?: number }) => void): void;' \
' kill(signal?: string): void;' \
' write(data: string): void;' \
' resize(columns: number, rows: number): void;' \
' removeListener(event: string, listener: (...args: unknown[]) => void): void;' \
' exitCode?: number;' \
'}' \
> node_modules/@lydell/node-pty/node-pty.d.ts
'';
buildPhase = ''
runHook preBuild
npm run generate
npm run build
npm run bundle
runHook postBuild
'';
@@ -75,6 +99,14 @@ buildNpmPackage (finalAttrs: {
cp -r dist/* $out/share/qwen-code/
npm prune --production
cp -r node_modules $out/share/qwen-code/
if [ -d $out/share/qwen-code/vendor/ripgrep ]; then
find $out/share/qwen-code/vendor/ripgrep -type f -name rg -exec sh -c '
for rg; do
rm "$rg"
ln -s ${ripgrep}/bin/rg "$rg"
done
' sh {} +
fi
find $out/share/qwen-code/node_modules -type l -delete || true
patchShebangs $out/share/qwen-code
ln -s $out/share/qwen-code/cli.js $out/bin/qwen

View File

@@ -0,0 +1,114 @@
# Source-built TuxGuitar package: runs the upstream linux-swt Maven build with
# the native-modules profile, then wraps the launcher script with the runtime
# PATH and library path it needs.
{ lib
, stdenv
, maven
, fetchFromGitHub
, jdk17
, jre
, swt
, makeWrapper
, wrapGAppsHook3
, pkg-config
, alsa-lib
, jack2
, fluidsynth
, libpulseaudio
, lilv
, suil
, qt5
, which
}:
maven.buildMavenPackage rec {
pname = "tuxguitar";
version = "2.0.1";
# Upstream tags are the bare version string (no "v" prefix).
src = fetchFromGitHub {
owner = "helge17";
repo = "tuxguitar";
rev = version;
hash = "sha256-USdYj8ebosXkiZpDqyN5J+g1kjyWm225iQlx/szXmLA=";
};
# Hash of the fetched Maven dependencies.
# NOTE(review): presumably needs refreshing when `version` or the injected SWT jar changes — confirm.
mvnHash = "sha256-XTODH8SG7iwhACJT4AbIokORUe00r6theV18TEXbrIs=";
# Test phase is disabled for this package.
doCheck = false;
# Run Maven under JDK 17.
mvnJdk = jdk17;
nativeBuildInputs = [
makeWrapper
pkg-config
wrapGAppsHook3
];
buildInputs = [
alsa-lib
fluidsynth
jack2
lilv
qt5.qtbase
suil
];
# Extra attributes for the Maven dependency-fetch step: same buildInputs, and
# Qt wrapping turned off.
# NOTE(review): assumes these are forwarded to the maven-deps derivation — confirm against nixpkgs docs.
mvnFetchExtraArgs = {
inherit buildInputs;
dontWrapQtApps = true;
};
# Two jobs: (1) point the LV2 native module's pom.xml at the Nix store include
# directories for lilv and suil instead of /usr/include; (2) only when the
# derivation name matches "maven-deps-*", install the nixpkgs swt.jar into the
# local Maven repository at $out/.m2 as org.eclipse.swt:org.eclipse.swt.gtk.linux:4.36.
postPatch = ''
substituteInPlace desktop/build-scripts/native-modules/tuxguitar-synth-lv2-linux/pom.xml \
--replace-fail /usr/include/lilv-0/lilv ${lib.getDev lilv}/include/lilv-0/lilv \
--replace-fail /usr/include/suil-0/suil ${lib.getDev suil}/include/suil-0/suil
if [[ "$name" == maven-deps-* ]]; then
mvn install:install-file \
-Dfile=${swt}/jars/swt.jar \
-DgroupId=org.eclipse.swt \
-DartifactId=org.eclipse.swt.gtk.linux \
-Dpackaging=jar \
-Dversion=4.36 \
-Dmaven.repo.local=$out/.m2
fi
'';
# Build the linux-swt distribution with the native-modules profile enabled.
mvnParameters = "-f desktop/build-scripts/tuxguitar-linux-swt/pom.xml verify -P native-modules";
# Automatic GApps/Qt wrapping is disabled; gappsWrapperArgs is forwarded
# manually to wrapProgram in postFixup below.
dontWrapGApps = true;
dontWrapQtApps = true;
# Copy the built distribution (dist, lib, share, tuxguitar.sh) into $out,
# point lib/swt.jar at the nixpkgs SWT jar, and expose the launcher script as
# bin/tuxguitar via a relative symlink.
installPhase = ''
runHook preInstall
mkdir -p $out/bin
cp -r desktop/build-scripts/tuxguitar-linux-swt/target/tuxguitar-*-linux-swt/{dist,lib,share,tuxguitar.sh} $out/
ln -sf ${swt}/jars/swt.jar $out/lib/swt.jar
ln -s ../tuxguitar.sh $out/bin/tuxguitar
runHook postInstall
'';
# Wrap the launcher script: pass the GApps wrapper args, prepend jre and which
# to PATH, and prepend $out/lib plus the listed libraries to LD_LIBRARY_PATH.
postFixup = ''
wrapProgram $out/tuxguitar.sh \
"''${gappsWrapperArgs[@]}" \
--prefix PATH : ${lib.makeBinPath [ jre which ]} \
--prefix LD_LIBRARY_PATH : "$out/lib:${lib.makeLibraryPath [
swt
alsa-lib
fluidsynth
jack2
libpulseaudio
lilv
qt5.qtbase
suil
]}"
'';
meta = {
description = "Multitrack guitar tablature editor";
homepage = "https://github.com/helge17/tuxguitar";
license = lib.licenses.lgpl2;
maintainers = with lib.maintainers; [ evanreichard ];
mainProgram = "tuxguitar";
platforms = lib.platforms.linux;
};
}

View File

@@ -1,3 +1,4 @@
open_proxy_token: ENC[AES256_GCM,data:LxC0dR2EQ8XPmw4fwKnKJD3usqImMKH+81I9RSTNsjg=,iv:LQmf+kxtwgAMxcHvIe6y3Qw/oxXvdWGbyV/kdwPpKw4=,tag:TUQbM8sIK6KB7eOsYfHuqw==,type:str]
conduit_apikey: ENC[AES256_GCM,data:4mjvEI00V7nAhPkDa97eOfLCqItxoRALFe8OdxzUiUc=,iv:2mtSu1LDUvaneTnqs3Z4GVAj+HuAY2+VXrpIITSg/64=,tag:trkgxyX0ssCjyKMB42bFGg==,type:str]
context7_apikey: ENC[AES256_GCM,data:K8/OoJMWBhN3ufmTa/tAiD3iMergDZQ1OBucUtLsrg+L26DXDPAko9D41w==,iv:/IVpaaPivUTn2rbIAPIwyN5nb7TmtDh05YlMdOlBkhE=,tag:0XJfoNlDelBwMXMAAqKjtQ==,type:str]
zai_apikey: ENC[AES256_GCM,data:eNgIfEqs8JGM7Qo6D5KMMqRF8fd1qLakYQ9F5oEDUvLqPJ+TAktz8GMVuSndwW5BxA==,iv:eR8IR/MDmhk2JUoT2chCwRYOJGfxEBFGARf1CI7EG8Q=,tag:3fmRWA5eof304WSWKntDFg==,type:str]
@@ -5,8 +6,7 @@ kagi_token: ENC[AES256_GCM,data:6pxxMMQ3RCy6sdUFiuAy8rUzsIMMiBgPzphpgTVMfiHC98ej
rke2_kubeconfig: ENC[AES256_GCM,data:DmmaV5bSnSSbLfenT7/xsv9qq5V1s2b9mzdeOe5JbhXLcvC9RRX3z1TkAwdC9IEAtr0cIiPigJS2fUCo0/baYSZ+lKTZ6pUmuPwX0x1g2O8Vdfe7jTTnTDnZ/A8+CIrS79uhsNxlmQNpEOCCSAOQ4+XAnFbPbLh/0QhV2M3a19ocJBQnFyNpYCxverRvNIfgHOoMskvwn3MEsmp6foOGnwPsbeQ1RRiIyCmf7c6jJQH7O5qDLcTIFNYNKiorr8veRhI5av0eX+5/rM8wWgBVNo/lf4TJnX+ufIUZQYCIz4vpfaw8N1jcpiAJiUFGdlKX+AR9b3ti8owa5+JmQkLNp4GBEI+I0tdMp15K6RjKqkKrkPujtUFntxXC07r+eQ37oUUvS9qilIMrkX0TxWoooShgOgQfVUEAEdtb2o830TL1FFZHTiy5RBkeRQxol4yAW/M0B0S4iIj/W07UHNdp5tBaPotsdyj9QQrumYS67GwWolVW007pG8nvD/lvP55nAndsLZpHAYSFI6z1N5ayx0N4I8OP+dT5ElaQv/tt/KO69EQYEwJetgRLnMQ34WKfAr3akLYja6QxkrhEnhfa60mXP9QLynEWGsfYdMUjPioIiImvdRi+5FkyvQ7aZyVzCRsMNGL3I5f1dXWz2wS+B1oB9yimOpfz4wr2794w64EKO1gF5dso17ebVEBuT8myeOenZREVUJCEunYcFPsMDD8bI+VD/VJDwQI/aWmukBWW9dztySiAJA0RWOb69LeApgx2SUwcPnx1yLerb5FWjA8hzY6GKGKyO8cNMRbH/l6QjAL/oMg3cgi7dH/7o1dGSphvGpTAOmcb82ZiT4gMSHhKIrxdLKyZclGu4Rf/mSjadGzLrEA6qj5r07wJOxZHu2bcMafWnoUZBuo7yE/ogVkruW1vI9c4lNBsOIUescE4sE5qjRncJkPEh9pcwWWLFnRQVCxVSpp72VyeJTxvo9gEBHuZGFF3J7R3YsTq1YhcXAR5+PIseIY46rdxqIh2WmVfG4W/iteuQh+JEcspvHNMB7a7j3yTEOHQ7ILaslLYDDnma4qo6SPuTzhx3Tbkx1WN5FelVkw1INV6qSjL83ghfk5nOVumbYurrTXviqqWg5ikCJ/Ewy2nrpNMbeVMs0x9Wcdpe7xi405IdJm6ry5Ipo9ZMKNJxRDP+ebUBgfiB3WzVI68AvvTpePz3KxGDwzh2aWu3Ei7CJoBrCrkEk2DPxoGvBinOvslZYuGhvUqL5XuNoDjLuxNCY1Dt67dvLi3ydiekZU8mNQ1qraMSFBg4KXH9e718X1zjuAGv86TVUfllxiXeoo6L/Sgn2iO1YW5w6igO5qkuIYEIi1rpx0jFOrbgvZeU6qjBHQmFwEw7h03IVw54s62E2dCy1wSq1BcUu1jUR5iJ0mPJ8ajGhb4D7MRO2wanAQxrzKJhSJ1OAdCzrebprLJRoo1v4YySiQkZ5cD57YnABST+i3/u0aWcS3xDi8Z/NKr0TMyvf2rWpvlOYUfIDgZLQiHBiph0UZNk/XNpvH23e6lEHG/ztmIu8CbcAuAbAy7Qwf664UGq9cK82gklvebO6lo5vCUGpx8mLOQYLIOefdoDJEei3DoTeZvtOpLkNXnRlwSlCY4geCNOioU3H6mtF4JdlLSFM7QMt/4CpMGEEzXDVCu7GI2Gem7VmBuLOBhGYxiF1zG+D6ZKUxOx1rmo7f2flgdfEtlkpQrIbeZfVEnqgb5z9Vw8bziW+Kc5CJyo9iV09BK0aeZWstnR6SKIWwuImWrGM0zSQBHd8QdrgicuR416PnFuElT3dkrF3TedLTKWKasWhlGOYeILMzCz/dnwy85ihp5zc10AbIpLISAvHSaMtEgdiwIc2n2Ti94ntnwfIB3AG20X3/yljDZKezn95+SZV8jOhMk/OcrpGH3UTB7ezHyf6gVD0qLXM6xUgi5veh
hsO8ihFFTSNHf0881fimokRHQPjMJ9NC3J26JLhqJEs0Zfwvx2+7NrGq0pnRQ0W0FExy14dWWx270/EF8L91YRFohCJidJYgD5oTEab3eG1itM+OAAIA5xG+g5N6Re+34yO3JDMVUfuxGYpxKxfF8eVvAXB46+CH/lXQH6cPO8plWmlrzT8TS3rPj8MtvXrNaCVHuoWQ3oNu0cSGtsgcX/kJ8P8rOV8wNEdj2EpA8Fq8o741OapgFxpc5fMN3gKc6n7uwnKBHMMaQsZ2ymWeEn+qa6f/Me0DlyOkVLQFTYqAKibOoeyCtNILMf/NPMFMLe7Oktl6f9dvsMtu1zwCSYAV83/Ti8sZdnPFLET4OttQB+Bk8IX7BeTndAc3uMrVuOBwJC4sh2uGvRIJPEYbfw6p2amcn8mylN1o+l6sntpMrudEFo+oAz6M4UCEoNpotRIIMBB+uZo8T876TxSbENz31IDrOJ8ka4hCE0dK/gxZQsnoa16VvouTSc92se52n3RdELu+q9Oeeubd4htUJHhqxpbSmEQGVTWH2kJT7c76NZu7B634aUtoYI5eYmcLB3zsqVwZeG9fpzNeHMS2C6qzT4uKjxmbTW8eicDYSZSyJg4rpEjxf9GZn8f4898JtGvKF+esthhOlaxCEFcVoPQLC4pOAzY/TN+XTPk55bEPQ5LP3cByTfIv1UnZvSjXpAz2m+DlbkGfEqiICHr4HkISJS297CKI05tjpQNfO1Ylp89uL1hqwcddRee6+34kb+1XjSYwKMooZvSf/pasjj5xlpbCtxkiEIHp996H4MNXaySjZ7QU49Yy3EAKjulbc7xQXpkSUZb5Rh8yge/KpCK/5gK/fwlYIJ3tR4mIMN/b7HmfulBmktekK0G2fxagoKFot3DOwY1OnwIJhNzo0fCS3qFe9r2Ixs76C78gXo+DChKxYcYS7N5wmQy1PxKOsBpxeiWyaHCIC8Ey5dPSEoYi0zr+1A6wbkPaCrKI/C9Bwu8f04ySCBdbQQa52uCdWM2ctIsPHZNzkWNcXCfz9XLQZYZG/bQJtj+lDXgKKd8AiF0+WKzinGWHkKQbZPVYJI0s0jLXfH3G+kg5QztkfXedHNY6y0F5hh15jRYWsLAn6Ls2NJVP9SmCvl+pj6wHRKqDV7SbHz2bqCwLzzD68Wvv7b44OLFZEAH8F1TW8D205Dw7YFDiLMnWjeWv8ZX5CNUi7tkiCeORdQGiiv6n3CItvqWQ00tKnO5h+4/E97PtsSgfqG+Pgyjjscz4z02stB1XuaGbfPKe604CeYcmhjf9mV8gjfewOYTs+E2j6aoSKJbOtdiWfIHNngn92D/tOLB2UsuErYd3ZGLoH2yBOVL1rLnkY1QG4JxzXCwpnauQTzHRmg/WpJXurETbVyzFkzkF9YnX9iuJFt1mZglZuaO0JtC93xhdjMqjJyau4Oba7ZwwzyvM4TkgvffbPZit5nNAAZO0HO4r3t5nMIJeS1uBin66pjUWEuZ8ptjA5iYuAJwAg2VOPUuDKc9HBYmQjJ+Yx+KhO0aDmJd2pV/R6TOKua6jAnmny2FFqbyC3znPTfJZHOnVPZZ0V1D0NJ8dP62EDBebTnoFScE+93xyB9ETE3ubrBACAPTMuCQlqAY5ix2F1MpCCT7c0ovMyRlDaUJyFXS93UfJf4MmQg1+yCHuGX9E/bqUXsXC6SdJU9PdJ/3QdhN7iIcy8rPxOfYjJSwFrdUZn9sJcrVsbbPuObOM9HPOtp/E2xTOlYkfIeValLFxBb2lEBZghYXT05X82fSoe9ZBZdJfIMmLnuh/bP++bBVU7/y/aCYFcRCOafDdLQrTOpJ62SIDP0zN0YkmRy41qNlSaMp0ZZpxiK45ihqGP4aqWBmGC822dTsY4og1V07rRnfdG2wQcW+VHLldg72vRoc2brhfRZAOvwHQQCy0NxMD5pCgoSXJD4qsJrOFBcsmbs8YZ2omHzkVf+9ybnUD4WNCjVIvM,iv
:CMrKYb+2QZVKEJMjW51rbiYW/cN6ATDzgwfBdSi9B10=,tag:qVqO5byXdj7DZdaHNx7S9A==,type:str]
sops:
age:
- recipient: age1sac93wpnjcv62s7583jv6a4yspndh6k0r25g3qx3k7gq748uvafst6nz4w
enc: |
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBrY0pseTB5R290aXAzdnB3
aTVMS2tBdnEvSjR5K241dElEa1Z1Tmt2a0ZJCnRSbVBsZ3lNVlNzdnFlY3VvaWpB
@@ -14,8 +14,8 @@ sops:
S3hOa3p1MHVLek5zaUxhWXFiV2Z3R0UKxSrnYSoN6KcuFdg5K6qwcwh9/j9lI0HB
HqujumuIfWkcctNk38AMn4beeesmXsbJQcUPHUVOZQw6Ov4jXaGz/Q==
-----END AGE ENCRYPTED FILE-----
- recipient: age17ayje4uv2mhwehhp9jr3u9l0ds07396kt7ef40sufx89vm7cgfjq6d5d4y
enc: |
recipient: age1sac93wpnjcv62s7583jv6a4yspndh6k0r25g3qx3k7gq748uvafst6nz4w
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBMazJ5VmNXQ2FRYzFuejBU
bWNPWnI2a2R3NVBpY0Y2N0lXd1RXbUdmTkc0ClZDTSs4bklpeVEyMDNrVXJXRHRF
@@ -23,8 +23,8 @@ sops:
a3dPbEZtL0tsOWpWSEtPblZSYk9NVTgKM6TfK7VX6v059FXpRjpAlgX+ab4f6vq2
jH8jyO33YxQYI1kSgXJ5AR8evCoV3FzbZ5rzIy2PRmCOwFV8Im2bRg==
-----END AGE ENCRYPTED FILE-----
- recipient: age1mar507c9mxmwalg486chs5kfh0mya38rv5w64ypfwnwlawewrpnswerpg8
enc: |
recipient: age17ayje4uv2mhwehhp9jr3u9l0ds07396kt7ef40sufx89vm7cgfjq6d5d4y
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB4S0NrbGF4WXN5KzY4S0lt
ZW1PMWE5NUF6NEIyWmp6RWY4MzNYbE0wQnpFCkdOaGtIWmtJT08zNmZNNzcvQldp
@@ -32,8 +32,8 @@ sops:
VFJrQWNnNTJ4azVpMElHbmsrZnJPM2MKx/7XxkZfd1tPMck9FmoM6g28dp5JeXQ5
OdiOLlKc2If1f6dLKkjDmmscMui6aLMQ8RJ8dLK7FKlYy+95VsHVrw==
-----END AGE ENCRYPTED FILE-----
- recipient: age1w6avj7gd4f5frk90lsyh4e2k5am6z92hzlr0vpgrm767muyj59qsnuah62
enc: |
recipient: age1mar507c9mxmwalg486chs5kfh0mya38rv5w64ypfwnwlawewrpnswerpg8
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBnZFpKSVpWMCt6Z3M5UGUz
YVI4TG85UVh1eXYwVStQWk5xdEhhSjdNTzFJCk1mNnBtSVV0VFB0TnFIeVdocWFP
@@ -41,8 +41,8 @@ sops:
MzNuSHlWWnNMYkJMMHZkcnh0cXZaREkKE+j0yWV/zK8lz8vRa0cywpLL2DiAFsgi
fgCdeysSacrQLxB8iBWbusJ31ktyJMYLrsWFAdPkl0WN6HjaR3k3CQ==
-----END AGE ENCRYPTED FILE-----
- recipient: age1avlhszrryt4gf4ya536jhzm7qwt9xfttm8x4sns6h9w2tahzqp8sspz9y5
enc: |
recipient: age1w6avj7gd4f5frk90lsyh4e2k5am6z92hzlr0vpgrm767muyj59qsnuah62
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBDUFYyTzdpQlJ4VVJJbDF5
cEhIQ1NSRVcwRUFBUzdaeXdkOWhPVjFIdGtBCk5qVmx1L09kVGhiNnlaQ0xoeTVz
@@ -50,8 +50,8 @@ sops:
VmFINkwvZEFuWnkyb2JpTCtmRUVBbUkKKzbifH6Ue84MkpaLHrwDvJu8uvjY7yOY
+qYg0rOqFuZAx9YiOjDR7JVeGpfHM+7pO9ZjSNTPH0f1NC3XwsNp1A==
-----END AGE ENCRYPTED FILE-----
- recipient: age1dccte7xtwswgef089nd80dutp96xnezx5lrqnneh9cusegsnda8sj3dj6c
enc: |
recipient: age1avlhszrryt4gf4ya536jhzm7qwt9xfttm8x4sns6h9w2tahzqp8sspz9y5
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBBSS9CQlE0azMwbFo4V0Ez
V3pKL2QvY0NNdmoxZXhQRUU1cG9Temk5M3pZCjBZUHJFRmZ1bTZzQ2RGU2F3WXF5
@@ -59,8 +59,8 @@ sops:
Zm1ReDlMWW8wak5Jb0VncFV2bFJROWsKWi9DTgveMgGG8eK4qNeAGGG/gfiJS96G
232Tgf94Pb8eAU2zbF77pLWMaqTBbYPz1tggcMTfrAeDohq+/0sU8g==
-----END AGE ENCRYPTED FILE-----
- recipient: age1ped3hpugq06908ex8kgama33qckqe03rmac5pa6th87vks5d249qhshvqu
enc: |
recipient: age1dccte7xtwswgef089nd80dutp96xnezx5lrqnneh9cusegsnda8sj3dj6c
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBqS0p2TVhWd2VMQTNNR0or
amhNdTh5bmY3WWh1QkgveWZEMEtxSCt3Tng4CnVEKzhDMFVzRktndmI5OFhSei94
@@ -68,7 +68,8 @@ sops:
TC91cmtZWm03dzYwS2E3dkorNkdFY2sKj5OZHOtKx1NGPSGKsWjC/8+seUAhvmxb
wQ0iuPAq6yDLhYV69n7Jx4G9fKoidLIQxq+Ia+tLcYt58UDX7aixJQ==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2026-05-04T03:17:35Z"
mac: ENC[AES256_GCM,data:hFnRRENIaWa+Yidk7JqY+XMZ4xyekQLY9MMSgo5j4w7UI+Fb82RpNjmFrOS13ck7kKS+CVLWrFSEDxWJYcjHyLE5btoi06SFiZYfDF0JlOfq6e+loTj+TqhyWyJoQ3ZYaj9weak+qCroN4q88EWrle3Rlw0b/67lnkkzYJYYOlk=,iv:i9DVooO/9Gjr+yWaEkdXEWABAyrp8qNPt48EzqoEfiA=,tag:8hYmoJ9p3fse9eckhYqkvg==,type:str]
recipient: age1ped3hpugq06908ex8kgama33qckqe03rmac5pa6th87vks5d249qhshvqu
lastmodified: "2026-06-16T18:49:15Z"
mac: ENC[AES256_GCM,data:Q51p1A317BYzKvXSpkx2HBduGLGvxdQFi/BCfUtKWV9uAJGlQDp//eGJ7kJEG6DIO6sWUddf75fBLYAQcqm3iogIetTLUuQl3OhHSpAPvGpUDC6Hh87sAZy/ebaN2cy7BhJy5cjPJ9JAkfHqLCFRP+cVIwS/eb87GzwvWdSGZbc=,iv:yTTwhHCB09Qb6a437VENFiWQPp8CHwd0TelFj4ugO3U=,tag:JXN58pSy2I04O44Hg4pQcw==,type:str]
unencrypted_suffix: _unencrypted
version: 3.12.1
version: 3.13.1

View File

@@ -15,7 +15,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
boot.loader.grub = {

View File

@@ -12,7 +12,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
boot.loader.grub = {

View File

@@ -11,12 +11,20 @@ in
./hardware-configuration.nix
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
programs.firejail.enable = true;
programs.nix-ld.enable = true;
# Asahi Wi-Fi Resume Bug - The Broadcom driver can fail to reconnect after suspend on this MacBook.
powerManagement.resumeCommands = ''
${pkgs.kmod}/bin/modprobe -r brcmfmac_wcc 2>/dev/null || true
${pkgs.kmod}/bin/modprobe -r brcmfmac 2>/dev/null || true
${pkgs.kmod}/bin/modprobe brcmfmac
${pkgs.systemd}/bin/systemctl restart NetworkManager.service
'';
# System Config
reichard = {
nix = enabled;

View File

@@ -11,7 +11,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
programs.nix-ld.enable = true;

View File

@@ -3,7 +3,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
# Config Boot

View File

@@ -15,7 +15,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
boot.loader.grub = {

View File

@@ -15,7 +15,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
networking.firewall.allowedTCPPorts = [ 443 ];

View File

@@ -12,7 +12,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
networking.firewall.allowedTCPPorts = [ 443 ];

View File

@@ -9,12 +9,13 @@ let
nvidia-smi = "${config.hardware.nvidia.package.bin}/bin/nvidia-smi";
llama-cpp = pkgs.reichard.llama-cpp;
ik-llama-cpp = pkgs.reichard.ik-llama-cpp;
stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
cudaSupport = true;
};
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
nixpkgs.config.allowUnfree = true;
hardware.nvidia-container-toolkit.enable = true;
@@ -65,7 +66,7 @@ in
wantedBy = [ "multi-user.target" ];
serviceConfig.Type = "oneshot";
serviceConfig.RemainAfterExit = true;
script = "${nvidia-smi} -i 0 -pl 250";
script = "${nvidia-smi} -i 0 -pl 290";
};
};
@@ -99,6 +100,8 @@ in
opengl = {
enable = true;
enableNvidia = true;
# GTX 1080 Ti is Pascal; NVIDIA 590+ (nixpkgs stable = 595) dropped Pascal support.
nvidiaPackage = config.boot.kernelPackages.nvidiaPackages.legacy_580;
};
};
@@ -129,6 +132,7 @@ in
# Local Packages
llama-cpp
ik-llama-cpp
stable-diffusion-cpp
];
}

View File

@@ -3,7 +3,7 @@
}:
{
time.timeZone = "America/New_York";
system.stateVersion = "25.11";
system.stateVersion = "26.05";
reichard = {
system = {

View File

@@ -9,7 +9,7 @@ let
cfg = config.${namespace}.user;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
nixpkgs.config.allowUnfree = true;

View File

@@ -3,9 +3,11 @@ let
inherit (lib.${namespace}) enabled;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
programs.nix-ld.enable = true;
reichard = {
nix = enabled;

View File

@@ -7,15 +7,10 @@ let
inherit (lib.${namespace}) enabled;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
boot = {
kernelParams = [
# Mask GPE03 (EC wakeup events) to allow hibernation without spurious CPU wakeups
"acpi_mask_gpe=0x03"
];
};
programs.nix-ld.enable = true;
hardware = {
enableRedistributableFirmware = true;
@@ -83,14 +78,4 @@ in
sops = enabled;
};
};
# Additional System Packages
environment.systemPackages = with pkgs; [
dool
jq
mosh
rclone
sqlite-interactive
unzip
];
}

View File

@@ -13,7 +13,7 @@ in
config = {
# Basic System
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
reichard = {