Compare commits

..

45 Commits

Author SHA1 Message Date
379fe5c290 feat(open-proxy): configure shared token file 2026-06-16 15:03:36 -04:00
ea36629461 fix: webbrowser.open 2026-06-16 14:42:21 -04:00
0f85723755 feat: open-proxy 2026-06-16 14:29:17 -04:00
4db8c17f70 chore(nix): update builder public host key 2026-06-11 16:07:53 -04:00
36f2abc1a9 chore(thinkpad): reorganize packages and drop ACPI GPE mask
Move mosh/unzip into common home packages, add orca-slicer to the thinkpad home, and remove the now-redundant system package list and kernel param.
2026-06-11 16:07:53 -04:00
4cd5d1ba62 chore: bump pi-coding-agent to 0.78.1, reduce llama-swap ctx size, add builder host key
- packages/pi-coding-agent: 0.78.0 → 0.78.1 (new gondolin extension deps)
- llama-swap: reduce qwen3.6-35b-dual macros.ctx 215000 → 131072
- nix: add publicHostKey for remote builder
2026-06-04 22:44:01 -04:00
73b2bbc973 fix: nvidia driver & llama cpp update 2026-06-04 06:27:38 -04:00
bc95e479d6 fix(asahi): reload wifi driver after resume 2026-06-03 21:57:58 -04:00
d15e610337 fix(firefox): restore declarative extensions 2026-06-03 21:57:39 -04:00
f15765d5a7 fix: nvim diag toggle 2026-06-03 08:17:58 -04:00
881f8fe816 fix: nvim update config 2026-06-02 21:21:05 -04:00
9d8ec6d7d2 chore: rename swww to awww 2026-06-02 21:11:20 -04:00
fd9695a754 chore: hyprland migrate config 2026-06-02 21:09:40 -04:00
5cf4b93072 chore: hyprland updates 2026-06-02 20:55:06 -04:00
c359a26d3f chore: update 25.11 -> 26.05 2026-06-02 20:02:57 -04:00
fbeb040914 pi-coding-agent: 0.77.0 → 0.78.0 2026-05-31 19:46:32 -04:00
e18aee716a docs(pi): constrain git commit bodies 2026-05-30 09:00:06 -04:00
b4e1afd012 fix(pi-coding-agent): include browser tools in wrapper 2026-05-30 08:58:51 -04:00
6dd5e38d9b feat(tuxguitar): add source-built package 2026-05-30 08:57:51 -04:00
f4a213de8e chore: llama cpp tweaks 2026-05-29 22:32:20 -04:00
68cb7ea3d5 feat(nvim): exrc dynamic LSP 2026-05-29 07:43:36 -04:00
7842c9cd96 chore(pi-coding-agent): bump to 0.77.0 2026-05-29 07:36:24 -04:00
285fe99461 feat: create skill variable helper 2026-05-28 17:45:37 -04:00
544ab6f781 chore: update pi-coding-agent 2026-05-28 17:16:01 -04:00
332713f8fc feat(pi): adopt pi-web extension for web browsing
Replaces the local web-glimpse skill with the pi-web package, configured
via a sops template so the kagi token can be embedded alongside the
searxng base URL.
2026-05-25 23:19:00 -04:00
b4fffef1d7 docs: add asking questions and rephrase-over-append guidelines to AGENTS.md 2026-05-24 07:53:16 -04:00
a3e080b2f2 build(pi-coding-agent): bump version 0.75.4 → 0.75.5 2026-05-24 07:53:12 -04:00
1b43af76b1 chore: update pi 2026-05-21 07:35:24 -04:00
a2298fcfae fix: restart llama swap on config change 2026-05-20 16:46:40 -04:00
79884e8a77 fix(llama-swap): reduce qwen3.6 dual context 2026-05-20 00:11:08 -04:00
a7941db240 chore(packages): bump llama-cpp to HEAD 20260519
Update llama-cpp from tagged release b9196 to HEAD build dated
2026-05-19 (rev b28a2f37). Switch to date-based versioning for HEAD
builds and update source/webui hashes.

Also clarify update-package-hashes skill: always use lib.fakeHash
instead of literal sha256 placeholder strings.
2026-05-19 20:44:30 -04:00
964b0dd2a1 feat(llama-swap): add qwen3.6-27b IQ4_KS config for CUDA0 2026-05-19 08:00:13 -04:00
81ffe67cce refactor(llama-swap): replace --parallel with -np and add -kvu flag
Switch llama-server invocations from --parallel to -np with -kvu
(kv-cache unified) across Qwen3.6 model configs. Also reduce
context for qwen3.6-27b-cuda0 from 150k to 140k.
2026-05-19 06:22:08 -04:00
5b2111c7e8 chore(packages): bump pi-coding-agent to v0.75.3
Update bundled package-lock.json and refresh npmDepsHash.
2026-05-18 13:28:16 -04:00
62f3ddc218 chore(packages): bump qwen-code to 0.16.0-preview.0 2026-05-17 21:02:34 -04:00
0f9513ec26 chore(packages): bump llama-swap 208 → 216 2026-05-17 20:41:23 -04:00
1623f8d14a chore(packages): bump pi-coding-agent to v0.75.0
Update src hash, npmDepsHash, and package-lock.json for v0.75.0.
Fix preBuild substitute pattern to match new generate-image-models step.
2026-05-17 16:45:41 -04:00
719f2798e5 chore(packages): bump llama-cpp b9180 → b9196 2026-05-17 13:09:49 -04:00
24dd67a0de refactor(llama-swap): standardize model ids 2026-05-17 11:38:25 -04:00
89e2161ff4 build(reichard.llama-cpp): bump to b9180
Update version from b9159 to b9180. Adapt to upstream renames
(tools/server/webui -> tools/ui) and updated vite output paths.
Drop MTP patch (PR #22673) which has been merged upstream.
2026-05-16 13:58:15 -04:00
3b4f54b2b1 build(conduit): update to HEAD 8dfb14f 2026-05-15 13:28:44 -04:00
4e2d03ae89 build(packages): bump llama-cpp to b9159, add WebUI derivation, fix spec-type
- Bump llama-cpp from b9048 to b9159
- Add WebUI build derivation to work around HF bucket fetch in Nix sandbox
- Switch MTP patch from .patch to .diff (squashed unified diff applies cleanly)
- Refactor default.nix with let bindings for cleaner structure
- Add AGENTS.md documenting version/postFetch pitfalls
- Add qwen3.6-27b-vllm-50k single-GPU config to llama-swap
- Fix --spec-type from "mtp" to "draft-mtp" in llama.cpp configs
- Update update-package-hashes skill with fetchpatch/.diff guidance
2026-05-15 11:14:57 -04:00
eaf307db23 docs(pi/agents): expand principles and comment style guidance
Add explicit priority order (correctness > maintainability > polish),
tighten comment style to default-no-comment with why-only rationale,
and document splitting skill workflow from reference sub-docs.
2026-05-14 18:22:01 -04:00
b16d816a18 build(pi-coding-agent): bump to 0.74.0 with enriched lockfile workflow
Upstream v0.74.0 lockfile omits resolved/integrity metadata needed by
buildNpmPackage's offline NPM cache. Add a package-local enriched lockfile,
a script to regenerate it from the npm registry, and a prePatch step to
copy it into the build sandbox.
2026-05-12 17:13:43 -04:00
18e8a39ee3 feat(pi): add scout subagent for codebase reconnaissance 2026-05-12 16:51:15 -04:00
68 changed files with 8289 additions and 883 deletions

View File

@@ -12,7 +12,7 @@ If the user provides only a **package name** (no version), look up the latest ve
## Hard Rules — Read First
1. **Never run `nix build .#<pkg>`** or `.#packages.<system>.<pkg>`. That compiles the package. Only realise **FOD sub-attributes** (`.src`, `.goModules`, `.npmDeps`, `.cargoDeps`) — those are pure downloads, not builds.
2. **Never** use `nix-prefetch-git`, `nix-prefetch-url`, `nix hash path`, `git clone` + manual hashing, `builtins.fetchGit`, or any other ad-hoc method to compute hashes. They produce hashes in formats that don't match what `fetchgit`/`fetchFromGitHub`/etc. expect, and you will waste time chasing mismatches.
2. **Never** use `nix-prefetch-git`, `nix-prefetch-github`, `nix-prefetch-url`, `nix hash path`, `nix hash file` (on a raw patch/tarball), `git clone` + manual hashing, `builtins.fetchGit`, or any other ad-hoc method to compute hashes. They produce hashes in formats that don't match what `fetchgit`/`fetchFromGitHub`/`fetchpatch` expect (notably: `fetchFromGitHub { leaveDotGit = true; }` is non-deterministic across machines, and `fetchpatch` normalizes patches — strips `index abc..def`, `From <sha>`, signatures — so its hash ≠ `nix hash file` of the raw `.patch`).
3. There are exactly **two** correct ways to get a hash, both listed below. If neither fits, stop and ask the user — don't improvise.
## The Only Two Methods
@@ -29,17 +29,27 @@ Copy the `hash = "sha256-..."` line from the output into the package's `src` blo
### Method B — FOD mismatch trick (for everything else)
For `vendorHash`, `npmDepsHash`, `cargoHash`, `cargoLock.outputHashes.<crate>`, or any `src` using a custom fetcher (`leaveDotGit`, `postFetch`, `fetchSubmodules`, etc. — applies to `llama-cpp` and `llama-swap`), realise the **specific FOD sub-attribute** and read the `got:` line from the error.
For `vendorHash`, `npmDepsHash`, `cargoHash`, `cargoLock.outputHashes.<crate>`, `fetchpatch` hashes, or any `src` using a custom fetcher (`leaveDotGit`, `postFetch`, `fetchSubmodules`, etc. — applies to `llama-cpp` and `llama-swap`), realise the **specific FOD sub-attribute** and read the `got:` line from the error.
```bash
nix build .#<name>.src --no-link 2>&1 | tee /tmp/hash.log # for src
nix build .#<name>.goModules --no-link 2>&1 | tee /tmp/hash.log # for vendorHash
nix build .#<name>.npmDeps --no-link 2>&1 | tee /tmp/hash.log # for npmDepsHash
nix build .#<name>.cargoDeps --no-link 2>&1 | tee /tmp/hash.log # for cargoHash
nix build .#<name> --no-link 2>&1 | tee /tmp/hash.log # for fetchpatch / other input FODs (see note)
grep -E '^[[:space:]]*got:' /tmp/hash.log | tail -1 | awk '{print $2}'
```
Setting the hash to `lib.fakeHash` (preferred when `lib` is in scope), `sha256-AAAA...` (44 A's), or leaving the old one in place all work — the build will fail at the FOD with `got: sha256-...` which is the correct value.
**`fetchpatch` note:** patches don't have a dedicated sub-attribute, so you must target the package itself. This is safe *only* when the patch hash is wrong (e.g. `lib.fakeHash`) — Nix realises the patch FOD before compilation starts, so a hash mismatch aborts with `0 built (1 failed)` and zero compile work. If every FOD hash is already correct, `nix build .#<name>` will start compiling. To guard against this: always start patch hashes as `lib.fakeHash`, run the build, copy the `got:` value, paste it in, and only then re-verify with `.src` / sub-attribute builds (never re-run `.#<name>` to confirm).
**GitHub PR patches — `.patch` vs `.diff`:** When fetching a patch from a GitHub pull request, prefer the `.diff` endpoint over `.patch`.
- `https://github.com/<owner>/<repo>/pull/<N>.patch` — a `git format-patch` **mbox** containing each commit in the PR separately. The Nix `patchPhase` applies patches with GNU `patch`, which does **not** replay commit history; it applies each hunk against the working tree. PRs that create a file in one commit and delete/rename it in a later commit will fail with errors like `The next patch would delete the file X, which does not exist`.
- `https://github.com/<owner>/<repo>/pull/<N>.diff` — a **squashed** unified diff of the PR's net change. Applies cleanly against any base the PR is mergeable against.
Default to `.diff`. Only fall back to `.patch` if you specifically need authorship metadata (rare for Nix patching). If a previously-working `.patch` URL suddenly fails to apply, switching to `.diff` is the first thing to try.
Always use `lib.fakeHash` (or `pkgs.lib.fakeHash` if only `pkgs` is in scope). This is the only reliable way to set a bogus hash — never write a literal `sha256-...` placeholder string. The build will fail at the FOD with `got: sha256-...` which is the correct value.
**Note:** `.src`, `.goModules`, etc. are sub-attributes of the derivation. They download but do not compile. `nix build .#<name>` (without the `.src` suffix) compiles — never do that, except for the deliberate `fetchpatch` hash-mismatch run described in the `fetchpatch` note above.

24
flake.lock generated
View File

@@ -28,16 +28,16 @@
]
},
"locked": {
"lastModified": 1772129556,
"narHash": "sha256-Utk0zd8STPsUJPyjabhzPc5BpPodLTXrwkpXBHYnpeg=",
"lastModified": 1779036909,
"narHash": "sha256-zXcwYQGCT6pzinK+1dBB2ekTVtfxGZAapb3Evdcu4fY=",
"owner": "nix-darwin",
"repo": "nix-darwin",
"rev": "ebec37af18215214173c98cf6356d0aca24a2585",
"rev": "56c666e108467d87d13508936aade6d567f2a501",
"type": "github"
},
"original": {
"owner": "nix-darwin",
"ref": "nix-darwin-25.11",
"ref": "nix-darwin-26.05",
"repo": "nix-darwin",
"type": "github"
}
@@ -278,16 +278,16 @@
]
},
"locked": {
"lastModified": 1778401693,
"narHash": "sha256-OVHdCqXXUF5UdGkH+FF2ZL06OLZjj2kvP2dIUmzVWoo=",
"lastModified": 1780361225,
"narHash": "sha256-wnV9ttf4fPWNonBIQmvlrSlNpQYgx5HgWWd007mwIFA=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "389b83002efc26f1145e89a6a8e6edc5a6435948",
"rev": "e28654b71096e08c019d4861ca26acb646f583d8",
"type": "github"
},
"original": {
"owner": "nix-community",
"ref": "release-25.11",
"ref": "release-26.05",
"repo": "home-manager",
"type": "github"
}
@@ -443,16 +443,16 @@
},
"nixpkgs_4": {
"locked": {
"lastModified": 1778003029,
"narHash": "sha256-q/nkKLDtHIyLjZpKhWk3cSK5IYsFqtMd6UtXF3ddjgA=",
"lastModified": 1780203844,
"narHash": "sha256-K5sT4jTpGs15ADhviMKNBH38REpPf5Q6mM1+N6cArVE=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0c88e1f2bdb93d5999019e99cb0e61e1fe2af4c5",
"rev": "b51242d7d43689db2f3be91bd05d5b24fbb469c4",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.11",
"ref": "nixos-26.05",
"repo": "nixpkgs",
"type": "github"
}

View File

@@ -2,7 +2,7 @@
description = "NixOS Hosts";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-26.05";
nixpkgs-unstable.url = "github:NixOS/nixpkgs/nixos-unstable";
disko.url = "github:nix-community/disko";
determinate.url = "github:determinatesystems/determinate";
@@ -11,7 +11,7 @@
inputs.nixpkgs.follows = "nixpkgs";
};
home-manager = {
url = "github:nix-community/home-manager/release-25.11";
url = "github:nix-community/home-manager/release-26.05";
inputs.nixpkgs.follows = "nixpkgs";
};
apple-silicon = {
@@ -31,7 +31,7 @@
inputs.nixpkgs.follows = "nixpkgs";
};
darwin = {
url = "github:nix-darwin/nix-darwin/nix-darwin-25.11";
url = "github:nix-darwin/nix-darwin/nix-darwin-26.05";
inputs.nixpkgs.follows = "nixpkgs";
};
};

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -8,7 +8,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -41,6 +41,7 @@ in
services = {
sketchybar = enabled;
open-proxy.server = enabled;
};
security = {

View File

@@ -33,6 +33,16 @@ else
echo " [✓] VM SOCKS Proxy Already Running"
fi
# Reverse tunnel so the VM's `open`/`xdg-open` reach open-proxy serve on this host.
# pgrep -f matches against the full command line, so an already-running tunnel
# with exactly these arguments is detected and not started a second time.
if ! pgrep -f "ssh -N -R 7777:127.0.0.1:7777 adios-cs" > /dev/null; then
echo " [*] VM Open Proxy Starting..."
# -N: run no remote command; -R: port 7777 on adios-cs is forwarded back to
# 127.0.0.1:7777 on this machine. Output is discarded and ssh runs in the background.
ssh -N -R 7777:127.0.0.1:7777 adios-cs &> /dev/null &
# Remove the background job from this shell's job table.
disown
# NOTE(review): printed unconditionally — the ssh exit status is never checked,
# so "Started" does not prove the tunnel actually came up.
echo " [✓] VM Open Proxy Started"
else
echo " [✓] VM Open Proxy Already Running"
fi
echo -e " [*] Connecting..."
# Connect to VM

View File

@@ -9,7 +9,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -20,7 +20,7 @@ in
services = {
ssh-agent = enabled;
fusuma = enabled;
swww = enabled;
awww = enabled;
};
security = {
@@ -58,6 +58,7 @@ in
home.packages = with pkgs; [
orca-slicer
reichard.tuxguitar
];
dconf = {

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -17,6 +17,7 @@ in
services = {
ssh-agent = enabled;
open-proxy.client = enabled;
};
security = {
@@ -30,6 +31,7 @@ in
enable = true;
customFastFetchLogo = ./prophet.txt;
};
conduit = enabled;
btop = enabled;
claude-code = enabled;
direnv = enabled;

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -7,7 +7,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -8,7 +8,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {

View File

@@ -9,7 +9,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
home.stateVersion = "25.11";
home.stateVersion = "26.05";
reichard = {
user = {
@@ -20,7 +20,7 @@ in
services = {
ssh-agent = enabled;
fusuma = enabled;
swww = enabled;
awww = enabled;
poweralertd = enabled;
};
@@ -68,6 +68,10 @@ in
};
};
home.packages = with pkgs; [
orca-slicer
];
home.pointerCursor = {
gtk.enable = true;
name = "catppuccin-macchiato-mauve-cursors";

View File

@@ -20,7 +20,7 @@ in
services = {
ssh-agent = enabled;
fusuma = enabled;
swww = enabled;
awww = enabled;
};
security = {

View File

@@ -2,11 +2,13 @@
{
home.packages = with pkgs; [
sqlite-interactive
jnv
jq
mosh
ncdu
ripgrep
reichard.codexis
ripgrep
sqlite-interactive
unzip
];
}

View File

@@ -1,4 +1,9 @@
{ config, lib, pkgs, namespace, ... }:
{ config
, lib
, pkgs
, namespace
, ...
}:
let
inherit (lib)
types
@@ -62,40 +67,47 @@ in
extensions.packages = mkOpt (with lib.types; listOf package)
(with pkgs.firefox-addons; [
bitwarden
pkgs.firefox-addons."ctrl-number-to-switch-tabs"
darkreader
gruvbox-dark-theme
kagi-search
sponsorblock
ublock-origin
# bypass-paywalls-clean
]) "Extensions to install";
};
config = mkIf cfg.enable {
programs.firefox = {
enable = true;
configPath = ".mozilla/firefox";
inherit (cfg) policies;
profiles = {
${config.${namespace}.user.name} = {
inherit (cfg) extraConfig extensions;
inherit (cfg) extraConfig;
inherit (config.${namespace}.user) name;
extensions = {
packages = cfg.extensions.packages;
force = true;
};
id = 0;
settings = mkMerge [
cfg.settings
{
"browser.aboutConfig.showWarning" = false;
"extensions.autoDisableScopes" = 0;
"extensions.activeThemeID" = "{eb8c4a94-e603-49ef-8e81-73d3c4cc04ff}";
"browser.aboutwelcome.enabled" = false;
"browser.sessionstore.warnOnQuit" = true;
"browser.newtabpage.activity-stream.showSponsoredTopSites" = false;
"browser.shell.checkDefaultBrowser" = false;
"general.smoothScroll.msdPhysics.enabled" = true;
"intl.accept_languages" = "en-US,en";
"ui.key.accelKey" = "224";
"ui.key.accelKey" = 91;
# "devtools.chrome.enabled" = true;
# "xpinstall.signatures.required" = false;

View File

@@ -1,154 +0,0 @@
exec-once = uwsm app -- waybar
exec-once = uwsm app -- $terminal
exec-once = uwsm app -- firefox
general {
gaps_in = 5
gaps_out = 12
border_size = 2
# https://wiki.hyprland.org/Configuring/Variables/#variable-types for info about colors
col.active_border = rgba(33ccffee) rgba(00ff99ee) 45deg
col.inactive_border = rgba(595959aa)
# Set to true enable resizing windows by clicking and dragging on borders and gaps
resize_on_border = false
# Please see https://wiki.hyprland.org/Configuring/Tearing/ before you turn this on
allow_tearing = false
layout = dwindle
}
# https://wiki.hyprland.org/Configuring/Variables/#decoration
decoration {
rounding = 10
active_opacity = 1.0
inactive_opacity = 1.0
shadow {
enabled = true
range = 4
render_power = 3
color = rgba(1a1a1aee)
}
blur {
enabled = true
size = 3
passes = 1
vibrancy = 0.1696
}
}
# https://wiki.hyprland.org/Configuring/Variables/#animations
animations {
enabled = yes, please :)
bezier = easeOutQuint,0.23,1,0.32,1
bezier = easeInOutCubic,0.65,0.05,0.36,1
bezier = linear,0,0,1,1
bezier = almostLinear,0.5,0.5,0.75,1.0
bezier = quick,0.15,0,0.1,1
animation = global, 1, 10, default
animation = border, 1, 5.39, easeOutQuint
animation = windows, 1, 4.79, easeOutQuint
animation = windowsIn, 1, 4.1, easeOutQuint, popin 87%
animation = windowsOut, 1, 1.49, linear, popin 87%
animation = fadeIn, 1, 1.73, almostLinear
animation = fadeOut, 1, 1.46, almostLinear
animation = fade, 1, 3.03, quick
animation = layers, 1, 3.81, easeOutQuint
animation = layersIn, 1, 4, easeOutQuint, fade
animation = layersOut, 1, 1.5, linear, fade
animation = fadeLayersIn, 1, 1.79, almostLinear
animation = fadeLayersOut, 1, 1.39, almostLinear
animation = workspaces, 1, 1.94, almostLinear, fade
animation = workspacesIn, 1, 1.21, almostLinear, fade
animation = workspacesOut, 1, 1.94, almostLinear, fade
}
# Ref https://wiki.hyprland.org/Configuring/Workspace-Rules/
# "Smart gaps" / "No gaps when only"
# uncomment all if you wish to use that.
# workspace = w[tv1], gapsout:0, gapsin:0
# workspace = f[1], gapsout:0, gapsin:0
# windowrulev2 = bordersize 0, floating:0, onworkspace:w[tv1]
# windowrulev2 = rounding 0, floating:0, onworkspace:w[tv1]
# windowrulev2 = bordersize 0, floating:0, onworkspace:f[1]
# windowrulev2 = rounding 0, floating:0, onworkspace:f[1]
# See https://wiki.hyprland.org/Configuring/Dwindle-Layout/ for more
dwindle {
pseudotile = true # Master switch for pseudotiling. Enabling is bound to mainMod + P in the keybinds section below
preserve_split = true # You probably want this
}
# See https://wiki.hyprland.org/Configuring/Master-Layout/ for more
master {
new_status = master
}
# https://wiki.hyprland.org/Configuring/Variables/#misc
misc {
force_default_wallpaper = -1 # Set to 0 or 1 to disable the anime mascot wallpapers
disable_hyprland_logo = false # If true disables the random hyprland logo / anime girl background. :(
}
#############
### INPUT ###
#############
# https://wiki.hyprland.org/Configuring/Variables/#input
input {
kb_layout = us
kb_variant =
kb_model =
kb_options =
kb_rules =
follow_mouse = 1
sensitivity = 0.0 # -1.0 - 1.0, 0 means no modification.
touchpad {
scroll_factor = 0.5
disable_while_typing = true
natural_scroll = true
clickfinger_behavior = true
tap-to-click = false
}
}
# https://wiki.hyprland.org/Configuring/Variables/#gestures
gesture = 4, horizontal, workspace, invert
# Thinkpad Trackpoint
device {
name = tpps/2-elan-trackpoint
sensitivity = -0.3
}
##############################
### WINDOWS AND WORKSPACES ###
##############################
# See https://wiki.hyprland.org/Configuring/Window-Rules/ for more
# See https://wiki.hyprland.org/Configuring/Workspace-Rules/ for workspace rules
# Example windowrule v1
# windowrule = float, ^(kitty)$
# Example windowrule v2
# windowrulev2 = float,class:^(kitty)$,title:^(kitty)$
# Ignore maximize requests from apps. You'll probably like this.
windowrulev2 = suppressevent maximize, class:.*
# Fix some dragging issues with XWayland
windowrulev2 = nofocus,class:^$,title:^$,xwayland:1,floating:1,fullscreen:0,pinned:0

View File

@@ -0,0 +1,217 @@
-- Hyprland config (lua backend, Hyprland 0.55+).
-- `mainMod`, `menuMod`, and the monitor(s) are injected by Nix above this file.
-- See https://wiki.hypr.land/Configuring/Start/
local terminal = "ghostty"
local menu = "wofi --show drun"
-------------------
---- AUTOSTART ----
-------------------
-- Handler for the "hyprland.start" event: launches waybar, the terminal and
-- firefox, in that order, each wrapped in `uwsm app --`.
hl.on("hyprland.start", function()
    for _, app in ipairs({ "waybar", terminal, "firefox" }) do
        hl.exec_cmd("uwsm app -- " .. app)
    end
end)
-----------------------
---- LOOK AND FEEL ----
-----------------------
-- Appearance and layout settings: gaps/borders, decoration (rounding, opacity,
-- shadow, blur), the animations master switch, and per-layout options.
hl.config({
general = {
gaps_in = 5,
gaps_out = 12,
border_size = 2,
col = {
-- Active border uses two colours with a 45 degree angle.
active_border = { colors = { "rgba(33ccffee)", "rgba(00ff99ee)" }, angle = 45 },
inactive_border = "rgba(595959aa)",
},
resize_on_border = false,
allow_tearing = false,
layout = "dwindle",
},
decoration = {
rounding = 10,
active_opacity = 1.0,
inactive_opacity = 1.0,
shadow = {
enabled = true,
range = 4,
render_power = 3,
-- NOTE(review): numeric form, unlike the "rgba(...)" strings used for the borders;
-- presumably AARRGGBB for the former rgba(1a1a1aee) — confirm.
color = 0xee1a1a1a,
},
blur = {
enabled = true,
size = 3,
passes = 1,
vibrancy = 0.1696,
},
},
-- Only the master switch lives here; curves and per-leaf animations are declared
-- with hl.curve / hl.animation further down this file.
animations = {
enabled = true,
},
dwindle = {
-- NOTE(review): the previous hyprlang config also set `pseudotile = true` in this
-- section; confirm that dropping it was intentional.
preserve_split = true,
},
master = {
new_status = "master",
},
misc = {
-- Per the previous hyprlang config's comments: 0 or 1 disables the mascot wallpapers.
force_default_wallpaper = -1,
disable_hyprland_logo = false,
},
})
----------------------
---- ANIMATIONS ------
----------------------
hl.curve("easeOutQuint", { type = "bezier", points = { { 0.23, 1 }, { 0.32, 1 } } })
hl.curve("easeInOutCubic", { type = "bezier", points = { { 0.65, 0.05 }, { 0.36, 1 } } })
hl.curve("linear", { type = "bezier", points = { { 0, 0 }, { 1, 1 } } })
hl.curve("almostLinear", { type = "bezier", points = { { 0.5, 0.5 }, { 0.75, 1 } } })
hl.curve("quick", { type = "bezier", points = { { 0.15, 0 }, { 0.1, 1 } } })
hl.animation({ leaf = "global", enabled = true, speed = 10, bezier = "default" })
hl.animation({ leaf = "border", enabled = true, speed = 5.39, bezier = "easeOutQuint" })
hl.animation({ leaf = "windows", enabled = true, speed = 4.79, bezier = "easeOutQuint" })
hl.animation({ leaf = "windowsIn", enabled = true, speed = 4.1, bezier = "easeOutQuint", style = "popin 87%" })
hl.animation({ leaf = "windowsOut", enabled = true, speed = 1.49, bezier = "linear", style = "popin 87%" })
hl.animation({ leaf = "fadeIn", enabled = true, speed = 1.73, bezier = "almostLinear" })
hl.animation({ leaf = "fadeOut", enabled = true, speed = 1.46, bezier = "almostLinear" })
hl.animation({ leaf = "fade", enabled = true, speed = 3.03, bezier = "quick" })
hl.animation({ leaf = "layers", enabled = true, speed = 3.81, bezier = "easeOutQuint" })
hl.animation({ leaf = "layersIn", enabled = true, speed = 4, bezier = "easeOutQuint", style = "fade" })
hl.animation({ leaf = "layersOut", enabled = true, speed = 1.5, bezier = "linear", style = "fade" })
hl.animation({ leaf = "fadeLayersIn", enabled = true, speed = 1.79, bezier = "almostLinear" })
hl.animation({ leaf = "fadeLayersOut", enabled = true, speed = 1.39, bezier = "almostLinear" })
hl.animation({ leaf = "workspaces", enabled = true, speed = 1.94, bezier = "almostLinear", style = "fade" })
hl.animation({ leaf = "workspacesIn", enabled = true, speed = 1.21, bezier = "almostLinear", style = "fade" })
hl.animation({ leaf = "workspacesOut", enabled = true, speed = 1.94, bezier = "almostLinear", style = "fade" })
---------------
---- INPUT ----
---------------
-- Keyboard, pointer and touchpad input settings.
hl.config({
input = {
kb_layout = "us",
-- The remaining XKB fields are passed as empty strings.
kb_variant = "",
kb_model = "",
kb_options = "",
kb_rules = "",
follow_mouse = 1,
-- Per the previous hyprlang config's comment: range -1.0 to 1.0, 0 means no modification.
sensitivity = 0.0,
touchpad = {
scroll_factor = 0.5,
disable_while_typing = true,
natural_scroll = true,
clickfinger_behavior = true,
-- Spelled `tap-to-click` in the previous hyprlang config; underscore form here.
tap_to_click = false,
},
},
})
-- 4-finger horizontal swipe to switch workspaces. The old `invert` modifier was
-- removed in the 0.51 gesture rework; flip the physical swipe direction if needed.
hl.gesture({ fingers = 4, direction = "horizontal", action = "workspace" })
-- Thinkpad Trackpoint
hl.device({ name = "tpps/2-elan-trackpoint", sensitivity = -0.3 })
---------------------
---- KEYBINDINGS ----
---------------------
-- Menu Mod Bindings (macOS Transition - Spotlight & Screenshots)
hl.bind(menuMod .. " + SPACE", hl.dsp.exec_cmd(menu))
hl.bind(menuMod .. " + SHIFT + 1", hl.dsp.exec_cmd("hyprshot -m output"))
hl.bind(menuMod .. " + SHIFT + 2", hl.dsp.exec_cmd("hyprshot -m window"))
hl.bind(menuMod .. " + SHIFT + 3", hl.dsp.exec_cmd("hyprshot -m region"))
hl.bind(menuMod .. " + Q", hl.dsp.window.close())
-- Primary Bindings
hl.bind(mainMod .. " + RETURN", hl.dsp.exec_cmd(terminal))
hl.bind(mainMod .. " + M", hl.dsp.exec_cmd("uwsm stop"))
hl.bind(mainMod .. " + V", hl.dsp.window.float({ action = "toggle" }))
hl.bind(mainMod .. " + P", hl.dsp.window.pin())
hl.bind(mainMod .. " + J", hl.dsp.layout("togglesplit"))
hl.bind(mainMod .. " + S", hl.dsp.workspace.toggle_special("magic"))
hl.bind(mainMod .. " + SHIFT + S", hl.dsp.window.move({ workspace = "special:magic" }))
-- Window Focus
hl.bind(mainMod .. " + left", hl.dsp.focus({ direction = "left" }))
hl.bind(mainMod .. " + right", hl.dsp.focus({ direction = "right" }))
hl.bind(mainMod .. " + up", hl.dsp.focus({ direction = "up" }))
hl.bind(mainMod .. " + down", hl.dsp.focus({ direction = "down" }))
-- Digit keys map to workspaces 1-10, with the "0" key standing in for workspace 10.
-- mainMod + digit switches to the workspace; mainMod + SHIFT + digit moves the
-- active window to it.
for workspace = 1, 10 do
    local digit = workspace % 10 -- 10 % 10 == 0, so workspace 10 lands on the "0" key
    local focus_keys = mainMod .. " + " .. digit
    local move_keys = mainMod .. " + SHIFT + " .. digit
    hl.bind(focus_keys, hl.dsp.focus({ workspace = workspace }))
    hl.bind(move_keys, hl.dsp.window.move({ workspace = workspace }))
end
hl.bind(mainMod .. " + SHIFT + right", hl.dsp.focus({ workspace = "+1" }))
hl.bind(mainMod .. " + SHIFT + left", hl.dsp.focus({ workspace = "-1" }))
-- Window move/resize with mouse
hl.bind(mainMod .. " + mouse:272", hl.dsp.window.drag(), { mouse = true })
hl.bind(mainMod .. " + mouse:273", hl.dsp.window.resize(), { mouse = true })
-- Multimedia & Brightness Keys
hl.bind("XF86AudioRaiseVolume", hl.dsp.exec_cmd("wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%+"), { locked = true, repeating = true })
hl.bind("XF86AudioLowerVolume", hl.dsp.exec_cmd("wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%-"), { locked = true, repeating = true })
hl.bind("XF86AudioMute", hl.dsp.exec_cmd("wpctl set-mute @DEFAULT_AUDIO_SINK@ toggle"), { locked = true, repeating = true })
hl.bind("XF86AudioMicMute", hl.dsp.exec_cmd("wpctl set-mute @DEFAULT_AUDIO_SOURCE@ toggle"), { locked = true, repeating = true })
hl.bind("XF86MonBrightnessUp", hl.dsp.exec_cmd("brightnessctl s 4%+"), { locked = true, repeating = true })
hl.bind("XF86MonBrightnessDown", hl.dsp.exec_cmd("brightnessctl s 5%-"), { locked = true, repeating = true })
-- macOS Keyboard Brightness
hl.bind(menuMod .. " + XF86MonBrightnessUp", hl.dsp.exec_cmd("brightnessctl -d kbd_backlight s 10%+"), { locked = true, repeating = true })
hl.bind(menuMod .. " + XF86MonBrightnessDown", hl.dsp.exec_cmd("brightnessctl -d kbd_backlight s 10%-"), { locked = true, repeating = true })
-- Player Controls
hl.bind("XF86AudioNext", hl.dsp.exec_cmd("playerctl next"), { locked = true })
hl.bind("XF86AudioPause", hl.dsp.exec_cmd("playerctl play-pause"), { locked = true })
hl.bind("XF86AudioPlay", hl.dsp.exec_cmd("playerctl play-pause"), { locked = true })
hl.bind("XF86AudioPrev", hl.dsp.exec_cmd("playerctl previous"), { locked = true })
--------------------------------
---- WINDOWS AND WORKSPACES ----
--------------------------------
-- Ignore maximize requests from apps: matches every window class and suppresses
-- the "maximize" event.
hl.window_rule({
name = "suppress-maximize-events",
match = { class = ".*" },
suppress_event = "maximize",
})
-- Fix some dragging issues with XWayland (carried over from the previous hyprlang
-- config): windows with empty class and title that are XWayland, floating, not
-- fullscreen and not pinned are marked no_focus.
hl.window_rule({
name = "fix-xwayland-drags",
match = {
class = "^$",
title = "^$",
xwayland = true,
float = true,
fullscreen = false,
pin = false,
},
no_focus = true,
})

View File

@@ -24,90 +24,27 @@ in
wayland.windowManager.hyprland = {
enable = true;
extraConfig = builtins.readFile ./config/hyprland.conf;
settings = {
"$mainMod" = cfg.mainMod;
"$menuMod" = cfg.menuMod;
"$terminal" = "ghostty";
"$menu" = "wofi --show drun";
# Lua Backend - Hyprland 0.55 deprecated hyprlang and home-manager 26.05 defaults configType to "lua".
configType = "lua";
extraConfig =
let
# Quote unless the value is numeric, so scale can be `2` or `"auto"`.
luaScalar = v: if builtins.match "[0-9]+(\\.[0-9]+)?" v != null then v else ''"${v}"'';
mkMonitor =
s:
let
parts = map lib.trim (lib.splitString "," s);
field = i: if builtins.length parts > i then builtins.elemAt parts i else "";
in
''hl.monitor({ output = "${field 0}", mode = "${field 1}", position = "${field 2}", scale = ${luaScalar (field 3)} })'';
in
''
local mainMod = "${cfg.mainMod}"
local menuMod = "${cfg.menuMod}"
monitor = cfg.monitors;
bind = [
# Menu Mod Bindings (macOS Transition - Spotlight & Screenshots)
"$menuMod, SPACE, exec, $menu"
"$menuMod SHIFT, 1, exec, hyprshot -m output"
"$menuMod SHIFT, 2, exec, hyprshot -m window"
"$menuMod SHIFT, 3, exec, hyprshot -m region"
"$menuMod, Q, killactive"
# Primary Bindings
"$mainMod, RETURN, exec, $terminal"
"$mainMod, M, exit"
"$mainMod, V, togglefloating"
"$mainMod, P, pin"
"$mainMod, J, togglesplit"
"$mainMod, S, togglespecialworkspace, magic"
"$mainMod SHIFT, S, movetoworkspace, special:magic"
# Window Focus
"$mainMod, left, movefocus, l"
"$mainMod, right, movefocus, r"
"$mainMod, up, movefocus, u"
"$mainMod, down, movefocus, d"
# Workspace Switch
"$mainMod, 1, workspace, 1"
"$mainMod, 2, workspace, 2"
"$mainMod, 3, workspace, 3"
"$mainMod, 4, workspace, 4"
"$mainMod, 5, workspace, 5"
"$mainMod, 6, workspace, 6"
"$mainMod, 7, workspace, 7"
"$mainMod, 8, workspace, 8"
"$mainMod, 9, workspace, 9"
"$mainMod, 0, workspace, 10"
# Window Workspace Move
"$mainMod SHIFT, 1, movetoworkspace, 1"
"$mainMod SHIFT, 2, movetoworkspace, 2"
"$mainMod SHIFT, 3, movetoworkspace, 3"
"$mainMod SHIFT, 4, movetoworkspace, 4"
"$mainMod SHIFT, 5, movetoworkspace, 5"
"$mainMod SHIFT, 6, movetoworkspace, 6"
"$mainMod SHIFT, 7, movetoworkspace, 7"
"$mainMod SHIFT, 8, movetoworkspace, 8"
"$mainMod SHIFT, 9, movetoworkspace, 9"
"$mainMod SHIFT, 0, movetoworkspace, 10"
"$mainMod SHIFT, right, workspace, +1"
"$mainMod SHIFT, left, workspace, -1"
];
bindm = [
# Window Resizing
"$mainMod, mouse:272, movewindow"
"$mainMod, mouse:273, resizewindow"
];
bindel = [
# Multimedia & Brightness Keys
",XF86AudioRaiseVolume, exec, wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%+"
",XF86AudioLowerVolume, exec, wpctl set-volume @DEFAULT_AUDIO_SINK@ 5%-"
",XF86AudioMute, exec, wpctl set-mute @DEFAULT_AUDIO_SINK@ toggle"
",XF86AudioMicMute, exec, wpctl set-mute @DEFAULT_AUDIO_SOURCE@ toggle"
",XF86MonBrightnessUp, exec, brightnessctl s 4%+"
",XF86MonBrightnessDown, exec, brightnessctl s 5%-"
# macOS Keyboard Brightness
"$menuMod, XF86MonBrightnessUp, exec, brightnessctl -d kbd_backlight s 10%+"
"$menuMod, XF86MonBrightnessDown, exec, brightnessctl -d kbd_backlight s 10%-"
];
bindl = [
# Player Controls
", XF86AudioNext, exec, playerctl next"
", XF86AudioPause, exec, playerctl play-pause"
", XF86AudioPlay, exec, playerctl play-pause"
", XF86AudioPrev, exec, playerctl previous"
];
};
${lib.concatMapStringsSep "\n" mkMonitor cfg.monitors}
''
+ builtins.readFile ./config/hyprland.lua;
};
programs.waybar = {
@@ -117,9 +54,9 @@ in
{
layer = "top";
position = "top";
mod = "dock";
mode = "dock";
exclusive = true;
passtrough = false;
passthrough = false;
gtk-layer-shell = true;
height = 0;
modules-left = [

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
MODEL="vllm-qwen3.6-27b-long-text"
MODEL="qwen3.6-27b-vllm-180k-cuda0"
SYSTEM_PROMPT="You are a shell command expert. Given a natural language query, generate a single shell command that accomplishes the task."
# Colors

View File

@@ -2,3 +2,5 @@ _scratch
.direnv
.envrc
.agents
.pi
.nvim.lua

View File

@@ -2,11 +2,7 @@
local diagnostics_active = true
local toggle_diagnostics = function()
diagnostics_active = not diagnostics_active
if diagnostics_active then
vim.diagnostic.enable()
else
vim.diagnostic.disable()
end
vim.diagnostic.enable(diagnostics_active)
end
local diagnostics_loclist_active = false

View File

@@ -1,6 +1,6 @@
local llm_endpoint = "https://llm-api.va.reichard.io"
local llm_assistant_model = "vllm-qwen3.6-27b-tools-text "
local llm_infill_model = "qwen3.5-4b-thinking"
local llm_assistant_model = "qwen3.6-27b-vllm-75k-cuda0"
local llm_infill_model = "qwen3.5-4b-cuda1"
local current_fim = "llama"
-- Copilot Configuration

View File

@@ -241,7 +241,12 @@ setup_lsp("gopls", {
})
end,
filetypes = { "go" },
cmd = { "gopls", "-remote=auto" },
cmd = function(dispatchers, config)
return vim.lsp.rpc.start({ "gopls", "-remote=auto" }, dispatchers, {
cwd = config.root_dir,
env = { GOMEMLIMIT = "6GiB" },
})
end,
settings = {
gopls = {
buildFlags = { "-tags=e2e" },
@@ -304,3 +309,30 @@ none_ls.setup({
end
end,
})
------------------------------------------------------
---------------------- EXRC LSP ----------------------
------------------------------------------------------
vim.o.exrc = true -- native path: <cwd>/.nvim.lua on startup

-- Project configs already executed this session, keyed by the path vim.fs.find returned.
local loaded = {}

--- Find the nearest `.nvim.lua` at or above the buffer's file and run it once per session.
--- Execution is gated by vim.secure.read, so untrusted files are never run.
--- NOTE(review): a `<cwd>/.nvim.lua` already run by the native exrc path is not recorded in
--- `loaded`, so it may run a second time from here - confirm whether that matters.
---@param buf integer buffer handle whose file path seeds the upward search
local function load_project_config(buf)
  local fname = vim.api.nvim_buf_get_name(buf)
  if fname == "" then
    return
  end

  local found = vim.fs.find(".nvim.lua", {
    upward = true,
    type = "file", -- a directory named .nvim.lua is not a config
    path = vim.fs.dirname(fname),
  })[1]
  if not found or loaded[found] then
    return
  end

  local content = vim.secure.read(found)
  if type(content) ~= "string" then
    return
  end
  loaded[found] = true

  -- Run Trusted Content - Compile the exact text that passed the trust check; re-reading from disk could execute a file modified after approval.
  local chunk, err = loadstring(content, "@" .. found)
  if not chunk then
    vim.notify("project config: " .. err, vim.log.levels.ERROR)
    return
  end

  -- Contain Failures - A broken project config should surface as a notification, not abort the autocmd.
  local ok, run_err = pcall(chunk)
  if not ok then
    vim.notify("project config: " .. tostring(run_err), vim.log.levels.ERROR)
  end
end

vim.api.nvim_create_autocmd({ "BufReadPost", "BufNewFile" }, {
  callback = function(args) load_project_config(args.buf) end,
})

View File

@@ -49,7 +49,7 @@ local function pr_status()
end
require("lualine").setup({
options = { theme = "catppuccin" },
options = { theme = "catppuccin-mocha" },
sections = {
lualine_c = { { pr_status } },
-- lualine_z = { require("opencode").statusline }

View File

@@ -1,4 +1,7 @@
require("nvim-treesitter.configs").setup({
highlight = { enable = true, additional_vim_regex_highlighting = false },
})
vim.treesitter.language.register("markdown", "octo")
vim.api.nvim_create_autocmd("FileType", {
callback = function(args)
pcall(vim.treesitter.start, args.buf)
end,
})

View File

@@ -135,18 +135,18 @@ in
golangci-lint-langserver
lua-language-server
nil
nodePackages.eslint
nodePackages.svelte-language-server
nodePackages.typescript
nodePackages.typescript-language-server
nodePackages.vscode-langservers-extracted
eslint
svelte-language-server
typescript
typescript-language-server
vscode-langservers-extracted
pyright
python312Packages.autopep8
# Formatters
luaformatter
nixpkgs-fmt
nodePackages.prettier
prettier
stylua
sql-formatter
@@ -177,9 +177,9 @@ in
clangd = "${pkgs.clang-tools}/bin/clangd",
golintls = "${pkgs.golangci-lint-langserver}/bin/golangci-lint-langserver",
luals = "${pkgs.lua-language-server}/bin/lua-language-server",
sveltels = "${pkgs.nodePackages.svelte-language-server}/bin/svelteserver",
tsls = "${pkgs.nodePackages.typescript-language-server}/bin/typescript-language-server",
vscls = "${pkgs.nodePackages.vscode-langservers-extracted}",
sveltels = "${pkgs.svelte-language-server}/bin/svelteserver",
tsls = "${pkgs.typescript-language-server}/bin/typescript-language-server",
vscls = "${pkgs.vscode-langservers-extracted}",
sqls = "${pkgs.sqls}/bin/sqls",
}
return nix_vars

View File

@@ -18,6 +18,10 @@ Be cognizant of context use; this file is loaded for all LLMs. Keep guidance con
nix run nixpkgs#python3 -- script.py
```
## Asking Questions
If a task is ambiguous, underspecified, or you foresee a non-obvious tradeoff during implementation, **surface it before coding** rather than guessing and producing rework. Treat this as always-on; an explicit "any questions?" is never required.
## Context Discipline
Prefer a **search → targeted read** pattern:
@@ -29,26 +33,37 @@ Full-file reads are fine when genuinely needed, but avoid them as the default re
## Principles
1. **KISS / YAGNI**: Keep solutions simple. Avoid abstractions, generics, or indirection unless there is a concrete need.
1. **Priority order**: When goals conflict, optimize in this order:
1. **Correctness** — solve the actual use case, including the realistic failure modes (not just the happy path).
2. **Maintainability / readability** — non-negotiable. Code is read far more than it is written; clarity wins over cleverness.
3. **Abstraction & polish** — only after the above are solid, and only when a concrete need justifies it.
2. **Maintain AGENTS.md**: Keep project guidance up to date, but BLUF: concise, actionable, and context-size conscious.
All three matter, but never sacrifice (2) for (3). Prefer obvious, boring code over slick code that requires a paragraph to explain.
3. **Knowledge Capture**: At task end, if you discovered non-obvious conventions, pitfalls, or repeatable workflows that would have saved time, briefly recommend adding them to AGENTS.md or a skill. Say whether each belongs in project-level context, global agent context, or a task-specific skill. Skip this when there is nothing meaningful.
2. **KISS / YAGNI**: Avoid abstractions, generics, or indirection unless there is a concrete, present need. Speculative flexibility is a maintainability tax.
3. **Maintain AGENTS.md**: Keep project guidance up to date, but BLUF: concise, actionable, and context-size conscious.
4. **Rephrase over append**: When extending existing content (docs, comments, prose, code), prefer rephrasing to capture the new intent over tacking on more verbosity.
5. **Positive framing over prohibition**: State what _to_ do, not what _not_ to do. Default to omitting an instruction entirely rather than adding a "don't do X" rule — omission costs less context and avoids the failure mode where deleting a prohibition gets inverted into a mandate. Reserve explicit prohibitions for cases where the wrong behavior is a likely default that positive guidance alone can't redirect.
6. **Knowledge Capture Check**: Before the final response, ask whether the task revealed a non-obvious convention, pitfall, repeatable workflow, or missing helper. If yes, briefly recommend exactly where to capture it: package/project AGENTS.md, global AGENTS.md, a skill, or a helper script. Skip this note when there is nothing meaningful.
## Style
### Comment Style
A logical block of code (not necessarily a language scope) should have a short Title Case comment above it:
Default to **no comment**. Code should be self-explanatory through naming and structure. Only add a comment when it earns its place by explaining something the code cannot.
```go
// Map Component Results
for _, comp := range components {
results[comp.Name] = comp.Result
}
```
Write a comment when, and only when:
If the block is more complicated or non-obvious, explain _why_ after the title:
1. The _why_ is non-obvious (intent, constraint, workaround, surprising invariant).
2. A reader familiar with the language/codebase would otherwise stop and ask "why?".
Do not narrate _what_ the code does. Do not add Title Case section headers over logical blocks just to label them.
When a comment _is_ warranted, use a short Title Case label, a dash, and the _why_:
```go
// Map Component Results - Downstream consumers expect a name-keyed lookup.
@@ -56,3 +71,11 @@ for _, comp := range components {
results[comp.Name] = comp.Result
}
```
Rules for the explanation after the dash:
- Keep it to **2–3 sentences max**. Never a paragraph.
- State the _why_ directly. Do not restate what the code does, recap prior context, or hedge.
- Do **not** hard-wrap comments at 80 columns. Up to ~120 is fine.
If a block is complex enough that it needs a heading just to be navigable, that is usually a signal to extract a well-named function instead.

View File

@@ -14,6 +14,7 @@ Scaffold a new skill directory with a `SKILL.md` and optional helper scripts und
### 1. Gather Requirements
Ask the user:
- **What does the skill do?** (trigger conditions, purpose)
- **Are there repeatable commands?** (if yes, these become scripts)
@@ -26,23 +27,29 @@ Create `skills/<skill-name>/SKILL.md` with this structure:
```markdown
---
name: <skill-name>
description: '<One-liner: what it does and when to trigger. Keep under ~200 chars.>'
description: "<One-liner: what it does and when to trigger. Keep under ~200 chars.>"
---
# <Skill Title>
## Overview
[1-2 sentences on purpose and scope]
## Workflow
[Numbered steps the agent follows]
```
**Guidelines:**
- **Be concise.** Skills are injected into agent context — every line costs tokens. Aim for the minimum needed to reliably guide the agent.
- **Use scripts for repeatable logic.** If a step involves a multi-line shell command, `jq` pipeline, or API call that won't change between runs, put it in a `.sh` file next to `SKILL.md` and reference it from the workflow. See `address-gh-review/` for an example.
- **Needs configurable values (paths, identifiers, etc.; not secrets — values are stored as plaintext files)?** Copy `assets/variable.sh` into the new skill's `scripts/` dir as-is. Callers use `variable.sh --get NAME [--require-exec RELPATH]`; the helper prints self-explaining `--set` instructions on "unset" or "set-but-invalid" and exits non-zero, so callers just propagate. The helper self-ignores its `.vars/` store on first `--set`, so no `.gitignore` setup is needed.
- **Frontmatter is required.** `name` and `description` fields. The description is what the agent uses to decide whether to load the skill, so make it specific about trigger conditions.
- **Don't over-specify.** Trust the agent to fill gaps. Document the _what_ and _when_, not every micro-step.
- **Frame positively; omit rather than prohibit.** Write what the agent _should_ do. Prefer leaving a rule out over adding "don't do X" (see AGENTS.md principle: _Positive framing over prohibition_).
- **Split workflow from reference when the reference surface grows.** If a skill accumulates lookup tables, mapping rules, or capability references that the workflow consults, move them into a sibling `<skill>/<category>/` directory (e.g. `mappings/`, `references/`) with one sub-doc per category and an index `README.md`. Keep `SKILL.md` focused on the hot path — workflow, hard rules, and a short table pointing at the sub-docs. Include a brief style guide in the index README covering (a) defer to authoritative sources (stubs, schemas, generated docs) whenever possible, (b) row/entry formatting conventions, (c) when to create a new sub-doc vs. extend an existing one.
### 3. Present for Review

View File

@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# Skill-local variable store. Values live in <skill-dir>/.vars/<NAME>.
#
# Usage:
#   variable.sh --get NAME        # prints value to stdout, exits 0
#                                 # or prints a self-explaining hint to
#                                 # stderr and exits 2 if unset.
#   variable.sh --set NAME VALUE  # writes value, exits 0.
#
# NAME must match [A-Z][A-Z0-9_]* in both modes, so a name can never
# resolve to a path outside the store.
#
# Callers should treat a non-zero exit as fatal; the stderr message tells
# the caller (agent or user) exactly how to populate the missing value.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(dirname "$SCRIPT_DIR")"
STORE="$SKILL_DIR/.vars"
SELF="$0"

# Print the usage summary to stderr and exit 2.
usage() {
  cat >&2 <<EOF
Usage:
$SELF --get NAME
$SELF --set NAME VALUE
EOF
  exit 2
}

# Exit 2 with an explanation unless $1 is a valid variable name.
# Shared by --get and --set: the name is used as a file name under $STORE,
# so rejecting anything with '/' or '.' here keeps reads and writes inside it.
validate_name() {
  [[ "$1" =~ ^[A-Z][A-Z0-9_]*$ ]] || {
    echo >&2 "$SELF: invalid name '$1' (must match [A-Z][A-Z0-9_]*)"
    exit 2
  }
}

case "${1:-}" in
  --get)
    [[ $# -eq 2 ]] || usage
    name="$2"
    validate_name "$name"
    file="$STORE/$name"
    if [[ ! -f "$file" ]]; then
      cat >&2 <<EOF
$SELF: $name is not set.
Ask the user for the value, then set it:
$SELF --set $name <value>
EOF
      exit 2
    fi
    cat "$file"
    ;;
  --set)
    [[ $# -eq 3 ]] || usage
    name="$2"; value="$3"
    validate_name "$name"
    mkdir -p "$STORE"
    # Self-ignore the store so values never get committed, even if the
    # skill root lacks a .gitignore entry for .vars/.
    [[ -f "$STORE/.gitignore" ]] || printf '*\n' > "$STORE/.gitignore"
    printf '%s' "$value" > "$STORE/$name"
    ;;
  *)
    usage
    ;;
esac

View File

@@ -126,6 +126,8 @@ EOF
- Imperative mood: "fix bug" not "fixes bug"
- Reference issues: `Closes #123`, `Refs #456`
- Keep description under 72 characters
- Keep commit bodies short: 4 sentences max.
- If you are unsure whether a body is useful, omit it entirely.
## Git Safety Protocol

View File

@@ -1,112 +0,0 @@
---
name: web-glimpse
description: 'Search the web, read pages, extract content, run JavaScript, or capture screenshots using the `glimpse` headless browser tool. Use when the user asks to search the web, look something up online, read/fetch a page, inspect dynamic content, or capture visual state. Does not replace curl for simple HTTP/API requests.'
---
# Web Browsing With Glimpse
`glimpse` runs headless Firefox via WebDriver. Use it for web search, reading rendered pages, running JavaScript, and screenshots. Prefer `curl` for simple APIs, static files, and direct downloads.
## Commands
| Command | Purpose |
| ------- | ------- |
| `reader <url>` | Extract page content as Markdown (Reader View → raw fallback) |
| `exec <url>` | Run JavaScript on a page, return the result |
| `screenshot <url>` | Save a PNG screenshot |
| `search <query>` | Search the web (Kagi) and return results |
| `serve` | Start a persistent browser for faster repeat commands |
## Persistent Server
For multi-command sessions, start a persistent browser server first. All subsequent commands auto-discover it via Unix socket — no extra flags needed.
```bash
# Start persistent server (keeps geckodriver + Firefox alive)
glimpse serve &
# All commands now reuse the running browser (~300ms vs ~2-3s each)
glimpse reader https://example.com
glimpse reader https://other.com
glimpse exec https://example.com --js='return document.title'
# Check server status
glimpse serve --status
# Stop when done
glimpse serve --stop
```
State (cookies, localStorage) persists between commands — this is intentional for sticky sessions. Without a running server, commands work normally with ad-hoc browser startup.
## Quick Reference
```bash
# Read a page (tries Reader View, falls back to raw Turndown)
glimpse reader https://example.com --timeout=15
# Read without Reader View (raw HTML → Markdown via Turndown)
glimpse reader https://example.com --no-reader --timeout=15
# Get structured JSON instead of Markdown (includes method: "reader"|"raw")
glimpse reader https://example.com --format=json
# Save extracted content to a file
glimpse reader https://example.com --output=page.md
# Run JavaScript and return a value
glimpse exec https://example.com --js='return document.title'
# Extract specific data with JavaScript
glimpse exec https://example.com --wait-until=complete --js='return {
title: document.title,
text: document.body.innerText.slice(0, 4000)
}'
# Wait for dynamic content before extracting
glimpse reader https://example.com \
--wait-js='return document.querySelector(".content")?.innerText?.length > 100' \
--timeout=30
# Capture a screenshot
glimpse screenshot https://example.com --output=page.png
# Search the web
glimpse search "query terms" --timeout=15
# Search and get JSON instead of Markdown
glimpse search "query terms" --format=json
```
## Common Options
| Option | Default | Purpose |
| ------ | ------- | ------- |
| `--timeout=<s>` | `10` | Max wait time in seconds; increase for slow/JS-heavy pages |
| `--wait-until=<state>` | `none` | Wait for `none`, `interactive`, or `complete` |
| `--wait-js=<code>` | — | Poll JS expression until truthy |
| `--js=<code>` | — | Run inline JS before command logic |
| `--script=<file>` | — | Run JS file before command logic |
| `--no-headless` | — | Show the browser window |
| `--format=<fmt>` | varies | Output format (reader: `markdown`/`html`/`text`/`json`; search: `markdown`/`json`) |
| `--output=<file>` | — | Write output to file (reader, screenshot) |
| `--no-reader` | — | Skip Reader View, use raw page extraction |
## Workflow
1. **Search first** when the user asks an open-ended question. Pick authoritative results to read.
2. **Read pages with `reader`** — it tries Firefox Reader View for clean article extraction, then falls back to converting the raw page HTML to Markdown via Turndown. Most pages work without extra options.
3. **Add `--wait-until=complete`** for JS-heavy pages, SPAs, or pages that load content dynamically.
4. **Use `exec`** when you need targeted data extraction via JavaScript rather than full page content.
5. **Use `screenshot`** when visual layout, charts, or rendering state matters.
6. **Increase timeouts** — start at `15`, go to `30` for slow sites. The default `10` is often too tight for real-world pages.
7. **Cite URLs** when summarizing web research. Distinguish search snippets from verified page content.
## Error Handling
| Error | Fix |
| ----- | --- |
| `TIMEOUT` | Increase `--timeout` (in seconds), add `--wait-until=complete`, or use `--wait-js` |
| `USAGE_ERROR` | Check arg order: `glimpse <command> <url>`, search is `glimpse search "query"` |
| Thin/empty content | Try `--wait-until=complete`, `--no-reader`, or targeted `exec` |
| Search auth errors | Kagi token is configured via `~/.config/glimpse/config.json` or `KAGI_TOKEN` env |

View File

@@ -0,0 +1,10 @@
---
name: scout
description: Fast codebase reconnaissance. Reads and searches files, then returns concise findings with paths.
approved_tools:
- read
- bash
---
You are a focused codebase scout running as a subagent with isolated context.
Find the information requested by the task and return a concise report. Prefer exact file paths, symbol names, and line numbers. Do not modify files.

View File

@@ -17,6 +17,7 @@ let
# writing other fields (current model, etc.) without us clobbering them.
piPackages = [
"https://gitea.va.reichard.io/evan/pi-lsp.git@main"
"https://gitea.va.reichard.io/evan/pi-web.git@main"
"https://gitea.va.reichard.io/evan/pi-subagents.git@main"
"https://gitea.va.reichard.io/evan/pi-statusline.git@main"
];
@@ -93,7 +94,6 @@ in
# Define Pi Configuration
home.file = {
".pi/agent/AGENTS.md" = {
source = ./config/AGENTS.md;
};
@@ -101,6 +101,10 @@ in
source = ./config/skills;
recursive = true;
};
".pi/agent/subagents" = {
source = ./config/subagents;
recursive = true;
};
".pi/agent/prompts" = {
source = ./config/prompts;
recursive = true;
@@ -127,6 +131,17 @@ in
})
piAuthApiKeys
);
# Pi Web Config - Sops template so the kagi token (declared by the
# glimpse module, which pi enables above) can be embedded alongside
# the non-secret searxng base URL.
templates."pi-web.json" = {
path = "${config.home.homeDirectory}/.pi/pi-web/config.json";
content = builtins.toJSON {
provider = "searxng";
kagi.token = "${config.sops.placeholder.kagi_token}";
searxng.baseUrl = "https://search.va.reichard.io";
};
};
templates."pi-models.json" = {
path = "${config.home.homeDirectory}/.pi/agent/models.json";
content = builtins.toJSON {

View File

@@ -5,23 +5,23 @@
, ...
}:
let
cfg = config.${namespace}.services.swww;
cfg = config.${namespace}.services.awww;
in
{
options.${namespace}.services.swww = {
enable = lib.mkEnableOption "swww wallpaper service";
options.${namespace}.services.awww = {
enable = lib.mkEnableOption "awww wallpaper service";
};
config = lib.mkIf cfg.enable {
home.packages = with pkgs; [
swww
awww
];
systemd.user = {
services = {
swww-daemon = {
awww-daemon = {
Unit = {
Description = "SWWW Wallpaper Daemon";
Description = "AWWW Wallpaper Daemon";
After = [ "graphical-session.target" ];
};
@@ -31,7 +31,7 @@ in
Service = {
Type = "simple";
ExecStart = "${pkgs.swww}/bin/swww-daemon";
ExecStart = "${pkgs.awww}/bin/awww-daemon";
Restart = "on-failure";
RestartSec = 5;
};
@@ -39,28 +39,28 @@ in
change-wallpaper = {
Unit = {
Description = "SWWW Wallpaper Changer";
After = [ "swww-daemon.service" ];
Requires = [ "swww-daemon.service" ];
Description = "AWWW Wallpaper Changer";
After = [ "awww-daemon.service" ];
Requires = [ "awww-daemon.service" ];
};
Install = {
WantedBy = [ "swww-daemon.service" ];
WantedBy = [ "awww-daemon.service" ];
};
Service = {
Type = "oneshot";
ExecStart = "${pkgs.writeShellScript "change-wallpaper-script" ''
WALLPAPER=$(${pkgs.findutils}/bin/find $HOME/Wallpapers -type f | ${pkgs.coreutils}/bin/shuf -n 1)
${pkgs.swww}/bin/swww img "$WALLPAPER" --transition-type random
${pkgs.awww}/bin/awww img "$WALLPAPER" --transition-type random
''}";
};
};
};
timers.swww-schedule = {
timers.awww-schedule = {
Unit = {
Description = "SWWW Wallpaper Schedule";
Description = "AWWW Wallpaper Schedule";
};
Install = {

View File

@@ -0,0 +1,76 @@
# open-proxy module: an optional host-side server and an optional client that
# shadows `open`/`xdg-open`. Both sides are pointed at the same sops-managed
# token file through OPEN_PROXY_TOKEN_FILE.
{ config
, lib
, pkgs
, namespace
, ...
}:
let
inherit (lib) mkIf mkEnableOption;
cfg = config.${namespace}.services.open-proxy;
package = pkgs.reichard.open-proxy;
# Name of the sops secret holding the token, and the path sops reports for it.
secretName = "open_proxy_token";
tokenPath = config.sops.secrets.${secretName}.path;
in
{
options.${namespace}.services.open-proxy = {
server.enable = mkEnableOption "open-proxy host server (opens forwarded URLs/files on this machine)";
client.enable = mkEnableOption "open-proxy client (shadows open/xdg-open to forward to the host)";
};
config = lib.mkMerge [
# Shared - declare the token secret whenever either side is enabled.
(mkIf (cfg.server.enable || cfg.client.enable) {
sops.secrets.${secretName} = {
sopsFile = lib.snowfall.fs.get-file "secrets/common/evanreichard.yaml";
};
})
# Server - launchd agent running `open-proxy serve`; asserted Darwin-only.
(mkIf cfg.server.enable {
assertions = [
{
assertion = pkgs.stdenv.isDarwin;
message = "reichard.services.open-proxy.server is only supported on macOS (Darwin).";
}
];
launchd.agents.open-proxy = {
enable = true;
config = {
Label = "io.reichard.open-proxy";
ProgramArguments = [ "${package}/bin/open-proxy" "serve" ];
# launchd keys: start when the agent is loaded and keep the process running.
RunAtLoad = true;
KeepAlive = true;
EnvironmentVariables = {
OPEN_PROXY_TOKEN_FILE = tokenPath;
# open(1) lives in /usr/bin; launchd agents don't inherit a login PATH.
PATH = "/usr/bin:/bin:/usr/sbin:/sbin";
};
StandardOutPath = "${config.home.homeDirectory}/Library/Logs/open-proxy/open-proxy.out.log";
StandardErrorPath = "${config.home.homeDirectory}/Library/Logs/open-proxy/open-proxy.err.log";
};
};
})
# Client - asserted Linux-only; installs the shadowing symlinks and session environment.
(mkIf cfg.client.enable {
assertions = [
{
assertion = pkgs.stdenv.isLinux;
message = "reichard.services.open-proxy.client is only supported on Linux.";
}
];
# Shadow the openers via ~/.local/bin (prepended to PATH below). open-proxy
# keys off argv[0], so these symlinks run in client mode and fall back to
# any real opener further down PATH when the host is unreachable.
home.file = {
".local/bin/open".source = "${package}/bin/open-proxy";
".local/bin/xdg-open".source = "${package}/bin/open-proxy";
};
home.sessionPath = [ "$HOME/.local/bin" ];
home.sessionVariables = {
# Presumably so tools that honor $BROWSER also resolve to the shadowed `open` - confirm.
BROWSER = "open";
OPEN_PROXY_TOKEN_FILE = tokenPath;
};
})
];
}

View File

@@ -16,6 +16,12 @@ in
enable32Bit = mkBoolOpt false "enable 32-bit";
enableIntel = mkBoolOpt false "support for intel";
enableNvidia = mkBoolOpt false "support for nvidia";
nvidiaPackage = lib.mkOption {
type = lib.types.package;
default = config.boot.kernelPackages.nvidiaPackages.stable;
defaultText = "config.boot.kernelPackages.nvidiaPackages.stable";
description = "nvidia driver package; pin to legacy_580 for Pascal (GTX 10xx) and older";
};
};
config = mkIf cfg.enable {
@@ -32,7 +38,7 @@ in
# Enable Nvidia Hardware
hardware.nvidia = mkIf cfg.enableNvidia {
package = config.boot.kernelPackages.nvidiaPackages.stable;
package = cfg.nvidiaPackage;
modesetting.enable = true;
powerManagement.enable = true;
open = false;

View File

@@ -38,6 +38,7 @@ in
sshUser = "evanreichard";
protocol = "ssh";
sshKey = config.sops.secrets.builder_ssh_key.path;
publicHostKey = "c3NoLWVkMjU1MTkgQUFBQUMzTnphQzFsWkRJMU5URTVBQUFBSUdscEMwcm9yQVRLeks4bUxNS2dDWXFNNU4yTi9HZ1MydDRNMTNjd25BT1M=";
supportedFeatures = [
"benchmark"
"big-parallel"

View File

@@ -1,8 +1,12 @@
# llama-swap Module — Agent Guide
## Model ID Convention
Use `<family>-<size>[-backend/variant][-context][-vl]-<placement>`. Omit `thinking` from IDs, use `vl` for vision-language models, and keep placement as the final suffix (`cuda0`, `cuda1`, or `dual`). Keep quantization and richer behavior details in the display `name` unless they are needed to distinguish two active configs for the same family/placement.
## Syncing vLLM Configs from club-3090
The three vLLM model configs in `config.nix` (`vllm-qwen3.6-27b-long-text`, `vllm-qwen3.6-27b-long-vision`, `vllm-qwen3.6-27b-tools-text`) are derived from the club-3090 repo's Docker Compose files. Each config block has a `Synced from:` comment with the commit hash it was last aligned to.
The three vLLM model configs in `config.nix` (`qwen3.6-27b-vllm-180k-cuda0`, `qwen3.6-27b-vllm-145k-vl-cuda0`, `qwen3.6-27b-vllm-75k-cuda0`) are derived from the club-3090 repo's Docker Compose files. Each config block has a `Synced from:` comment with the commit hash it was last aligned to.
### Source Files
@@ -10,9 +14,9 @@ The upstream compose files live at https://github.com/noonghunna/club-3090 under
| config.nix model ID | Compose file |
|------------------------------------|-------------------------------------|
| `vllm-qwen3.6-27b-long-text` | `docker-compose.long-text.yml` |
| `vllm-qwen3.6-27b-long-vision` | `docker-compose.long-vision.yml` |
| `vllm-qwen3.6-27b-tools-text` | `docker-compose.tools-text.yml` |
| `qwen3.6-27b-vllm-180k-cuda0` | `docker-compose.long-text.yml` |
| `qwen3.6-27b-vllm-145k-vl-cuda0` | `docker-compose.long-vision.yml` |
| `qwen3.6-27b-vllm-75k-cuda0` | `docker-compose.tools-text.yml` |
### Sync Process

View File

@@ -14,8 +14,8 @@ in
# ---------------------------------------
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
"gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking";
"gpt-oss-20b-cuda0" = {
name = "GPT OSS 20B (CUDA0)";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
@@ -33,49 +33,23 @@ in
};
# https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main
"qwen3.6-35b-thinking" = {
name = "Qwen3.6 (35B) - Thinking";
"qwen3.6-35b-cuda0" = {
name = "Qwen3.6 35B (CUDA0, UD-IQ4)";
macros.ctx = "262144";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-35B-A3B-UD-IQ4_XS.gguf \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-35B-A3B-UD-IQ4_NL.gguf \
-c ''${ctx} \
-np 2 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.0 \
--presence-penalty 1.5 \
-dev CUDA0 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
"qwen3.6-27b-udq4-thinking" = {
name = "Qwen3.6 (27B) - Thinking (UD-Q4)";
macros.ctx = "140000";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-UD-Q4_K_XL.gguf \
-c ''${ctx} \
--parallel 1 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--presence-penalty 0.0 \
-ctk q8_0 \
-ctv q8_0 \
--spec-type mtp \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0 \
-fit off \
@@ -90,26 +64,49 @@ in
};
# https://huggingface.co/ubergarm/Qwen3.6-27B-GGUF/tree/main
"ik-qwen3.6-27b-iq4ks-thinking" = {
name = "Qwen3.6 (27B) - Thinking (ik IQ4_KS)";
macros.ctx = "131072";
"qwen3.6-27b-ik-cuda0" = {
name = "Qwen3.6 (27B) (CUDA0, IQ4_KS)";
macros.ctx = "156000";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${ik-llama-cpp}/bin/llama-server \
--port ''${PORT} \
--model /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-MTP-IQ4_KS.gguf \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-MTP-IQ4_KS.gguf \
-c ''${ctx} -ctk q8_0 -ctv q8_0 -ngl 99 \
-mtp --draft-max 4 --draft-p-min 0.75 \
-muge -mqkv -cram 32768 --ctx-checkpoints 32 \
--jinja --chat-template-kwargs '{"preserve_thinking":true}'
'';
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
"qwen3.6-27b-cuda0" = {
name = "Qwen3.6 27B (CUDA0, UD-Q4)";
macros.ctx = "110000";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-UD-Q4_K_XL.gguf \
-c ''${ctx} \
-ctk f16 -ctv q8_0 \
-mtp --draft-max 4 --draft-p-min 0.70 \
--merge-qkv \
-muge \
-ngl 99 \
--threads 1 \
--parallel 1 \
--jinja \
--no-mmap \
--ctx-checkpoints 32 \
-cram 32768
-np 2 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--presence-penalty 0.0 \
-ctk q8_0 \
-ctv q8_0 \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
metadata = {
type = [
@@ -120,8 +117,8 @@ in
};
# https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/tree/main
"gemma-4-26b-vision" = {
name = "Gemma 4 (26B) - Vision";
"gemma-4-26b-vl-cuda0" = {
name = "Gemma 4 26B (VL, CUDA0)";
macros.ctx = "196608";
cmd = ''
${llama-cpp}/bin/llama-server \
@@ -150,12 +147,285 @@ in
};
};
# https://huggingface.co/Lorbus/Qwen3.6-27B-int4-AutoRound
"qwen3.6-27b-vllm-50k-cuda0" = {
name = "Qwen3.6 27B (vLLM, 50K, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "50000";
proxy = "http://127.0.0.1:\${PORT}";
cmd = ''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-p ''${PORT}:8000 \
vllm/vllm-openai:latest \
/root/.cache/huggingface/qwen3.6-27b-autoround-int4 \
--served-model-name ''${MODEL_ID} \
--quantization auto_round \
--dtype float16 \
--tensor-parallel-size 1 \
--gpu-memory-utilization 0.97 \
--max-model-len ''${ctx} \
--max-num-seqs 1 \
--max-num-batched-tokens 4128 \
--kv-cache-dtype fp8_e5m2 \
--enable-chunked-prefill \
--enable-prefix-caching \
--speculative-config '{"method":"mtp","num_speculative_tokens":3}' \
--enable-auto-tool-choice \
--tool-call-parser qwen3_coder \
--trust-remote-code \
--default-chat-template-kwargs '{"enable_thinking": false}' \
--host 0.0.0.0 \
--port 8000
'';
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
# Upstream: club-3090 83bf73d (2026-05-10) - single/long-text.yml
# Long-text variant - 180K context, text-only (no vision)
# TurboQuant 3-bit KV + MTP n=3 + Genesis v7.69 + Cliff 2 closure recipe
"vllm-qwen3.6-27b-long-text" = {
name = "vLLM Qwen3.6 (27B) - Long Text";
"qwen3.6-27b-vllm-75k-cuda0" = {
name = "Qwen3.6 27B (vLLM, 75K, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "75000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.97
--max-num-seqs 1
--max-num-batched-tokens 2048
--kv-cache-dtype fp8_e5m2
--language-model-only
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--chat-template /templates/chat_template.jinja
--enable-prefix-caching
--enable-chunked-prefill
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-v /mnt/ssd/vLLM/Templates/chat_template-v11.jinja:/templates/chat_template.jinja \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
"qwen3.6-27b-vllm-145k-vl-cuda0" = {
name = "Qwen3.6 27B (vLLM, 145K, VL, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "145000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.95
--max-num-seqs 1
--max-num-batched-tokens 4128
--kv-cache-dtype turboquant_3bit_nc
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--enable-prefix-caching
--enable-chunked-prefill
--no-scheduler-reserve-full-isl
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_BUFFER_MODE=shared \
-e GENESIS_ENABLE_P100=1 \
-e GENESIS_ENABLE_P101=1 \
-e GENESIS_ENABLE_P103=1 \
-e GENESIS_ENABLE_P15B_FA_VARLEN_CLAMP=1 \
-e GENESIS_ENABLE_P38B_COMPILE_SAFE=1 \
-e GENESIS_ENABLE_P4=1 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P60B_TRITON_KERNEL=1 \
-e GENESIS_ENABLE_P60_GDN_NGRAM_FIX=1 \
-e GENESIS_ENABLE_P61B_STREAMING_OVERLAP=1 \
-e GENESIS_ENABLE_P61_QWEN3_MULTI_TOOL=1 \
-e GENESIS_ENABLE_P62_STRUCT_OUT_SPEC_TIMING=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P67_TQ_MULTI_QUERY_KERNEL=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P78_TOLIST_CAPTURE_GUARD=0 \
-e GENESIS_ENABLE_P81_FP8_BLOCK_SCALED_M_LE_8=0 \
-e GENESIS_ENABLE_P82=0 \
-e GENESIS_ENABLE_P83=1 \
-e GENESIS_ENABLE_P87=1 \
-e GENESIS_ENABLE_P91=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_P98=1 \
-e GENESIS_ENABLE_P99=1 \
-e GENESIS_ENABLE_PN11_GDN_AB_CONTIGUOUS=1 \
-e GENESIS_ENABLE_PN12_FFN_INTERMEDIATE_POOL=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN22_LOCAL_ARGMAX_TP=1 \
-e GENESIS_ENABLE_PN25_SILU_INDUCTOR_SAFE=1 \
-e GENESIS_ENABLE_PN26_SPARSE_V=1 \
-e GENESIS_ENABLE_PN30_DS_LAYOUT_SPEC_DECODE=1 \
-e GENESIS_ENABLE_PN34_WORKSPACE_LOCK_RELAX=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_ENABLE_PN9_INDEPENDENT_DRAFTER_ATTN=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_P82_THRESHOLD_SINGLE=0.3 \
-e GENESIS_PN26_SPARSE_V_BLOCK_KV=8 \
-e GENESIS_PN26_SPARSE_V_NUM_WARPS=4 \
-e GENESIS_PN26_SPARSE_V_THRESHOLD=0.01 \
-e GENESIS_PREALLOC_TOKEN_BUDGET=4128 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_SSM_CONV_STATE_LAYOUT=DS \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_USE_FUSED_MOE_GROUPED_TOPK=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
"vision"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
"qwen3.6-27b-vllm-180k-cuda0" = {
name = "Qwen3.6 27B (vLLM, 180K, CUDA0)";
checkEndpoint = "/v1/models";
macros.ctx = "180000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
@@ -291,276 +561,13 @@ in
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
# Upstream: club-3090 83bf73d (2026-05-10) - single/long-vision.yml
# Long-vision variant - 145K context with vision tower active
# TurboQuant 3-bit KV + MTP n=3 + Genesis v7.69 + Cliff 2 env vars (mem-util kept at 0.95)
"vllm-qwen3.6-27b-long-vision" = {
name = "vLLM Qwen3.6 (27B) - Long Vision";
macros.ctx = "145000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.95
--max-num-seqs 1
--max-num-batched-tokens 4128
--kv-cache-dtype turboquant_3bit_nc
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--enable-prefix-caching
--enable-chunked-prefill
--no-scheduler-reserve-full-isl
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_BUFFER_MODE=shared \
-e GENESIS_ENABLE_P100=1 \
-e GENESIS_ENABLE_P101=1 \
-e GENESIS_ENABLE_P103=1 \
-e GENESIS_ENABLE_P15B_FA_VARLEN_CLAMP=1 \
-e GENESIS_ENABLE_P38B_COMPILE_SAFE=1 \
-e GENESIS_ENABLE_P4=1 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P60B_TRITON_KERNEL=1 \
-e GENESIS_ENABLE_P60_GDN_NGRAM_FIX=1 \
-e GENESIS_ENABLE_P61B_STREAMING_OVERLAP=1 \
-e GENESIS_ENABLE_P61_QWEN3_MULTI_TOOL=1 \
-e GENESIS_ENABLE_P62_STRUCT_OUT_SPEC_TIMING=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P67_TQ_MULTI_QUERY_KERNEL=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P78_TOLIST_CAPTURE_GUARD=0 \
-e GENESIS_ENABLE_P81_FP8_BLOCK_SCALED_M_LE_8=0 \
-e GENESIS_ENABLE_P82=0 \
-e GENESIS_ENABLE_P83=1 \
-e GENESIS_ENABLE_P87=1 \
-e GENESIS_ENABLE_P91=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_P98=1 \
-e GENESIS_ENABLE_P99=1 \
-e GENESIS_ENABLE_PN11_GDN_AB_CONTIGUOUS=1 \
-e GENESIS_ENABLE_PN12_FFN_INTERMEDIATE_POOL=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN22_LOCAL_ARGMAX_TP=1 \
-e GENESIS_ENABLE_PN25_SILU_INDUCTOR_SAFE=1 \
-e GENESIS_ENABLE_PN26_SPARSE_V=1 \
-e GENESIS_ENABLE_PN30_DS_LAYOUT_SPEC_DECODE=1 \
-e GENESIS_ENABLE_PN34_WORKSPACE_LOCK_RELAX=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_ENABLE_PN9_INDEPENDENT_DRAFTER_ATTN=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_P82_THRESHOLD_SINGLE=0.3 \
-e GENESIS_PN26_SPARSE_V_BLOCK_KV=8 \
-e GENESIS_PN26_SPARSE_V_NUM_WARPS=4 \
-e GENESIS_PN26_SPARSE_V_THRESHOLD=0.01 \
-e GENESIS_PREALLOC_TOKEN_BUDGET=4128 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_SSM_CONV_STATE_LAYOUT=DS \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_USE_FUSED_MOE_GROUPED_TOPK=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
"vision"
];
};
};
# https://github.com/noonghunna/club-3090/tree/master/models/qwen3.6-27b/vllm
# Upstream: club-3090 83bf73d (2026-05-10) - single/tools-text.yml
# Tools-text variant - 75K context, text-only (no vision)
# fp8_e5m2 KV + MTP n=3. IDE agents (Cline, Cursor, OpenCode, etc.)
"vllm-qwen3.6-27b-tools-text" = {
name = "vLLM Qwen3.6 (27B) - Tools Text";
macros.ctx = "75000";
proxy = "http://127.0.0.1:\${PORT}";
cmd =
let
vllmCmd = ''
set -e; pip install xxhash pandas scipy -q;
python3 -m vllm._genesis.patches.apply_all;
python3 /patches/patch_timings_1acd67a.py;
exec vllm serve ''${VLLM_ENFORCE_EAGER:+--enforce-eager}
--served-model-name ''${MODEL_ID}
--model /root/.cache/huggingface/qwen3.6-27b-autoround-int4
--quantization auto_round
--dtype float16
--tensor-parallel-size 1
--max-model-len ''${ctx}
--gpu-memory-utilization 0.97
--max-num-seqs 1
--max-num-batched-tokens 2048
--kv-cache-dtype fp8_e5m2
--language-model-only
--trust-remote-code
--reasoning-parser qwen3
--enable-auto-tool-choice
--tool-call-parser qwen3_coder
--chat-template /templates/chat_template.jinja
--enable-prefix-caching
--enable-chunked-prefill
--speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":3}'
--host 0.0.0.0
--port 8000
'';
vllmCmdFlat = builtins.replaceStrings [ "\n" ] [ " " ] vllmCmd;
in
''
${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
--name ''${MODEL_ID} \
--ipc=host \
-e CUDA_DEVICE_MAX_CONNECTIONS=8 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0 \
-e GENESIS_ENABLE_P58_ASYNC_PLACEHOLDER_FIX=1 \
-e GENESIS_ENABLE_P64_QWEN3CODER_MTP_STREAMING=1 \
-e GENESIS_ENABLE_P66_CUDAGRAPH_SIZE_FILTER=1 \
-e GENESIS_ENABLE_P68_AUTO_FORCE_TOOL=1 \
-e GENESIS_ENABLE_P69_LONG_CTX_TOOL_REMINDER=1 \
-e GENESIS_ENABLE_P72_PROFILE_RUN_CAP=1 \
-e GENESIS_ENABLE_P74_CHUNK_CLAMP=1 \
-e GENESIS_ENABLE_P94=1 \
-e GENESIS_ENABLE_PN13_CUDA_GRAPH_LAMBDA_ARITY=1 \
-e GENESIS_ENABLE_PN14_TQ_DECODE_OOB_CLAMP=1 \
-e GENESIS_ENABLE_PN17_FA2_LSE_CLAMP=1 \
-e GENESIS_ENABLE_PN19_SCOPED_MAX_SPLIT=1 \
-e GENESIS_ENABLE_PN59_STREAMING_GDN=1 \
-e GENESIS_ENABLE_PN8_MTP_DRAFT_ONLINE_QUANT=1 \
-e GENESIS_P68_P69_LONG_CTX_THRESHOLD_CHARS=50000 \
-e GENESIS_PROFILE_RUN_CAP_M=4128 \
-e NCCL_CUMEM_ENABLE=0 \
-e NCCL_P2P_DISABLE=1 \
-e OMP_NUM_THREADS=1 \
-e PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True,max_split_size_mb:512 \
-e VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-e VLLM_FLOAT32_MATMUL_PRECISION=high \
-e VLLM_MARLIN_USE_ATOMIC_ADD=1 \
-e VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS=0 \
-e VLLM_NO_USAGE_STATS=1 \
-e VLLM_USE_FLASHINFER_SAMPLER=1 \
-e VLLM_WORKER_MULTIPROC_METHOD=spawn \
-e VLLM_ENFORCE_EAGER \
-v /mnt/ssd/vLLM/Models:/root/.cache/huggingface \
-v /mnt/ssd/vLLM/Patches/genesis/vllm/_genesis:/usr/local/lib/python3.12/dist-packages/vllm/_genesis:ro \
-v /mnt/ssd/vLLM/Patches/patch_timings_1acd67a.py:/patches/patch_timings_1acd67a.py:ro \
-v /mnt/ssd/vLLM/Templates/chat_template-v11.jinja:/templates/chat_template.jinja \
-p ''${PORT}:8000 \
--entrypoint /bin/bash \
vllm/vllm-openai:nightly-1acd67a795ebccdf9b9db7697ae9082058301657 \
-c "${vllmCmdFlat}"
'';
# Cache Bug - On resume from cache, VRAM usage is higher than just generating in real time.
# -e TRITON_CACHE_DIR=/root/.triton/cache \
# -v /mnt/ssd/vLLM/Cache/torch_compile:/root/.cache/vllm/torch_compile_cache \
# -v /mnt/ssd/vLLM/Cache/triton:/root/.triton/cache \
cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# ---------------------------------------
# ------------- GTX 1080 Ti -------------
# ---------------------------------------
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
"qwen3-8b-vision" = {
name = "Qwen3 Vision (8B) - Thinking";
macros.ctx = "60000";
env = [ "CUDA_VISIBLE_DEVICES=1" ];
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
-c ''${ctx} \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [
"text-generation"
"vision"
];
};
};
# https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/tree/main
"qwen3.5-9b-thinking" = {
name = "Qwen3.5 (9B) - Thinking";
"qwen3.5-9b-vl-cuda1" = {
name = "Qwen3.5 9B (VL, CUDA1)";
macros.ctx = "131072";
env = [ "CUDA_VISIBLE_DEVICES=1" ];
cmd = ''
@@ -586,8 +593,8 @@ in
};
# https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/tree/main
"qwen3.5-4b-thinking" = {
name = "Qwen3.5 (4B) - Thinking";
"qwen3.5-4b-cuda1" = {
name = "Qwen3.5 4B (CUDA1)";
macros.ctx = "131072";
env = [ "CUDA_VISIBLE_DEVICES=1" ];
cmd = ''
@@ -605,6 +612,7 @@ in
metadata = {
type = [
"text-generation"
"coding"
];
};
};
@@ -614,8 +622,8 @@ in
# ---------------------------------------
# https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/tree/main
"qwen3-coder-next-80b-instruct" = {
name = "Qwen3 Coder Next (80B) - Instruct";
"qwen3-coder-next-80b-dual" = {
name = "Qwen3 Coder Next 80B (Dual GPU)";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
@@ -640,15 +648,15 @@ in
};
# https://huggingface.co/unsloth/Qwen3.6-27B-GGUF-MTP/tree/main
"qwen3.6-27b-udq6-thinking" = {
name = "Qwen3.6 (27B) - Thinking (UD-Q6)";
macros.ctx = "196608";
"qwen3.6-27b-dual" = {
name = "Qwen3.6 27B (Dual GPU, UD-Q6)";
macros.ctx = "120000";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-27B-UD-Q6_K_XL.gguf \
-c ''${ctx} \
--parallel 1 \
-np 4 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
@@ -656,10 +664,10 @@ in
--presence-penalty 0.0 \
-ctk q8_0 \
-ctv q8_0 \
--spec-type mtp \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0,CUDA1 \
-ts 75,25 \
-ts 73,27 \
-fit off \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
@@ -671,13 +679,47 @@ in
};
};
# https://huggingface.co/unsloth/Qwen3.6-35B-A3B-MTP-GGUF/tree/main
# llama-server preset for the Qwen3.6 35B A3B UD-Q6_K GGUF, run across both
# GPUs (`-dev CUDA0,CUDA1`) with a 72/28 tensor split and MTP draft
# speculation (`--spec-type draft-mtp`, at most 3 draft tokens).
"qwen3.6-35b-dual" = {
name = "Qwen3.6 35B (Dual GPU, UD-Q6)";
# NOTE(review): the three commented lines below look like an earlier, larger
# context setup (215000 tokens with q8_0 K/V cache types) kept for reference;
# the active command does not pass -ctk/-ctv. Confirm before re-enabling.
# macros.ctx = "215000";
# -ctk q8_0 \
# -ctv q8_0 \
# Context size substituted for ${ctx} in the command below.
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3.6/Qwen3.6-35B-A3B-UD-Q6_K.gguf \
-c ''${ctx} \
-np 4 -kvu \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--presence-penalty 0.0 \
--spec-type draft-mtp \
--spec-draft-n-max 3 \
-dev CUDA0,CUDA1 \
-fit off \
-ts 72,28 \
--chat-template-kwargs "{\"preserve_thinking\": true}"
'';
# Capability tags attached to this model entry.
metadata = {
type = [
"text-generation"
"coding"
];
};
};
# ---------------------------------------
# ---------- Stable Diffussion ----------
# ---------- Stable Diffusion ----------
# ---------------------------------------
"z-image-turbo" = {
"z-image-turbo-cuda0" = {
name = "Z-Image-Turbo";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -694,9 +736,10 @@ in
};
};
"qwen-image-edit-2511" = {
"qwen-image-edit-2511-cuda0" = {
name = "Qwen Image Edit 2511";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -720,9 +763,10 @@ in
};
};
"qwen-image-2512" = {
"qwen-image-2512-cuda0" = {
name = "Qwen Image 2512";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -742,9 +786,10 @@ in
};
};
"chroma-radiance" = {
"chroma-radiance-cuda0" = {
name = "Chroma Radiance";
checkEndpoint = "/";
env = [ "CUDA_VISIBLE_DEVICES=0" ];
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
@@ -764,32 +809,31 @@ in
# Concurrent Model Matrix
#
# CUDA0 models can run alongside CUDA1 models (one each). Models not
# listed in any set (dual-GPU models using -ts) run alone and evict
# everything.
# listed in any set (dual-GPU models) run alone and evict everything.
matrix = {
vars = {
# --- RTX 3090 Models ---
vlt = "vllm-qwen3.6-27b-long-text";
vtt = "vllm-qwen3.6-27b-tools-text";
vlv = "vllm-qwen3.6-27b-long-vision";
go = "gpt-oss-20b-thinking";
g4 = "gemma-4-26b-vision";
q36a = "qwen3.6-35b-thinking";
q36b = "qwen3.6-27b-udq4-thinking";
iq36 = "ik-qwen3.6-27b-iq4ks-thinking";
zi = "z-image-turbo";
qie = "qwen-image-edit-2511";
qi = "qwen-image-2512";
cr = "chroma-radiance";
v180 = "qwen3.6-27b-vllm-180k-cuda0";
v145 = "qwen3.6-27b-vllm-145k-vl-cuda0";
v75 = "qwen3.6-27b-vllm-75k-cuda0";
v50 = "qwen3.6-27b-vllm-50k-cuda0";
go = "gpt-oss-20b-cuda0";
g4 = "gemma-4-26b-vl-cuda0";
q36a = "qwen3.6-35b-cuda0";
q36b = "qwen3.6-27b-cuda0";
q36ik = "qwen3.6-27b-ik-cuda0";
zi = "z-image-turbo-cuda0";
qie = "qwen-image-edit-2511-cuda0";
qi = "qwen-image-2512-cuda0";
cr = "chroma-radiance-cuda0";
# --- GTX 1080 Ti Models ---
qv = "qwen3-8b-vision";
q4 = "qwen3.5-4b-thinking";
q9 = "qwen3.5-9b-thinking";
q4 = "qwen3.5-4b-cuda1";
q9 = "qwen3.5-9b-vl-cuda1";
};
sets = {
concurrent = "(go | g4 | q36a | q36b | iq36 | vlt | vtt | vlv | zi | qie | qi | cr) & (qv | q4 | q9)";
concurrent = "(go | g4 | q36a | q36b | q36ik | v180 | v145 | v75 | v50 | zi | qie | qi | cr) & (q4 | q9)";
};
};
}

View File

@@ -11,6 +11,31 @@ let
cfg = config.${namespace}.services.llama-swap;
llama-swap = pkgs.reichard.llama-swap;
# Presets for the bisect tool: every model from ./config.nix whose `cmd`
# contains "/bin/llama-server", reduced to its `cmd`, `name` (default "")
# and `env` (default [ ]).
llamaCppPresets =
let
models = (import ./config.nix { inherit pkgs; }).models;
# Models without a `cmd` attribute are treated as having an empty command
# and are therefore filtered out.
llamaCppModels = lib.filterAttrs (_: model: lib.hasInfix "/bin/llama-server" (model.cmd or "")) models;
in
builtins.mapAttrs (_: model: {
inherit (model) cmd;
name = model.name or "";
env = model.env or [ ];
}) llamaCppModels;
# The presets serialized as JSON into a store file.
llamaCppPresetFile = pkgs.writeText "llama-cpp-presets.json" (builtins.toJSON llamaCppPresets);
# Shell application built from ./scripts/llama-cpp-bisect-context; the
# `__LLAMA_CPP_PRESETS__` placeholder in the script text is replaced with the
# store path of the preset JSON file above.
llama-cpp-bisect-context = pkgs.writeShellApplication {
name = "llama-cpp-bisect-context";
runtimeInputs = with pkgs; [
coreutils
curl
gnused
python3
util-linux
];
text = builtins.replaceStrings
[ "__LLAMA_CPP_PRESETS__" ]
[ "${llamaCppPresetFile}" ]
(builtins.readFile ./scripts/llama-cpp-bisect-context);
};
in
{
options.${namespace}.services.llama-swap = {
@@ -37,7 +62,6 @@ in
description = "Model swapping for LLaMA C++ Server (or any local OpenAPI compatible server)";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "exec";
ExecStart = "${lib.getExe llama-swap} --listen :8080 --config ${
@@ -97,6 +121,7 @@ in
};
}) apiKeys);
templates."llama-swap.json" = {
restartUnits = [ "llama-swap.service" ];
owner = "llama-swap";
group = "llama-swap";
mode = "0400";
@@ -108,6 +133,8 @@ in
};
};
environment.systemPackages = [ llama-cpp-bisect-context ];
networking.firewall.allowedTCPPorts = [ 8080 ];
};
}

View File

@@ -0,0 +1,464 @@
#!/usr/bin/env bash
set -Eeuo pipefail
# Print the command-line help text to stdout.
# The heredoc delimiter is quoted ('EOF'), so ${PORT}, ${CTX}, $PORT and $CTX
# in the text are printed literally rather than expanded.
usage() {
cat <<'EOF'
Usage:
llama-cpp-bisect-context MODEL --low N --high N [options]
llama-cpp-bisect-context --cmd-template CMD --low N --high N [options]
llama-cpp-bisect-context --cmd-file FILE --low N --high N [options]
Bisect the largest llama.cpp llama-server context that can start and complete
a near-context prompt without OOMing. Startup-only mode is available for isolating the first cliff.
MODEL is a llama.cpp preset generated from the Nix llama-swap config.
Command templates are evaluated with these environment variables:
PORT random listen port for this trial
CTX candidate context size
Options:
--cmd-template CMD llama-server command, e.g. 'llama-server --port ${PORT} -c ${CTX} ...'
--cmd-file FILE executable or shell snippet using $PORT and $CTX
--preset-file FILE preset JSON file (default: Nix-generated presets)
--list-presets list available Nix-generated presets and exit
--low N known/assumed lower context bound
--high N upper context bound to test
--step N stop when high-low <= N (default: 1024)
--prompt-ratio PCT prompt fill target as percent of CTX (default: 90)
--chars-per-token N rough prompt sizing ratio (default: 4)
--prompt-turns N split the prompt across N user/assistant turns (default: 4)
--max-tokens N generated tokens for prompt test (default: 32)
--startup-timeout SEC seconds to wait for /health readiness (default: 300)
--request-timeout SEC seconds to wait for prompt response (default: 600)
--cooldown SEC seconds to sleep after stopping server (default: 5)
--startup-only only test server startup, not prompt/runtime OOM
--verbose print llama-server logs for each failed trial
--keep-logs keep trial logs after a successful run too
-h, --help show this help
Examples:
llama-cpp-bisect-context \
--cmd-template 'llama-server --port ${PORT} -m model.gguf -c ${CTX} -ngl 99' \
--low 32768 --high 196608
llama-cpp-bisect-context qwen3.6-27b-ik-cuda0 --low 32768 --high 180000
llama-cpp-bisect-context --cmd-file ./server-command.sh --low 32768 --high 196608
EOF
}
# ---------------------------------------------------------------------------
# Option defaults. Numeric defaults mirror the values printed by usage().
# ---------------------------------------------------------------------------
preset_model=""                       # positional MODEL (preset key)
preset_file="__LLAMA_CPP_PRESETS__"   # placeholder substituted at build time
list_presets=0
cmd_template=""
cmd_file=""
low=""
high=""
step=1024
prompt_ratio=90
chars_per_token=4
prompt_turns=4
max_tokens=32
startup_timeout=300
request_timeout=600
cooldown=5
startup_only=0
verbose=0
keep_logs=0

# Abort with a usage error when a value-taking option is the last argument.
# Without this guard `shift 2` fails and `set -e` terminates the script
# silently with status 1, giving the user no hint about what went wrong.
#   $1  option name as typed by the user
#   $2  number of arguments still unparsed (including the option itself)
require_option_value() {
    local option="$1" remaining="$2"
    if (( remaining < 2 )); then
        echo "missing value for $option" >&2
        usage >&2
        exit 2
    fi
}

# Parse the command line. Exactly one bare word is accepted and is taken as
# the preset model name; a second bare word is an error.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --cmd-template)    require_option_value "$1" "$#"; cmd_template="$2"; shift 2 ;;
        --cmd-file)        require_option_value "$1" "$#"; cmd_file="$2"; shift 2 ;;
        --preset-file)     require_option_value "$1" "$#"; preset_file="$2"; shift 2 ;;
        --list-presets)    list_presets=1; shift ;;
        --low)             require_option_value "$1" "$#"; low="$2"; shift 2 ;;
        --high)            require_option_value "$1" "$#"; high="$2"; shift 2 ;;
        --step)            require_option_value "$1" "$#"; step="$2"; shift 2 ;;
        --prompt-ratio)    require_option_value "$1" "$#"; prompt_ratio="$2"; shift 2 ;;
        --chars-per-token) require_option_value "$1" "$#"; chars_per_token="$2"; shift 2 ;;
        --prompt-turns)    require_option_value "$1" "$#"; prompt_turns="$2"; shift 2 ;;
        --max-tokens)      require_option_value "$1" "$#"; max_tokens="$2"; shift 2 ;;
        --startup-timeout) require_option_value "$1" "$#"; startup_timeout="$2"; shift 2 ;;
        --request-timeout) require_option_value "$1" "$#"; request_timeout="$2"; shift 2 ;;
        --cooldown)        require_option_value "$1" "$#"; cooldown="$2"; shift 2 ;;
        --startup-only)    startup_only=1; shift ;;
        --verbose)         verbose=1; shift ;;
        --keep-logs)       keep_logs=1; shift ;;
        -h|--help)         usage; exit 0 ;;
        --*)               echo "unknown argument: $1" >&2; usage >&2; exit 2 ;;
        *)
            if [[ -n "$preset_model" ]]; then
                echo "unexpected positional argument: $1" >&2
                usage >&2
                exit 2
            fi
            preset_model="$1"
            shift
            ;;
    esac
done
# Print every preset key from $preset_file, sorted, one per line. When the
# preset has a non-empty "name" it is appended after a TAB. Exits the script
# with status 0 afterwards (this function does not return to its caller).
list_presets_json() {
    # The Python program is passed on stdin; block bodies must stay indented
    # relative to column 0 because the heredoc is fed to Python verbatim.
    python3 - "$preset_file" <<'PY'
import json
import sys

with open(sys.argv[1]) as f:
    presets = json.load(f)

for key in sorted(presets):
    name = presets[key].get("name", "")
    print(f"{key}\t{name}" if name else key)
PY
    exit 0
}

# --list-presets short-circuits all other validation.
if (( list_presets )); then
    list_presets_json
fi
# Materialize the preset named by $preset_model (looked up in $preset_file)
# as a shell snippet at "$tmpdir/preset-command.sh" and point $cmd_file at it.
#
# The generated snippet contains, in order:
#   - `set -e`
#   - one `export KEY=VALUE` per well-formed "KEY=VALUE" entry of the preset's
#     "env" list (entries without "=" or with an empty key are skipped; the
#     value is shell-quoted)
#   - the preset's "cmd" with `${ctx}` / `$ctx` rewritten to `${CTX}`
#
# An unknown preset prints the available keys to stderr and makes Python exit
# with status 2.
load_preset() {
    local command_file="$tmpdir/preset-command.sh"
    # The Python program is passed on stdin; block bodies must stay indented
    # relative to column 0 because the heredoc is fed to Python verbatim.
    python3 - "$preset_file" "$preset_model" "$command_file" <<'PY'
import json
import shlex
import sys

preset_file, model_id, command_file = sys.argv[1:]

with open(preset_file) as f:
    presets = json.load(f)

try:
    preset = presets[model_id]
except KeyError:
    print(f"unknown preset: {model_id}", file=sys.stderr)
    print("available presets:", file=sys.stderr)
    for key in sorted(presets):
        print(f"  {key}", file=sys.stderr)
    sys.exit(2)

# The preset uses a lower-case ctx macro; the bisect driver exports CTX.
cmd = preset["cmd"].replace("${ctx}", "${CTX}").replace("$ctx", "${CTX}")
env = preset.get("env", [])

with open(command_file, "w") as f:
    f.write("set -e\n")
    for item in env:
        key, sep, value = item.partition("=")
        if not sep or not key:
            continue
        f.write(f"export {key}={shlex.quote(value)}\n")
    f.write(cmd)
    if not cmd.endswith("\n"):
        f.write("\n")
PY
    cmd_file="$command_file"
}
# Validate that a numeric option value is a canonical non-negative decimal
# integer; print an error naming the option and exit 2 otherwise.
#   $1  option name used in the error message (e.g. "--low")
#   $2  value to validate
#
# Zero is accepted (e.g. `--cooldown 0`). Values with leading zeros are
# rejected because bash arithmetic treats them as octal: "010" would be read
# as 8 and "08" aborts `(( ))` with "value too great for base".
require_int() {
    local name="$1" value="$2"
    if [[ ! "$value" =~ ^(0|[1-9][0-9]*)$ ]]; then
        echo "$name must be a non-negative integer without leading zeros" >&2
        exit 2
    fi
}
# ---------------------------------------------------------------------------
# Option validation. Every failure here is a usage error (exit status 2).
# ---------------------------------------------------------------------------

# Exactly one command source must be chosen: MODEL, --cmd-template or --cmd-file.
mode_count=0
for command_source in "$preset_model" "$cmd_template" "$cmd_file"; do
    if [[ -n "$command_source" ]]; then
        mode_count=$((mode_count + 1))
    fi
done
if (( mode_count != 1 )); then
    echo "use exactly one of MODEL, --cmd-template, or --cmd-file" >&2
    exit 2
fi

if [[ -z "$low" || -z "$high" ]]; then
    echo "missing --low or --high" >&2
    exit 2
fi

# Each entry is "option-name:value"; the name is only used in error messages.
for pair in \
    "--low:$low" \
    "--high:$high" \
    "--step:$step" \
    "--prompt-ratio:$prompt_ratio" \
    "--chars-per-token:$chars_per_token" \
    "--prompt-turns:$prompt_turns" \
    "--max-tokens:$max_tokens" \
    "--startup-timeout:$startup_timeout" \
    "--request-timeout:$request_timeout" \
    "--cooldown:$cooldown"; do
    require_int "${pair%%:*}" "${pair#*:}"
done

if (( low <= 0 || high <= low || step <= 0 || prompt_ratio <= 0 || chars_per_token <= 0 || prompt_turns <= 0 )); then
    echo "invalid numeric bounds/options" >&2
    exit 2
fi

# In preset mode cmd_file is still empty here; it is filled in by load_preset.
if [[ -n "$cmd_file" && ! -f "$cmd_file" ]]; then
    echo "cmd file not found: $cmd_file" >&2
    exit 2
fi

# setsid is checked too: start_server launches every trial through it, and a
# missing setsid would otherwise surface as a misleading "low bound does not
# pass" failure instead of a clear dependency error.
for dep in curl python3 setsid; do
    if ! command -v "$dep" >/dev/null 2>&1; then
        echo "missing required command: $dep" >&2
        exit 2
    fi
done
# Per-run state shared by the helpers below.
tmpdir="$(mktemp -d)"   # holds trial logs, prompt payloads, preset command
server_pid=""           # pid of the trial server currently running, if any
log_file=""             # log path of the most recent trial

# Stop the server recorded in $server_pid and clear the variable.
# Sends a plain `kill` to the process group and to the pid itself, polls once
# per second for up to 30 seconds, then escalates to `kill -9` if the pid is
# still alive. All kill/wait failures are ignored. No-op when no server is
# recorded.
terminate_server() {
    [[ -n "${server_pid:-}" ]] || return 0

    # start_server launches through setsid, so the negative pid addresses the
    # process group led by the server wrapper.
    kill -- "-${server_pid}" >/dev/null 2>&1 || true
    kill "$server_pid" >/dev/null 2>&1 || true

    local elapsed
    for (( elapsed = 0; elapsed < 30; elapsed++ )); do
        kill -0 "$server_pid" >/dev/null 2>&1 || break
        sleep 1
    done

    if kill -0 "$server_pid" >/dev/null 2>&1; then
        kill -9 -- "-${server_pid}" >/dev/null 2>&1 || true
        kill -9 "$server_pid" >/dev/null 2>&1 || true
    fi

    # Reap the child so it does not linger as a zombie.
    wait "$server_pid" >/dev/null 2>&1 || true
    server_pid=""
}
# EXIT handler: stop any running trial server and dispose of the temp dir.
# The temp dir (trial logs, payloads) is kept when --keep-logs was given or
# when the script is exiting with a non-zero status.
cleanup() {
# Must be the first statement: captures the script's exit status before any
# other command overwrites $?.
local status=$?
# Drop all handlers so cleanup cannot be re-entered by a second signal or by
# the exit that follows.
trap - EXIT INT TERM HUP
terminate_server
if (( keep_logs || status != 0 )); then
echo "logs kept in: $tmpdir" >&2
else
rm -rf "$tmpdir"
fi
}
# INT/TERM/HUP handler: report and exit with 130; the EXIT trap then runs
# cleanup.
interrupt() {
echo "interrupted; stopping llama-server" >&2
exit 130
}
# NOTE(review): cleanup only sees the server_pid of this shell. A server
# started inside a command substitution (the main flow calls bisect_max via
# "$(...)") records its pid in that subshell only - confirm interrupted runs
# do not leave such a server behind.
trap cleanup EXIT
trap interrupt INT TERM HUP
# Preset mode: generate the command file now that $tmpdir exists and the
# traps that remove it are installed.
if [[ -n "$preset_model" ]]; then
load_preset
fi
# Print a TCP port number that was free on 127.0.0.1 at the time of the call.
# The port is obtained by binding to port 0 and reading back the assigned
# number; the socket is closed before returning, so another process could in
# principle claim the port before the server binds it.
free_port() {
    # The Python program is passed on stdin; the `with` body must stay
    # indented relative to column 0.
    python3 - <<'PY'
import socket

with socket.socket() as s:
    s.bind(("127.0.0.1", 0))
    print(s.getsockname()[1])
PY
}
# Launch one trial server in the background for the given context size.
#   $1  candidate context size
# Exports PORT (from free_port) and CTX for the command, writes a small header
# to "$tmpdir/llama-server-<ctx>.log", appends the server's stdout/stderr to
# that file, and records the background pid in $server_pid.
start_server() {
    local ctx="$1"
    PORT="$(free_port)"
    CTX="$ctx"
    export PORT CTX
    log_file="$tmpdir/llama-server-${ctx}.log"

    # Pick the command source once; it decides both the header line and the
    # way bash is invoked.
    local header_key header_value
    local -a launch
    if [[ -n "$cmd_file" ]]; then
        header_key="CMD_FILE"
        header_value="$cmd_file"
        launch=(bash "$cmd_file")
    else
        header_key="CMD_TEMPLATE"
        header_value="$cmd_template"
        launch=(bash -c "$cmd_template")
    fi

    printf 'CTX=%s\nPORT=%s\n%s=%s\n--- llama-server output ---\n' \
        "$CTX" "$PORT" "$header_key" "$header_value" >"$log_file"

    # setsid gives the server its own session/process group so that
    # terminate_server can signal the whole group.
    setsid "${launch[@]}" >>"$log_file" 2>&1 &
    server_pid="$!"
}
# Stop the current trial server, then pause for $cooldown seconds
# (--cooldown) before the caller starts the next trial.
stop_server() {
terminate_server
sleep "$cooldown"
}
# Report a failed trial on stderr.
#   $1  label of the failing phase (e.g. startup, ctx-startup, ctx-prompt)
#   $2  context size that failed
# With --verbose, the first 220 lines of the trial log are echoed as well.
print_failure_log() {
    local label="$1" ctx="$2"
    echo "[$label] ctx=$ctx failed; log: $log_file" >&2

    (( verbose )) || return 0
    [[ -f "$log_file" ]] || return 0
    sed -n '1,220p' "$log_file" >&2 || true
}
# Wait until the trial server reports ready, polling every 2 seconds.
# Returns 0 once ready; returns 1 when the server process has exited or
# $startup_timeout seconds have elapsed.
#
# Readiness is decided by the HTTP status of /health:
#   200  -> ready
#   404  -> the server has no /health endpoint; fall back to /v1/models
#   else -> not ready yet (connection refused, 503 while loading, ...)
#
# /v1/models is deliberately NOT consulted while /health exists but is
# unavailable: llama-server answers /v1/models during model loading, so using
# it unconditionally would declare the server ready before the model (and its
# KV cache) has been allocated.
wait_ready() {
    local deadline=$((SECONDS + startup_timeout))
    local base_url="http://127.0.0.1:${PORT}"
    local health_status

    while (( SECONDS < deadline )); do
        # A dead server can never become ready; fail fast.
        if [[ -n "${server_pid:-}" ]] && ! kill -0 "$server_pid" >/dev/null 2>&1; then
            return 1
        fi

        # Without -f curl succeeds on HTTP errors, so the status code itself
        # can be inspected; connection failures yield "000" or empty output.
        health_status="$(curl -s -o /dev/null --max-time 5 -w '%{http_code}' "$base_url/health" 2>/dev/null || true)"
        case "$health_status" in
            200)
                return 0
                ;;
            404)
                if curl -fsS --max-time 5 "$base_url/v1/models" >/dev/null 2>&1; then
                    return 0
                fi
                ;;
        esac

        sleep 2
    done
    return 1
}
# Print a chat-completions request body (JSON) sized for the given context.
#   $1  candidate context size
# The total prompt length in characters is
#   ctx * prompt_ratio / 100 * chars_per_token
# (integer arithmetic) and is split across $prompt_turns user messages filled
# with repeated filler text, separated by fixed "Acknowledged." assistant
# messages. The request asks for $max_tokens tokens, temperature 0, no
# streaming.
make_prompt_json() {
    local ctx="$1"
    local approx_tokens=$(( ctx * prompt_ratio / 100 ))
    local chars=$(( approx_tokens * chars_per_token ))
    # The Python program is passed on stdin; the loop body must stay indented
    # relative to column 0.
    python3 - "$chars" "$max_tokens" "$prompt_turns" <<'PY'
import json
import sys

chars = int(sys.argv[1])
max_tokens = int(sys.argv[2])
prompt_turns = int(sys.argv[3])

seed = (
    "This is deterministic context filler for memory testing. "
    "It uses normal words so token estimates are closer to real prompts. "
)

messages = []
remaining = chars
for turn in range(prompt_turns):
    # Spread what is left evenly over the remaining turns; every user
    # message carries at least one character.
    turns_left = prompt_turns - turn
    chunk_chars = max(1, remaining // turns_left)
    content = (seed * ((chunk_chars // len(seed)) + 1))[:chunk_chars]
    messages.append({"role": "user", "content": content})
    remaining -= chunk_chars
    if turn != prompt_turns - 1:
        messages.append({"role": "assistant", "content": "Acknowledged."})

print(json.dumps({
    "messages": messages,
    "max_tokens": max_tokens,
    "temperature": 0,
    "stream": False,
}))
PY
}
# Send the near-context prompt for the given context size to the trial server.
#   $1  candidate context size
# The request body is written to "$tmpdir/prompt-<ctx>.json" and posted to
# /v1/chat/completions; the response body is discarded. The return status is
# curl's (non-zero on transport errors, HTTP errors via -f, or after
# $request_timeout seconds).
run_prompt() {
    local ctx="$1"
    local payload="$tmpdir/prompt-${ctx}.json"
    local endpoint="http://127.0.0.1:${PORT}/v1/chat/completions"

    make_prompt_json "$ctx" >"$payload"

    local -a request=(
        -fsS
        --max-time "$request_timeout"
        -H 'Content-Type: application/json'
        -d "@$payload"
        "$endpoint"
    )
    curl "${request[@]}" >/dev/null
}
# Trial predicate for --startup-only mode.
#   $1  candidate context size
# Starts a server and returns 0 if it becomes ready, 1 otherwise. The server
# is stopped in both cases; progress and failures are reported on stderr.
test_startup() {
    local ctx="$1"
    echo "[startup] testing ctx=$ctx" >&2
    start_server "$ctx"

    if ! wait_ready; then
        print_failure_log startup "$ctx"
        stop_server
        return 1
    fi

    stop_server
    echo "[startup] ctx=$ctx PASS" >&2
    return 0
}
# Trial predicate for the default mode.
#   $1  candidate context size
# Starts a server, waits for readiness, then sends the near-context prompt.
# Returns 0 only if both steps succeed. The server is stopped on every path;
# the failing phase is reported as ctx-startup or ctx-prompt on stderr.
test_qualified_context() {
    local ctx="$1"
    local failed_phase=""
    echo "[ctx] testing ctx=$ctx with prompt_ratio=${prompt_ratio}% prompt_turns=${prompt_turns}" >&2
    start_server "$ctx"

    # The prompt is only attempted once the server is ready.
    if ! wait_ready; then
        failed_phase="ctx-startup"
    elif ! run_prompt "$ctx"; then
        failed_phase="ctx-prompt"
    fi

    if [[ -n "$failed_phase" ]]; then
        print_failure_log "$failed_phase" "$ctx"
        stop_server
        return 1
    fi

    stop_server
    echo "[ctx] ctx=$ctx PASS" >&2
    return 0
}
# Bisect between a known-passing and an assumed-failing value.
#   $1  label echoed back in the result line
#   $2  lower bound, treated as passing
#   $3  upper bound, treated as failing (it is never tested itself)
#   $4  name of a predicate function; called with one candidate value, must
#       return 0 for pass and non-zero for fail
# Stops once `fail - pass <= step` and prints "label:pass:fail" on stdout.
bisect_max() {
    local label="$1" pass="$2" fail="$3" fn="$4"

    # Callers capture the result with "$(bisect_max ...)". Inside a command
    # substitution bash resets the parent's traps, and the server pid recorded
    # by the predicate lives only in this subshell, so the parent's cleanup
    # cannot stop a server started here. Because trial servers are detached
    # with setsid they also do not receive the terminal's Ctrl-C. Install a
    # handler in the subshell so an interrupted bisection stops its server.
    if (( BASH_SUBSHELL > 0 )); then
        trap 'terminate_server; exit 130' INT TERM HUP
    fi

    while (( fail - pass > step )); do
        local mid=$(( (pass + fail) / 2 ))
        if "$fn" "$mid"; then
            pass="$mid"
        else
            fail="$mid"
        fi
    done
    printf '%s:%s:%s\n' "$label" "$pass" "$fail"
}
# ---------------------------------------------------------------------------
# Main flow. In both modes the low bound must pass before bisecting; the
# result is printed as a short human-readable summary followed by JSON.
# ---------------------------------------------------------------------------

# --startup-only: bisect on server readiness alone.
if (( startup_only )); then
    test_startup "$low" || {
        echo "low bound does not pass startup: $low" >&2
        exit 1
    }

    # bisect_max prints "label:pass:fail".
    result="$(bisect_max startup "$low" "$high" test_startup)"
    IFS=: read -r _label pass fail <<<"$result"

    printf '\nResult:\n startup max passing ctx: %s\n startup min failing ctx: %s\n' \
        "$pass" "$fail"
    python3 - "$pass" "$fail" <<'PY'
import json
import sys

max_passing, min_failing = (int(arg) for arg in sys.argv[1:])
summary = {"startup": {"maxPassingCtx": max_passing, "minFailingCtx": min_failing}}
print(json.dumps(summary, indent=2))
PY
    exit 0
fi

# Default mode: bisect on readiness plus a near-context prompt.
test_qualified_context "$low" || {
    echo "low bound does not pass qualified context test: $low" >&2
    exit 1
}

result="$(bisect_max context "$low" "$high" test_qualified_context)"
IFS=: read -r _label pass fail <<<"$result"

printf '\nResult:\n context max passing ctx: %s\n context min failing ctx: %s\n' \
    "$pass" "$fail"
printf ' prompt ratio: %s%%\n prompt turns: %s\n' "$prompt_ratio" "$prompt_turns"
python3 - "$pass" "$fail" "$prompt_ratio" "$prompt_turns" <<'PY'
import json
import sys

max_passing, min_failing, prompt_ratio, prompt_turns = (int(arg) for arg in sys.argv[1:5])
summary = {
    "context": {
        "maxPassingCtx": max_passing,
        "minFailingCtx": min_failing,
        "promptRatio": prompt_ratio,
        "promptTurns": prompt_turns,
    }
}
print(json.dumps(summary, indent=2))
PY

View File

@@ -1,4 +1,4 @@
{ inputs, ... }:
final: _prev: {
firefox-addons = inputs.firefox-addons.packages.${final.system};
firefox-addons = inputs.firefox-addons.packages.${final.stdenv.hostPlatform.system};
}

View File

@@ -5,12 +5,12 @@
buildGoModule rec {
pname = "conduit";
version = "unstable-2026-05-03";
version = "unstable-2026-05-15";
src = fetchgit {
url = "https://gitea.va.reichard.io/evan/conduit.git";
rev = "9edea27148670b208c935c070ff3f58a416241b1";
hash = "sha256-s8/ghyoAyFOvAMhE7vzckEZ8OxIF116OyJ4Uj30s65A=";
rev = "8dfb14f1e7f952bee92cad29703dba55fb156f0c";
hash = "sha256-Fc0FHLCNBbEpOFFD0bHSDo1E5AsOzL2fJzHufleKBIo=";
};
vendorHash = "sha256-LOFT8eCNRm5Q2tVl7ifu4dB5cr828B/E2NJW5WiW0LI=";

View File

@@ -0,0 +1,35 @@
# llama-cpp — Agent Notes
Override of `pkgs.llama-cpp` with CUDA + Vulkan + BLAS, custom CMake flags, and an optional fork pin.
## Pitfalls
### `version` must be numeric
Upstream `pkgs/by-name/ll/llama-cpp/package.nix` passes `version` straight through as a C integer via:
```nix
(cmakeFeature "LLAMA_BUILD_NUMBER" finalAttrs.version)
```
`build-info.cpp` then emits `int LLAMA_BUILD_NUMBER = <version>;`. A non-numeric `version` (e.g. `"mtp-clean-08b1474"`) breaks the build with:
```
error: '<value>' was not declared in this scope
int LLAMA_BUILD_NUMBER = <value>;
```
**Convention:**
- Upstream tag pins: use the bare build number, e.g. `version = "9048";` with `tag = "b${version}";`.
- Fork / arbitrary commit pins: use a `YYYYMMDD` date derived from the commit's committer date (`gh api repos/<owner>/<repo>/commits/<sha>` → `.commit.committer.date`).
### `leaveDotGit` + `postFetch`
We keep `.git` only long enough to record the short SHA into `$out/COMMIT`, then strip it. Preserve this pattern when changing `src` so downstream tooling that reads `COMMIT` keeps working.
## Refreshing the pinned commit (fork)
1. `git ls-remote https://github.com/<owner>/llama.cpp refs/heads/<branch>` → get the full SHA.
2. `nix run nixpkgs#nix-prefetch-github -- <owner> llama.cpp --rev <sha> --leave-dot-git` → get the hash.
3. Look up the commit date: `curl -s https://api.github.com/repos/<owner>/llama.cpp/commits/<sha> | jq -r '.commit.committer.date'`.
4. Update `src.{owner,rev,hash}` and set `version = "YYYYMMDD"`.

View File

@@ -1,4 +1,22 @@
{ pkgs }:
let
# Version MUST be an integer string.
# For tagged releases use the tag number (e.g. "9222").
# For HEAD builds use YYYYMMDD (e.g. "20260519").
version = "9496";
src = pkgs.fetchFromGitHub {
owner = "ggml-org";
repo = "llama.cpp";
rev = "94a220cd6745e6e3f8de62870b66fd5b9bc92700";
hash = "sha256-1jAowfGVzrrHDwWWzKESY7aV82whnuIg1N37fmtcgyw=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
find "$out" -name .git -print0 | xargs -0 rm -rf
'';
};
in
(pkgs.llama-cpp.override {
cudaSupport = true;
blasSupport = true;
@@ -6,19 +24,13 @@
metalSupport = false;
vulkanSupport = true;
}).overrideAttrs
(oldAttrs: rec {
version = "9048";
src = pkgs.fetchFromGitHub {
owner = "ggml-org";
repo = "llama.cpp";
tag = "b${version}";
hash = "sha256-lYtX0hLReCnFw1+xOKefly+WunuoN89ZFEFl5mK5pQ4=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
find "$out" -name .git -print0 | xargs -0 rm -rf
'';
};
(oldAttrs: {
inherit version src;
# WebUI npm deps hash for our pinned src. Upstream nixpkgs builds the WebUI
# from tools/ui via `npm run build` in preConfigure (offline, using these
# deps), so no custom webui derivation / HF-bucket workaround is needed.
npmDepsHash = "sha256-1iM0LGeI9e+gZEHk46lkBe51DxIhiimfAm9o3Z3m9Ik=";
# Add SPIR-V Headers for Vulkan Backend
# Newer llama.cpp requires spirv/unified1/spirv.hpp which isn't
@@ -36,14 +48,4 @@
export NIX_ENFORCE_NO_NATIVE=0
${oldAttrs.preConfigure or ""}
'';
# Apply Patches
patchFlags = [ "-p1" ];
patches = (oldAttrs.patches or [ ]) ++ [
(pkgs.fetchpatch {
name = "mtp.patch";
url = "https://github.com/ggml-org/llama.cpp/pull/22673.patch";
hash = "sha256-HqpchhOpxuw5mY4a/OCWGDr2Y32rC4FeOHuhaVt+mvY=";
})
];
})

View File

@@ -13,13 +13,13 @@ let
in
buildGo126Module (finalAttrs: {
pname = "llama-swap";
version = "208";
version = "216";
src = fetchFromGitHub {
owner = "mostlygeek";
repo = "llama-swap";
tag = "v${finalAttrs.version}";
hash = "sha256-E+BqqQcCLlW/DWvjwC66ClV6yuQ5x7cAMkLPJkS3x5M=";
hash = "sha256-PHSY4z2h406xL+EcIYyrzr4s28txO7SCsWm8hrXf+2U=";
# populate values that require us to use git. By doing this in postFetch we
# can delete .git afterwards and maintain better reproducibility of the src.
leaveDotGit = true;
@@ -32,10 +32,10 @@ buildGo126Module (finalAttrs: {
'';
};
vendorHash = "sha256-tOOZgugiVcICYg9HyeTolyAg+YZWtxSJTvAuwfMazHQ=";
vendorHash = "sha256-QysQ7YdwJcLTziwL25j73n3tQVvzVQIFxN4GkTU8JZg=";
passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
passthru.npmDepsHash = "sha256-6D4F58sSBkr7FKKO34gDhnZ9uN/SfsyYn1xJjYsMeq4=";
passthru.npmDepsHash = "sha256-NJqEJ+XTdpPFtJJxP4CGu+JDUW7lKDcFgsixQJ3SXtQ=";
nativeBuildInputs = [
versionCheckHook

View File

@@ -0,0 +1,25 @@
# open-proxy: Go program built with buildGoModule from a pinned commit of the
# upstream Gitea repository.
{ lib
, buildGoModule
, fetchgit
}:
buildGoModule rec {
pname = "open-proxy";
# The source is pinned by commit (`rev` below) rather than by a release tag,
# so the version is an "unstable-<date>" string.
version = "unstable-2026-06-16";
src = fetchgit {
url = "https://gitea.va.reichard.io/evan/open-proxy.git";
rev = "a589341214a1e035b6ce2b2d79870e591a25ccca";
hash = "sha256-onfvxOl4TdeRrVLD1oJWcnhEDzKFYU/V0qxV1+NpQrg=";
};
# NOTE(review): per the nixpkgs Go documentation, `null` means no separate
# vendor fetch is performed (the module has no external deps or vendors them
# in-tree) — confirm against the upstream go.mod when bumping `rev`.
vendorHash = null;
meta = {
description = "Forward `open`/`xdg-open` from a remote VM to the host machine";
homepage = "https://gitea.va.reichard.io/evan/open-proxy";
license = lib.licenses.mit;
maintainers = with lib.maintainers; [ evanreichard ];
# Binary name used by `nix run` / `lib.getExe`.
mainProgram = "open-proxy";
};
}

View File

@@ -0,0 +1,23 @@
# pi-coding-agent Packaging Notes
`pi-coding-agent` is built from the `earendil-works/pi-mono` monorepo with `buildNpmPackage`.
## Lockfile Metadata
Upstream `package-lock.json` may omit `resolved` / `integrity` metadata. npm can recover that metadata online, but Nix needs it to be present for its offline npm cache. Keep a package-local enriched lockfile at `packages/pi-coding-agent/package-lock.json` and copy it in during `prePatch`, before `npmConfigHook` validates/generates `npmDeps`.
After bumping `version` in `default.nix`, regenerate it with:
```bash
node packages/pi-coding-agent/update-lockfile.mjs
# or explicitly:
node packages/pi-coding-agent/update-lockfile.mjs 0.74.0
```
Then refresh `npmDepsHash`: build the `npmDeps` fixed-output derivation (FOD) and copy the expected hash from the resulting hash-mismatch error:
```bash
nix build .#packages.aarch64-linux.pi-coding-agent.npmDeps --no-link
```
Remember: new files must be `git add`ed before the flake can see them.

View File

@@ -3,6 +3,8 @@
, fetchFromGitHub
, nodejs
, nodejs_22
, firefox
, geckodriver
, makeWrapper
, pkg-config
, pixman
@@ -16,19 +18,24 @@
buildNpmPackage rec {
pname = "pi-coding-agent";
version = "0.73.1";
version = "0.78.1";
src = fetchFromGitHub {
owner = "earendil-works";
repo = "pi-mono";
rev = "v${version}";
hash = "sha256-ZcqMWghMACzEUswLujwClPF1pbwjTKzTbcYW86ZvjL4=";
hash = "sha256-K5+reVdi9LPwUHxFgM1iFWojuj6M/m25ymhkDOQdBE4=";
};
npmDepsHash = "sha256-tneAcwtTIfkcqQ8/Ch1Xa6OiOkTjJNYbH8wfhNneT/g=";
npmDepsHash = "sha256-PknwCOAr61Fq2Mhl6jd79Rdsje1OXFts2MDLM/gIEYE=";
nativeBuildInputs = [ pkg-config makeWrapper ];
# Restore NPM Metadata - upstream lockfile omits resolved/integrity entries needed by buildNpmPackage.
prePatch = ''
cp ${./package-lock.json} package-lock.json
'';
buildInputs = [
pixman
cairo
@@ -41,7 +48,7 @@ buildNpmPackage rec {
# Skip generate-models in ai package (models.generated.ts already in repo)
preBuild = ''
substituteInPlace packages/ai/package.json \
--replace-fail '"build": "npm run generate-models && tsgo -p tsconfig.build.json"' \
--replace-fail '"build": "npm run generate-models && npm run generate-image-models && tsgo -p tsconfig.build.json"' \
'"build": "tsgo -p tsconfig.build.json"'
'';
@@ -73,7 +80,12 @@ buildNpmPackage rec {
chmod +x $out/bin/pi
wrapProgram $out/bin/pi \
--prefix PATH : ${lib.makeBinPath [ nodejs_22 ]}
--prefix PATH : ${lib.makeBinPath [
nodejs_22
# evan/pi-web - Browser automation tools are needed for web-fetch support.
firefox
geckodriver
]}
runHook postInstall
'';

6375
packages/pi-coding-agent/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env node
import fs from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
// All locations are resolved relative to this module's own URL, so the script
// behaves the same regardless of the current working directory.
// repoRoot is two levels above this file; it is only used to print a short
// relative path in the final status message.
const repoRoot = new URL('../..', import.meta.url);
const packageDir = new URL('.', import.meta.url);
// default.nix is read to discover the package version when none is given on the CLI.
const defaultNixPath = new URL('default.nix', packageDir);
// Output file: the enriched lockfile written next to this script.
const lockfilePath = new URL('package-lock.json', packageDir);
// Memoizes registry lookups (keyed by "name@version") so each package version is fetched once.
const registryCache = new Map();
// Version Selection
/**
 * Determine which upstream version to fetch the lockfile for.
 *
 * The first CLI argument wins (a leading "v" is stripped); otherwise the first
 * `version = "...";` assignment found in default.nix is used.
 *
 * @returns {Promise<string>} version string without a leading "v"
 * @throws {Error} when no CLI argument is given and default.nix has no version assignment
 */
async function getVersion() {
  const [, , cliVersion] = process.argv;
  if (cliVersion) {
    return cliVersion.replace(/^v/, '');
  }

  const nixSource = await fs.readFile(defaultNixPath, 'utf8');
  const found = /version\s*=\s*"([^"]+)";/.exec(nixSource);
  if (found === null) {
    throw new Error(`Unable to find version in ${defaultNixPath.pathname}`);
  }
  return found[1];
}
// Package Name Extraction
/**
 * Derive the npm package name from a key of the lockfile's `packages` map.
 *
 * Only the segment(s) after the LAST `node_modules` component are considered,
 * so nested installs such as `a/node_modules/b/node_modules/@s/c` yield `@s/c`.
 *
 * @param {string} lockPath key from `lock.packages`, e.g. "node_modules/@scope/pkg"
 * @returns {string|null} the package name, or null when the path has no
 *   `node_modules` component or the name after it is missing/incomplete
 */
function packageNameFromLockPath(lockPath) {
  const segments = lockPath.split('/');
  const marker = segments.lastIndexOf('node_modules');
  if (marker < 0) return null;

  const [head, tail] = segments.slice(marker + 1);
  if (!head) return null;
  if (!head.startsWith('@')) return head;

  // Scoped package: both "@scope" and the package segment are required.
  // Without this check a truncated path produced the bogus name "@scope/undefined".
  return tail ? `${head}/${tail}` : null;
}
// Registry Fetching
/**
 * Build the npm registry URL for one specific version of a package.
 *
 * For scoped names the first "/" (the one separating scope and package) is
 * written as "%2f"; unscoped names are used as-is.
 *
 * @param {string} name package name, optionally scoped ("@scope/pkg")
 * @param {string} version exact version string
 * @returns {string} URL of the version document on registry.npmjs.org
 */
function registryUrl(name, version) {
  const isScoped = name.startsWith('@');
  const pathName = isScoped ? name.replace('/', '%2f') : name;
  return ['https://registry.npmjs.org', pathName, version].join('/');
}
/**
 * Fetch the `dist` record (tarball URL and integrity hash) for one package
 * version from the npm registry, memoized in `registryCache`.
 *
 * @param {string} name package name
 * @param {string} version exact version
 * @returns {Promise<object>} the registry's `dist` object for that version
 * @throws {Error} on a non-OK HTTP response, or when `dist.tarball` /
 *   `dist.integrity` is missing from the response
 */
async function fetchPackageMetadata(name, version) {
  const cacheKey = `${name}@${version}`;

  // Only validated dist objects are ever stored, so a defined value is a hit.
  const cached = registryCache.get(cacheKey);
  if (cached !== undefined) return cached;

  const res = await fetch(registryUrl(name, version), {
    headers: { accept: 'application/json' },
  });
  if (!res.ok) {
    throw new Error(`Failed to fetch ${cacheKey}: HTTP ${res.status}`);
  }

  const { dist } = await res.json();
  if (!(dist?.tarball && dist?.integrity)) {
    throw new Error(`Missing dist.tarball/dist.integrity for ${cacheKey}`);
  }

  registryCache.set(cacheKey, dist);
  return dist;
}
// Lockfile Enrichment
/**
 * Fill in missing `resolved` / `integrity` fields on lockfile entries, in place.
 *
 * An entry is processed only when it lives under a `node_modules` path, has a
 * `version`, is not a `link`, and has no `resolved` value yet. Lookups run one
 * at a time, in lockfile order.
 *
 * @param {object} lock parsed package-lock.json (mutated)
 * @returns {Promise<number>} how many entries were updated
 */
async function enrichLockfile(lock) {
  const needsMetadata = ([lockPath, entry]) =>
    Boolean(lockPath) &&
    !entry.link &&
    Boolean(entry.version) &&
    !entry.resolved &&
    lockPath.includes('node_modules');

  const pending = Object.entries(lock.packages ?? {}).filter(needsMetadata);

  let restored = 0;
  for (const [lockPath, entry] of pending) {
    const pkgName = packageNameFromLockPath(lockPath);
    if (pkgName) {
      const { tarball, integrity } = await fetchPackageMetadata(pkgName, entry.version);
      entry.resolved = tarball;
      entry.integrity = integrity;
      restored += 1;
    }
  }
  return restored;
}
// Main
// Resolve the target version, download the upstream lockfile for the matching
// `v<version>` tag, restore missing resolved/integrity fields, and write the
// result next to this script.
const version = await getVersion();
const lockUrl = `https://raw.githubusercontent.com/earendil-works/pi-mono/v${version}/package-lock.json`;
const response = await fetch(lockUrl, { headers: { accept: 'application/json' } });
if (!response.ok) throw new Error(`Failed to fetch ${lockUrl}: HTTP ${response.status}`);
const lock = await response.json();
// Mutates `lock` in place; returns the number of entries that were filled in.
const updated = await enrichLockfile(lock);
// Two-space indentation plus a trailing newline.
await fs.writeFile(lockfilePath, JSON.stringify(lock, null, 2) + '\n');
// The status line goes to stderr and shows the output path relative to the repo root.
const displayPath = path.relative(repoRoot.pathname, lockfilePath.pathname);
console.error(`Wrote ${displayPath} for v${version}; restored metadata for ${updated} entries.`);

View File

@@ -11,16 +11,18 @@
}:
buildNpmPackage (finalAttrs: {
pname = "qwen-code";
version = "0.4.0-nightly.20251209.a6a57233";
version = "0.16.0-preview.0";
src = fetchFromGitHub {
owner = "QwenLM";
repo = "qwen-code";
tag = "v${finalAttrs.version}";
hash = "sha256-s9m1IN6jDDbNPr/vI/UcrauYPiyQTDODarLP3EvnG3Y=";
hash = "sha256-UAJNw1RjHRoZqtgIWJ1dOTWnE9LoBpfJCAM0Jay+VPI=";
};
npmDepsHash = "sha256-ngAjCCoHLPZ+GgBRmAKbRYaF7l+RK3YGf1kEkwFbyQg=";
npmDepsHash = "sha256-uJtOeNnhbGE7EzTwkNbg2EHLonjHCbdPH5rcV2bgQUw=";
makeCacheWritable = true;
npmFlags = [ "--legacy-peer-deps" ];
nativeBuildInputs = [
jq
@@ -34,8 +36,13 @@ buildNpmPackage (finalAttrs: {
libsecret
];
postPatch = ''
prePatch = ''
${jq}/bin/jq '.dependencies."iconv-lite" = "^0.7.0"' \
packages/core/package.json > packages/core/package.json.tmp
mv packages/core/package.json.tmp packages/core/package.json
${jq}/bin/jq '
.packages."packages/core".dependencies."iconv-lite" = "^0.7.0" |
del(.packages."node_modules/node-pty") |
del(.packages."node_modules/@lydell/node-pty") |
del(.packages."node_modules/@lydell/node-pty-darwin-arm64") |
@@ -62,9 +69,26 @@ buildNpmPackage (finalAttrs: {
' package-lock.json > package-lock.json.tmp && mv package-lock.json.tmp package-lock.json
'';
preBuild = ''
mkdir -p node_modules/@lydell/node-pty
printf '%s\n' \
'export interface IPty {' \
' pid: number;' \
' onData(callback: (data: string) => void): void;' \
' onExit(callback: (event: { exitCode: number; signal?: number }) => void): void;' \
' kill(signal?: string): void;' \
' write(data: string): void;' \
' resize(columns: number, rows: number): void;' \
' removeListener(event: string, listener: (...args: unknown[]) => void): void;' \
' exitCode?: number;' \
'}' \
> node_modules/@lydell/node-pty/node-pty.d.ts
'';
buildPhase = ''
runHook preBuild
npm run generate
npm run build
npm run bundle
runHook postBuild
'';
@@ -75,6 +99,14 @@ buildNpmPackage (finalAttrs: {
cp -r dist/* $out/share/qwen-code/
npm prune --production
cp -r node_modules $out/share/qwen-code/
if [ -d $out/share/qwen-code/vendor/ripgrep ]; then
find $out/share/qwen-code/vendor/ripgrep -type f -name rg -exec sh -c '
for rg; do
rm "$rg"
ln -s ${ripgrep}/bin/rg "$rg"
done
' sh {} +
fi
find $out/share/qwen-code/node_modules -type l -delete || true
patchShebangs $out/share/qwen-code
ln -s $out/share/qwen-code/cli.js $out/bin/qwen

View File

@@ -0,0 +1,114 @@
# tuxguitar: source build of the TuxGuitar Linux/SWT distribution via
# maven.buildMavenPackage, including the `native-modules` Maven profile.
{ lib
, stdenv
, maven
, fetchFromGitHub
, jdk17
, jre
, swt
, makeWrapper
, wrapGAppsHook3
, pkg-config
, alsa-lib
, jack2
, fluidsynth
, libpulseaudio
, lilv
, suil
, qt5
, which
}:
maven.buildMavenPackage rec {
pname = "tuxguitar";
version = "2.0.1";
src = fetchFromGitHub {
owner = "helge17";
repo = "tuxguitar";
# Upstream release tags are the bare version number (no "v" prefix).
rev = version;
hash = "sha256-USdYj8ebosXkiZpDqyN5J+g1kjyWm225iQlx/szXmLA=";
};
# Hash of the fetched Maven dependency repository; must be refreshed whenever
# `version`/`src` or the SWT artifact installed in postPatch changes.
mvnHash = "sha256-XTODH8SG7iwhACJT4AbIokORUe00r6theV18TEXbrIs=";
doCheck = false;
# JDK used to run Maven for this build.
mvnJdk = jdk17;
nativeBuildInputs = [
makeWrapper
pkg-config
wrapGAppsHook3
];
buildInputs = [
alsa-lib
fluidsynth
jack2
lilv
qt5.qtbase
suil
];
# Extra attributes for the Maven dependency-fetch derivation: it gets the same
# buildInputs as the main build and opts out of Qt wrapping.
mvnFetchExtraArgs = {
inherit buildInputs;
dontWrapQtApps = true;
};
# postPatch does two things:
#  1. Rewrites the LV2 native module's pom.xml to use the lilv/suil include
#     directories from the Nix store instead of /usr/include.
#  2. Only when running inside the dependency-fetch derivation (its name
#     matches maven-deps-*), installs the nixpkgs SWT jar into the local Maven
#     repository as org.eclipse.swt:org.eclipse.swt.gtk.linux:4.36.
#     NOTE(review): 4.36 is hard-coded and presumably has to match the SWT
#     version the upstream poms request — confirm when bumping `version`.
postPatch = ''
substituteInPlace desktop/build-scripts/native-modules/tuxguitar-synth-lv2-linux/pom.xml \
--replace-fail /usr/include/lilv-0/lilv ${lib.getDev lilv}/include/lilv-0/lilv \
--replace-fail /usr/include/suil-0/suil ${lib.getDev suil}/include/suil-0/suil
if [[ "$name" == maven-deps-* ]]; then
mvn install:install-file \
-Dfile=${swt}/jars/swt.jar \
-DgroupId=org.eclipse.swt \
-DartifactId=org.eclipse.swt.gtk.linux \
-Dpackaging=jar \
-Dversion=4.36 \
-Dmaven.repo.local=$out/.m2
fi
'';
# Build the linux-swt distribution pom with the native-modules profile enabled.
mvnParameters = "-f desktop/build-scripts/tuxguitar-linux-swt/pom.xml verify -P native-modules";
# Automatic wrapping is disabled; the launcher is wrapped once, by hand, in
# postFixup, where the collected gappsWrapperArgs are passed through.
dontWrapGApps = true;
dontWrapQtApps = true;
# Copy the assembled distribution into $out, point lib/swt.jar at the nixpkgs
# SWT jar, and expose the launcher script as $out/bin/tuxguitar.
installPhase = ''
runHook preInstall
mkdir -p $out/bin
cp -r desktop/build-scripts/tuxguitar-linux-swt/target/tuxguitar-*-linux-swt/{dist,lib,share,tuxguitar.sh} $out/
ln -sf ${swt}/jars/swt.jar $out/lib/swt.jar
ln -s ../tuxguitar.sh $out/bin/tuxguitar
runHook postInstall
'';
# Wrap the launcher: GApps environment, a JRE and `which` on PATH, and the
# package's own lib directory plus the native audio/UI libraries on
# LD_LIBRARY_PATH.
postFixup = ''
wrapProgram $out/tuxguitar.sh \
"''${gappsWrapperArgs[@]}" \
--prefix PATH : ${lib.makeBinPath [ jre which ]} \
--prefix LD_LIBRARY_PATH : "$out/lib:${lib.makeLibraryPath [
swt
alsa-lib
fluidsynth
jack2
libpulseaudio
lilv
qt5.qtbase
suil
]}"
'';
meta = {
description = "Multitrack guitar tablature editor";
homepage = "https://github.com/helge17/tuxguitar";
license = lib.licenses.lgpl2;
maintainers = with lib.maintainers; [ evanreichard ];
mainProgram = "tuxguitar";
platforms = lib.platforms.linux;
};
}

View File

@@ -1,3 +1,4 @@
open_proxy_token: ENC[AES256_GCM,data:LxC0dR2EQ8XPmw4fwKnKJD3usqImMKH+81I9RSTNsjg=,iv:LQmf+kxtwgAMxcHvIe6y3Qw/oxXvdWGbyV/kdwPpKw4=,tag:TUQbM8sIK6KB7eOsYfHuqw==,type:str]
conduit_apikey: ENC[AES256_GCM,data:4mjvEI00V7nAhPkDa97eOfLCqItxoRALFe8OdxzUiUc=,iv:2mtSu1LDUvaneTnqs3Z4GVAj+HuAY2+VXrpIITSg/64=,tag:trkgxyX0ssCjyKMB42bFGg==,type:str]
context7_apikey: ENC[AES256_GCM,data:K8/OoJMWBhN3ufmTa/tAiD3iMergDZQ1OBucUtLsrg+L26DXDPAko9D41w==,iv:/IVpaaPivUTn2rbIAPIwyN5nb7TmtDh05YlMdOlBkhE=,tag:0XJfoNlDelBwMXMAAqKjtQ==,type:str]
zai_apikey: ENC[AES256_GCM,data:eNgIfEqs8JGM7Qo6D5KMMqRF8fd1qLakYQ9F5oEDUvLqPJ+TAktz8GMVuSndwW5BxA==,iv:eR8IR/MDmhk2JUoT2chCwRYOJGfxEBFGARf1CI7EG8Q=,tag:3fmRWA5eof304WSWKntDFg==,type:str]
@@ -5,8 +6,7 @@ kagi_token: ENC[AES256_GCM,data:6pxxMMQ3RCy6sdUFiuAy8rUzsIMMiBgPzphpgTVMfiHC98ej
rke2_kubeconfig: ENC[AES256_GCM,data:DmmaV5bSnSSbLfenT7/xsv9qq5V1s2b9mzdeOe5JbhXLcvC9RRX3z1TkAwdC9IEAtr0cIiPigJS2fUCo0/baYSZ+lKTZ6pUmuPwX0x1g2O8Vdfe7jTTnTDnZ/A8+CIrS79uhsNxlmQNpEOCCSAOQ4+XAnFbPbLh/0QhV2M3a19ocJBQnFyNpYCxverRvNIfgHOoMskvwn3MEsmp6foOGnwPsbeQ1RRiIyCmf7c6jJQH7O5qDLcTIFNYNKiorr8veRhI5av0eX+5/rM8wWgBVNo/lf4TJnX+ufIUZQYCIz4vpfaw8N1jcpiAJiUFGdlKX+AR9b3ti8owa5+JmQkLNp4GBEI+I0tdMp15K6RjKqkKrkPujtUFntxXC07r+eQ37oUUvS9qilIMrkX0TxWoooShgOgQfVUEAEdtb2o830TL1FFZHTiy5RBkeRQxol4yAW/M0B0S4iIj/W07UHNdp5tBaPotsdyj9QQrumYS67GwWolVW007pG8nvD/lvP55nAndsLZpHAYSFI6z1N5ayx0N4I8OP+dT5ElaQv/tt/KO69EQYEwJetgRLnMQ34WKfAr3akLYja6QxkrhEnhfa60mXP9QLynEWGsfYdMUjPioIiImvdRi+5FkyvQ7aZyVzCRsMNGL3I5f1dXWz2wS+B1oB9yimOpfz4wr2794w64EKO1gF5dso17ebVEBuT8myeOenZREVUJCEunYcFPsMDD8bI+VD/VJDwQI/aWmukBWW9dztySiAJA0RWOb69LeApgx2SUwcPnx1yLerb5FWjA8hzY6GKGKyO8cNMRbH/l6QjAL/oMg3cgi7dH/7o1dGSphvGpTAOmcb82ZiT4gMSHhKIrxdLKyZclGu4Rf/mSjadGzLrEA6qj5r07wJOxZHu2bcMafWnoUZBuo7yE/ogVkruW1vI9c4lNBsOIUescE4sE5qjRncJkPEh9pcwWWLFnRQVCxVSpp72VyeJTxvo9gEBHuZGFF3J7R3YsTq1YhcXAR5+PIseIY46rdxqIh2WmVfG4W/iteuQh+JEcspvHNMB7a7j3yTEOHQ7ILaslLYDDnma4qo6SPuTzhx3Tbkx1WN5FelVkw1INV6qSjL83ghfk5nOVumbYurrTXviqqWg5ikCJ/Ewy2nrpNMbeVMs0x9Wcdpe7xi405IdJm6ry5Ipo9ZMKNJxRDP+ebUBgfiB3WzVI68AvvTpePz3KxGDwzh2aWu3Ei7CJoBrCrkEk2DPxoGvBinOvslZYuGhvUqL5XuNoDjLuxNCY1Dt67dvLi3ydiekZU8mNQ1qraMSFBg4KXH9e718X1zjuAGv86TVUfllxiXeoo6L/Sgn2iO1YW5w6igO5qkuIYEIi1rpx0jFOrbgvZeU6qjBHQmFwEw7h03IVw54s62E2dCy1wSq1BcUu1jUR5iJ0mPJ8ajGhb4D7MRO2wanAQxrzKJhSJ1OAdCzrebprLJRoo1v4YySiQkZ5cD57YnABST+i3/u0aWcS3xDi8Z/NKr0TMyvf2rWpvlOYUfIDgZLQiHBiph0UZNk/XNpvH23e6lEHG/ztmIu8CbcAuAbAy7Qwf664UGq9cK82gklvebO6lo5vCUGpx8mLOQYLIOefdoDJEei3DoTeZvtOpLkNXnRlwSlCY4geCNOioU3H6mtF4JdlLSFM7QMt/4CpMGEEzXDVCu7GI2Gem7VmBuLOBhGYxiF1zG+D6ZKUxOx1rmo7f2flgdfEtlkpQrIbeZfVEnqgb5z9Vw8bziW+Kc5CJyo9iV09BK0aeZWstnR6SKIWwuImWrGM0zSQBHd8QdrgicuR416PnFuElT3dkrF3TedLTKWKasWhlGOYeILMzCz/dnwy85ihp5zc10AbIpLISAvHSaMtEgdiwIc2n2Ti94ntnwfIB3AG20X3/yljDZKezn95+SZV8jOhMk/OcrpGH3UTB7ezHyf6gVD0qLXM6xUgi5veh
hsO8ihFFTSNHf0881fimokRHQPjMJ9NC3J26JLhqJEs0Zfwvx2+7NrGq0pnRQ0W0FExy14dWWx270/EF8L91YRFohCJidJYgD5oTEab3eG1itM+OAAIA5xG+g5N6Re+34yO3JDMVUfuxGYpxKxfF8eVvAXB46+CH/lXQH6cPO8plWmlrzT8TS3rPj8MtvXrNaCVHuoWQ3oNu0cSGtsgcX/kJ8P8rOV8wNEdj2EpA8Fq8o741OapgFxpc5fMN3gKc6n7uwnKBHMMaQsZ2ymWeEn+qa6f/Me0DlyOkVLQFTYqAKibOoeyCtNILMf/NPMFMLe7Oktl6f9dvsMtu1zwCSYAV83/Ti8sZdnPFLET4OttQB+Bk8IX7BeTndAc3uMrVuOBwJC4sh2uGvRIJPEYbfw6p2amcn8mylN1o+l6sntpMrudEFo+oAz6M4UCEoNpotRIIMBB+uZo8T876TxSbENz31IDrOJ8ka4hCE0dK/gxZQsnoa16VvouTSc92se52n3RdELu+q9Oeeubd4htUJHhqxpbSmEQGVTWH2kJT7c76NZu7B634aUtoYI5eYmcLB3zsqVwZeG9fpzNeHMS2C6qzT4uKjxmbTW8eicDYSZSyJg4rpEjxf9GZn8f4898JtGvKF+esthhOlaxCEFcVoPQLC4pOAzY/TN+XTPk55bEPQ5LP3cByTfIv1UnZvSjXpAz2m+DlbkGfEqiICHr4HkISJS297CKI05tjpQNfO1Ylp89uL1hqwcddRee6+34kb+1XjSYwKMooZvSf/pasjj5xlpbCtxkiEIHp996H4MNXaySjZ7QU49Yy3EAKjulbc7xQXpkSUZb5Rh8yge/KpCK/5gK/fwlYIJ3tR4mIMN/b7HmfulBmktekK0G2fxagoKFot3DOwY1OnwIJhNzo0fCS3qFe9r2Ixs76C78gXo+DChKxYcYS7N5wmQy1PxKOsBpxeiWyaHCIC8Ey5dPSEoYi0zr+1A6wbkPaCrKI/C9Bwu8f04ySCBdbQQa52uCdWM2ctIsPHZNzkWNcXCfz9XLQZYZG/bQJtj+lDXgKKd8AiF0+WKzinGWHkKQbZPVYJI0s0jLXfH3G+kg5QztkfXedHNY6y0F5hh15jRYWsLAn6Ls2NJVP9SmCvl+pj6wHRKqDV7SbHz2bqCwLzzD68Wvv7b44OLFZEAH8F1TW8D205Dw7YFDiLMnWjeWv8ZX5CNUi7tkiCeORdQGiiv6n3CItvqWQ00tKnO5h+4/E97PtsSgfqG+Pgyjjscz4z02stB1XuaGbfPKe604CeYcmhjf9mV8gjfewOYTs+E2j6aoSKJbOtdiWfIHNngn92D/tOLB2UsuErYd3ZGLoH2yBOVL1rLnkY1QG4JxzXCwpnauQTzHRmg/WpJXurETbVyzFkzkF9YnX9iuJFt1mZglZuaO0JtC93xhdjMqjJyau4Oba7ZwwzyvM4TkgvffbPZit5nNAAZO0HO4r3t5nMIJeS1uBin66pjUWEuZ8ptjA5iYuAJwAg2VOPUuDKc9HBYmQjJ+Yx+KhO0aDmJd2pV/R6TOKua6jAnmny2FFqbyC3znPTfJZHOnVPZZ0V1D0NJ8dP62EDBebTnoFScE+93xyB9ETE3ubrBACAPTMuCQlqAY5ix2F1MpCCT7c0ovMyRlDaUJyFXS93UfJf4MmQg1+yCHuGX9E/bqUXsXC6SdJU9PdJ/3QdhN7iIcy8rPxOfYjJSwFrdUZn9sJcrVsbbPuObOM9HPOtp/E2xTOlYkfIeValLFxBb2lEBZghYXT05X82fSoe9ZBZdJfIMmLnuh/bP++bBVU7/y/aCYFcRCOafDdLQrTOpJ62SIDP0zN0YkmRy41qNlSaMp0ZZpxiK45ihqGP4aqWBmGC822dTsY4og1V07rRnfdG2wQcW+VHLldg72vRoc2brhfRZAOvwHQQCy0NxMD5pCgoSXJD4qsJrOFBcsmbs8YZ2omHzkVf+9ybnUD4WNCjVIvM,iv
:CMrKYb+2QZVKEJMjW51rbiYW/cN6ATDzgwfBdSi9B10=,tag:qVqO5byXdj7DZdaHNx7S9A==,type:str]
sops:
age:
- recipient: age1sac93wpnjcv62s7583jv6a4yspndh6k0r25g3qx3k7gq748uvafst6nz4w
enc: |
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBrY0pseTB5R290aXAzdnB3
aTVMS2tBdnEvSjR5K241dElEa1Z1Tmt2a0ZJCnRSbVBsZ3lNVlNzdnFlY3VvaWpB
@@ -14,8 +14,8 @@ sops:
S3hOa3p1MHVLek5zaUxhWXFiV2Z3R0UKxSrnYSoN6KcuFdg5K6qwcwh9/j9lI0HB
HqujumuIfWkcctNk38AMn4beeesmXsbJQcUPHUVOZQw6Ov4jXaGz/Q==
-----END AGE ENCRYPTED FILE-----
- recipient: age17ayje4uv2mhwehhp9jr3u9l0ds07396kt7ef40sufx89vm7cgfjq6d5d4y
enc: |
recipient: age1sac93wpnjcv62s7583jv6a4yspndh6k0r25g3qx3k7gq748uvafst6nz4w
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBMazJ5VmNXQ2FRYzFuejBU
bWNPWnI2a2R3NVBpY0Y2N0lXd1RXbUdmTkc0ClZDTSs4bklpeVEyMDNrVXJXRHRF
@@ -23,8 +23,8 @@ sops:
a3dPbEZtL0tsOWpWSEtPblZSYk9NVTgKM6TfK7VX6v059FXpRjpAlgX+ab4f6vq2
jH8jyO33YxQYI1kSgXJ5AR8evCoV3FzbZ5rzIy2PRmCOwFV8Im2bRg==
-----END AGE ENCRYPTED FILE-----
- recipient: age1mar507c9mxmwalg486chs5kfh0mya38rv5w64ypfwnwlawewrpnswerpg8
enc: |
recipient: age17ayje4uv2mhwehhp9jr3u9l0ds07396kt7ef40sufx89vm7cgfjq6d5d4y
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB4S0NrbGF4WXN5KzY4S0lt
ZW1PMWE5NUF6NEIyWmp6RWY4MzNYbE0wQnpFCkdOaGtIWmtJT08zNmZNNzcvQldp
@@ -32,8 +32,8 @@ sops:
VFJrQWNnNTJ4azVpMElHbmsrZnJPM2MKx/7XxkZfd1tPMck9FmoM6g28dp5JeXQ5
OdiOLlKc2If1f6dLKkjDmmscMui6aLMQ8RJ8dLK7FKlYy+95VsHVrw==
-----END AGE ENCRYPTED FILE-----
- recipient: age1w6avj7gd4f5frk90lsyh4e2k5am6z92hzlr0vpgrm767muyj59qsnuah62
enc: |
recipient: age1mar507c9mxmwalg486chs5kfh0mya38rv5w64ypfwnwlawewrpnswerpg8
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBnZFpKSVpWMCt6Z3M5UGUz
YVI4TG85UVh1eXYwVStQWk5xdEhhSjdNTzFJCk1mNnBtSVV0VFB0TnFIeVdocWFP
@@ -41,8 +41,8 @@ sops:
MzNuSHlWWnNMYkJMMHZkcnh0cXZaREkKE+j0yWV/zK8lz8vRa0cywpLL2DiAFsgi
fgCdeysSacrQLxB8iBWbusJ31ktyJMYLrsWFAdPkl0WN6HjaR3k3CQ==
-----END AGE ENCRYPTED FILE-----
- recipient: age1avlhszrryt4gf4ya536jhzm7qwt9xfttm8x4sns6h9w2tahzqp8sspz9y5
enc: |
recipient: age1w6avj7gd4f5frk90lsyh4e2k5am6z92hzlr0vpgrm767muyj59qsnuah62
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBDUFYyTzdpQlJ4VVJJbDF5
cEhIQ1NSRVcwRUFBUzdaeXdkOWhPVjFIdGtBCk5qVmx1L09kVGhiNnlaQ0xoeTVz
@@ -50,8 +50,8 @@ sops:
VmFINkwvZEFuWnkyb2JpTCtmRUVBbUkKKzbifH6Ue84MkpaLHrwDvJu8uvjY7yOY
+qYg0rOqFuZAx9YiOjDR7JVeGpfHM+7pO9ZjSNTPH0f1NC3XwsNp1A==
-----END AGE ENCRYPTED FILE-----
- recipient: age1dccte7xtwswgef089nd80dutp96xnezx5lrqnneh9cusegsnda8sj3dj6c
enc: |
recipient: age1avlhszrryt4gf4ya536jhzm7qwt9xfttm8x4sns6h9w2tahzqp8sspz9y5
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBBSS9CQlE0azMwbFo4V0Ez
V3pKL2QvY0NNdmoxZXhQRUU1cG9Temk5M3pZCjBZUHJFRmZ1bTZzQ2RGU2F3WXF5
@@ -59,8 +59,8 @@ sops:
Zm1ReDlMWW8wak5Jb0VncFV2bFJROWsKWi9DTgveMgGG8eK4qNeAGGG/gfiJS96G
232Tgf94Pb8eAU2zbF77pLWMaqTBbYPz1tggcMTfrAeDohq+/0sU8g==
-----END AGE ENCRYPTED FILE-----
- recipient: age1ped3hpugq06908ex8kgama33qckqe03rmac5pa6th87vks5d249qhshvqu
enc: |
recipient: age1dccte7xtwswgef089nd80dutp96xnezx5lrqnneh9cusegsnda8sj3dj6c
- enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBqS0p2TVhWd2VMQTNNR0or
amhNdTh5bmY3WWh1QkgveWZEMEtxSCt3Tng4CnVEKzhDMFVzRktndmI5OFhSei94
@@ -68,7 +68,8 @@ sops:
TC91cmtZWm03dzYwS2E3dkorNkdFY2sKj5OZHOtKx1NGPSGKsWjC/8+seUAhvmxb
wQ0iuPAq6yDLhYV69n7Jx4G9fKoidLIQxq+Ia+tLcYt58UDX7aixJQ==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2026-05-04T03:17:35Z"
mac: ENC[AES256_GCM,data:hFnRRENIaWa+Yidk7JqY+XMZ4xyekQLY9MMSgo5j4w7UI+Fb82RpNjmFrOS13ck7kKS+CVLWrFSEDxWJYcjHyLE5btoi06SFiZYfDF0JlOfq6e+loTj+TqhyWyJoQ3ZYaj9weak+qCroN4q88EWrle3Rlw0b/67lnkkzYJYYOlk=,iv:i9DVooO/9Gjr+yWaEkdXEWABAyrp8qNPt48EzqoEfiA=,tag:8hYmoJ9p3fse9eckhYqkvg==,type:str]
recipient: age1ped3hpugq06908ex8kgama33qckqe03rmac5pa6th87vks5d249qhshvqu
lastmodified: "2026-06-16T18:49:15Z"
mac: ENC[AES256_GCM,data:Q51p1A317BYzKvXSpkx2HBduGLGvxdQFi/BCfUtKWV9uAJGlQDp//eGJ7kJEG6DIO6sWUddf75fBLYAQcqm3iogIetTLUuQl3OhHSpAPvGpUDC6Hh87sAZy/ebaN2cy7BhJy5cjPJ9JAkfHqLCFRP+cVIwS/eb87GzwvWdSGZbc=,iv:yTTwhHCB09Qb6a437VENFiWQPp8CHwd0TelFj4ugO3U=,tag:JXN58pSy2I04O44Hg4pQcw==,type:str]
unencrypted_suffix: _unencrypted
version: 3.12.1
version: 3.13.1

View File

@@ -15,7 +15,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
boot.loader.grub = {

View File

@@ -12,7 +12,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
boot.loader.grub = {

View File

@@ -11,12 +11,20 @@ in
./hardware-configuration.nix
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
programs.firejail.enable = true;
programs.nix-ld.enable = true;
# Asahi Wi-Fi Resume Bug - The Broadcom driver can fail to reconnect after suspend on this MacBook.
powerManagement.resumeCommands = ''
${pkgs.kmod}/bin/modprobe -r brcmfmac_wcc 2>/dev/null || true
${pkgs.kmod}/bin/modprobe -r brcmfmac 2>/dev/null || true
${pkgs.kmod}/bin/modprobe brcmfmac
${pkgs.systemd}/bin/systemctl restart NetworkManager.service
'';
# System Config
reichard = {
nix = enabled;

View File

@@ -11,7 +11,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
programs.nix-ld.enable = true;

View File

@@ -3,7 +3,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
# Config Boot

View File

@@ -15,7 +15,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
boot.loader.grub = {

View File

@@ -15,7 +15,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
networking.firewall.allowedTCPPorts = [ 443 ];

View File

@@ -12,7 +12,7 @@ in
(modulesPath + "/profiles/qemu-guest.nix")
];
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
networking.firewall.allowedTCPPorts = [ 443 ];

View File

@@ -15,7 +15,7 @@ let
};
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
nixpkgs.config.allowUnfree = true;
hardware.nvidia-container-toolkit.enable = true;
@@ -100,6 +100,8 @@ in
opengl = {
enable = true;
enableNvidia = true;
# GTX 1080 Ti is Pascal; NVIDIA 590+ (nixpkgs stable = 595) dropped Pascal support.
nvidiaPackage = config.boot.kernelPackages.nvidiaPackages.legacy_580;
};
};

View File

@@ -3,7 +3,7 @@
}:
{
time.timeZone = "America/New_York";
system.stateVersion = "25.11";
system.stateVersion = "26.05";
reichard = {
system = {

View File

@@ -9,7 +9,7 @@ let
cfg = config.${namespace}.user;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
nixpkgs.config.allowUnfree = true;

View File

@@ -3,7 +3,7 @@ let
inherit (lib.${namespace}) enabled;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
programs.nix-ld.enable = true;

View File

@@ -7,16 +7,9 @@ let
inherit (lib.${namespace}) enabled;
in
{
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "America/New_York";
boot = {
kernelParams = [
# Mask GPE03 (EC wakeup events) to allow hibernation without spurious CPU wakeups
"acpi_mask_gpe=0x03"
];
};
programs.nix-ld.enable = true;
hardware = {
@@ -85,14 +78,4 @@ in
sops = enabled;
};
};
# Additional System Packages
environment.systemPackages = with pkgs; [
dool
jq
mosh
rclone
sqlite-interactive
unzip
];
}

View File

@@ -13,7 +13,7 @@ in
config = {
# Basic System
system.stateVersion = "25.11";
system.stateVersion = "26.05";
time.timeZone = "UTC";
reichard = {