chore(cleanup): sops, opencode, etc

2026-01-11 22:19:31 -05:00
parent 1fe9396284
commit c8f5e744d0
32 changed files with 1210 additions and 676 deletions

View File

@@ -1,31 +1,35 @@
{ config, lib, namespace, ... }:
{ config
, lib
, namespace
, ...
}:
let
inherit (lib) mkIf mkEnableOption types;
inherit (lib.${namespace}) mkOpt;
getFile = lib.snowfall.fs.get-file;
user = config.users.users.${config.${namespace}.user.name};
cfg = config.${namespace}.security.sops;
in
{
options.${namespace}.security.sops = {
enable = lib.mkEnableOption "sops";
defaultSopsFile = mkOpt lib.types.path null "Default sops file.";
sshKeyPaths = mkOpt (with lib.types; listOf path) [
"/etc/ssh/ssh_host_ed25519_key"
] "SSH Key paths to use.";
options.${namespace}.security.sops = with types; {
enable = mkEnableOption "Enable sops";
defaultSopsFile = mkOpt str "secrets/systems/${config.system.name}.yaml" "Default sops file.";
sshKeyPaths = mkOpt (listOf path) [ ] "Additional SSH key paths to use.";
};
config = lib.mkIf cfg.enable {
config = mkIf cfg.enable {
sops = {
inherit (cfg) defaultSopsFile;
defaultSopsFile = getFile cfg.defaultSopsFile;
age = {
inherit (cfg) sshKeyPaths;
keyFile = "${config.users.users.${config.${namespace}.user.name}.home}/.config/sops/age/keys.txt";
keyFile = "${user.home}/.config/sops/age/keys.txt";
sshKeyPaths = [ "${config.home.homeDirectory}/.ssh/id_ed25519" ] ++ cfg.sshKeyPaths;
};
};
sops.secrets.builder_ssh_key = {
sopsFile = lib.snowfall.fs.get-file "secrets/default.yaml";
sopsFile = getFile "secrets/common/systems.yaml";
};
};
}

View File

@@ -3,24 +3,29 @@ local llm_assistant_model = "devstral-small-2-instruct"
local llm_infill_model = "qwen2.5-coder-3b-instruct"
-- Default Llama - Toggle Llama & Copilot
-- vim.g.copilot_filetypes = { ["*"] = false }
local current_mode = "copilot"
local function toggle_llm_fim_provider()
if current_mode == "llama" then
vim.g.copilot_filetypes = { ["*"] = true }
vim.cmd("Copilot enable")
vim.cmd("LlamaDisable")
current_mode = "copilot"
vim.notify("Copilot FIM enabled", vim.log.levels.INFO)
else
local current_fim = "llama"
local function switch_llm_fim_provider(switch_to)
if switch_to == "llama" then
vim.g.copilot_filetypes = { ["*"] = true }
vim.cmd("Copilot disable")
vim.cmd("LlamaEnable")
current_mode = "llama"
current_fim = "llama"
vim.notify("Llama FIM enabled", vim.log.levels.INFO)
else
vim.g.copilot_filetypes = { ["*"] = true }
vim.cmd("Copilot enable")
vim.cmd("LlamaDisable")
current_fim = "copilot"
vim.notify("Copilot FIM enabled", vim.log.levels.INFO)
end
end
vim.api.nvim_create_autocmd("VimEnter", {
callback = function()
switch_llm_fim_provider(current_fim)
end,
})
-- Copilot Configuration
vim.g.copilot_no_tab_map = true
@@ -75,7 +80,13 @@ codecompanion.setup({
-- Create KeyMaps for Code Companion
vim.keymap.set("n", "<leader>aa", codecompanion.actions, { desc = "Actions" })
vim.keymap.set("n", "<leader>af", toggle_llm_fim_provider, { desc = "Toggle FIM (Llama / Copilot)" })
vim.keymap.set("n", "<leader>af", function()
if current_fim == "llama" then
switch_llm_fim_provider("copilot")
else
switch_llm_fim_provider("llama")
end
end, { desc = "Toggle FIM (Llama / Copilot)" })
vim.keymap.set("n", "<leader>ao", function() require("snacks.terminal").toggle("opencode") end,
{ desc = "Toggle OpenCode" })
vim.keymap.set("v", "<leader>ai", ":CodeCompanion<cr>", { desc = "Inline Prompt" })

View File

@@ -134,7 +134,13 @@ setup_lsp("cssls", {
setup_lsp("ts_ls", {
on_attach = on_attach_no_formatting,
cmd = { nix_vars.tsls, "--stdio" },
filetypes = { "typescript", "typescriptreact" },
filetypes = { "typescript", "typescriptreact", "javascript" },
})
-- ESLint LSP
setup_lsp("eslint", {
on_attach = on_attach_no_formatting,
cmd = { nix_vars.vscls .. "/bin/vscode-eslint-language-server", "--stdio" },
})
-- C LSP Configuration
@@ -149,6 +155,11 @@ setup_lsp("lua_ls", {
filetypes = { "lua" },
})
-- SQL LSP Configuration
setup_lsp("sqls", {
cmd = { nix_vars.sqls },
})
-- Nix LSP Configuration
setup_lsp("nil_ls", {
filetypes = { "nix" },
@@ -205,44 +216,19 @@ setup_lsp("golangci_lint_ls", {
------------------------------------------------------
local none_ls = require("null-ls")
local eslintFiles = {
".eslintrc",
".eslintrc.js",
".eslintrc.cjs",
".eslintrc.yaml",
".eslintrc.yml",
".eslintrc.json",
"eslint.config.js",
"eslint.config.mjs",
"eslint.config.cjs",
"eslint.config.ts",
"eslint.config.mts",
"eslint.config.cts",
}
local has_eslint_in_parents = function(fname)
local root_file = require("lspconfig").util.insert_package_json(eslintFiles, "eslintConfig", fname)
return require("lspconfig").util.root_pattern(unpack(root_file))(fname)
end
none_ls.setup({
sources = {
-- Prettier Formatting
-- Formatting
none_ls.builtins.formatting.prettier,
none_ls.builtins.formatting.prettier.with({ filetypes = { "template" } }),
require("none-ls.diagnostics.eslint_d").with({
condition = function(utils)
return has_eslint_in_parents(vim.fn.getcwd())
end,
}),
none_ls.builtins.completion.spell,
none_ls.builtins.formatting.nixpkgs_fmt, -- TODO: nixd native LSP?
none_ls.builtins.diagnostics.sqlfluff,
none_ls.builtins.formatting.sqlfluff,
require("none-ls.formatting.autopep8").with({
filetypes = { "starlark", "python" },
extra_args = { "--max-line-length", "100" },
}),
-- Completion
none_ls.builtins.completion.spell,
},
on_attach = function(client, bufnr)
if client:supports_method("textDocument/formatting") then

View File

@@ -1,8 +1,9 @@
{ pkgs
, lib
, config
, namespace
, ...
{
pkgs,
lib,
config,
namespace,
...
}:
let
inherit (lib) mkIf;
@@ -178,6 +179,7 @@ in
sveltels = "${pkgs.nodePackages.svelte-language-server}/bin/svelteserver",
tsls = "${pkgs.nodePackages.typescript-language-server}/bin/typescript-language-server",
vscls = "${pkgs.nodePackages.vscode-langservers-extracted}",
sqls = "${pkgs.sqls}/bin/sqls",
}
return nix_vars
'';

View File

@@ -0,0 +1,65 @@
---
description: Creates and configures new OpenCode agents based on requirements
mode: subagent
temperature: 0.3
permission:
write: allow
---
You help users create custom OpenCode agents. When asked to create an agent:
1. **Understand the need**: Ask clarifying questions about:
- What tasks should this agent handle?
- Should it be primary or subagent?
- What tools does it need access to?
- Any special permissions or restrictions?
- Should it use a specific model?
2. **Generate the config**: Create a markdown file in the appropriate location:
- Global: `~/.config/opencode/agent/`
- Project: `.opencode/agent/`
3. **Available config options**:
- `description` (required): Brief description of agent purpose
- `mode`: "primary", "subagent", or "all" (defaults to "all")
- `temperature`: 0.0-1.0 (lower = focused, higher = creative)
- `maxSteps`: Limit agentic iterations
- `disable`: Set to true to disable agent
- `tools`: Control tool access (write, edit, bash, etc.)
- `permission`: Set to "ask", "allow", or "deny" for edit/bash/webfetch
- Additional provider-specific options pass through to the model
4. **Tools configuration**:
- Set individual tools: `write: true`, `bash: false`
- Use wildcards: `mymcp_*: false`
- Inherits from global config, agent config overrides
5. **Permissions** (for edit, bash, webfetch):
- `ask`: Prompt before running
- `allow`: Run without approval
- `deny`: Disable completely
- Can set per-command for bash: `"git push": "ask"`
6. **Keep it simple**: Start minimal; users can extend later.
7. **Explain usage**: Tell them how to invoke with `@agent-name`.
Example structure:
```markdown
---
description: [one-line purpose]
mode: subagent
model: anthropic/claude-sonnet-4-20250514
temperature: 0.2
tools:
write: false
bash: false
permission:
edit: deny
---
[Clear instructions for the agent's behavior]
```
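For finer-grained control, a minimal sketch combining tool wildcards with per-command bash permissions (the agent purpose, command list, and `mymcp_*` prefix are illustrative only):
```markdown
---
description: Runs read-only git inspection tasks
mode: subagent
tools:
  write: false
  mymcp_*: false
permission:
  edit: deny
  webfetch: ask
  bash:
    "*": deny
    "git status": allow
    "git diff *": allow
    "git push": ask
---
[Instructions limiting the agent to inspection commands]
```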
Be conversational. Ask questions before generating.

View File

@@ -0,0 +1,66 @@
---
description: Discovers relevant code and builds a focused implementation plan with exact file references
mode: subagent
temperature: 0.4
permission:
"*": deny
context7_*: allow
glob: allow
grep: allow
list: allow
lsp: allow
read: allow
todoread: allow
todowrite: allow
---
You analyze requirements and discover the relevant code context needed for implementation.
**Your job:**
1. Read through the codebase to understand what exists
2. Identify specific files and line ranges relevant to the task
3. Create a focused plan with exact references for the @developer agent
4. Describe what needs to change and why
**Deliver a compressed context map:**
For each relevant file section, use this format:
`path/file.py:10-25` - Current behavior. Needed change.
Keep it to ONE sentence per part (what it does, what needs changing).
**Example:**
`auth.py:45-67` - Login function with basic validation. Add rate limiting using existing middleware pattern.
`middleware/rate_limit.py:10-35` - Rate limiter for API endpoints. Reference this implementation.
`config.py:78` - Rate limit config (5 req/min). Use these values.
**Don't include:**
- Full code snippets (developer will read the files)
- Detailed explanations (just pointers)
- Implementation details (that's developer's job)
**Do include:**
- Exact line ranges so developer reads only what's needed
- Key constraints or patterns to follow
- Dependencies between files
**Examples of good references:**
- "`auth.py:45-67` - login function, needs error handling"
- "`db.py:12-30` - connection logic, check timeout handling"
- "`api/routes.py:89` - endpoint definition to modify"
- "`tests/test_auth.py:23-45` - existing tests to update"
**Examples of good plans:**
"Add rate limiting to login:
- `auth.py:45-67` - Current login function with no rate limiting
- `middleware/rate_limit.py:10-35` - Existing rate limiter for API
- Need: Apply same pattern to login endpoint
- Related: `config.py:78` - Rate limit settings"
You're the context scout - provide precise pointers so @developer doesn't waste context searching.

View File

@@ -0,0 +1,76 @@
---
description: Implements code based on plans and addresses review feedback
mode: subagent
temperature: 0.3
permission:
"*": deny
bash: allow
context7_*: allow
edit: allow
glob: allow
grep: allow
list: allow
lsp: allow
read: allow
todoread: allow
todowrite: allow
---
You implement code. You are the only agent that modifies files.
**DO NOT re-analyze or re-plan.** @architect already did discovery and planning. You execute.
**When building from a plan:**
- Start with the specific files and lines mentioned in the plan
- Read incrementally if you need to understand:
- Function/class definitions referenced in those lines
- Import sources or dependencies
- Related code that must be updated together
- Stop reading once you understand what to change and how
- Don't search the entire codebase or read files "just in case"
- Trust the plan's pointers as your starting point
**Example workflow:**
1. Plan says: `auth.py:45-67` - Read lines 45-67
2. See it calls `validate_user()` - Read that function definition
3. Realize validate_user is imported from `utils.py` - Read that too
4. Implement changes across both files
5. Done
**When addressing review feedback:**
- **Critical findings** (security, logic errors): Must fix
- **Regular findings** (quality, errors): Must fix
- **Nits** (style, minor): Optional, use judgment
**Your workflow:**
1. Read the specific files mentioned in the plan
2. Implement the changes described
3. **When done, commit your work:**
```bash
git add -A
git commit -m "type: what you implemented"
```
**Conventional commit types:**
- `feat:` - New feature
- `fix:` - Bug fix
- `refactor:` - Code restructuring
- `docs:` - Documentation only
- `test:` - Adding/updating tests
- `chore:` - Maintenance tasks
4. Done
**Do NOT:**
- Re-read the entire codebase
- Search for additional context
- Second-guess the plan
- Do your own discovery phase
Be efficient. Trust @architect's context work. Just code.

View File

@@ -0,0 +1,46 @@
---
description: Orchestrates features or bug fixes by delegating to subagents
mode: primary
temperature: 0.2
maxSteps: 50
permission:
"*": deny
task: allow
---
You are a workflow orchestrator. You ONLY call subagents - you never analyze, plan, code, or review yourself. Your high-level flow is @architect -> @developer -> @reviewer.
**Your subagents:**
- **@architect** - Analyzes requirements and creates plans
- **@developer** - Implements the plan from @architect
- **@reviewer** - Reviews the implementation from @developer
**Your workflow:**
1. Call @architect with user requirements.
2. Present the plan to the user for approval or changes.
3. If the user requests changes:
- Call @architect again with the feedback.
- Repeat step 2.
4. Once the plan is approved, call @developer with the full, unmodified plan.
5. Call @reviewer with the @developer output.
6. If the verdict is NEEDS_WORK:
- Call @developer with the plan + review feedback.
7. Repeat steps 5-6 until the implementation is APPROVED or APPROVED_WITH_NITS.
8. Report completion to the user:
- If APPROVED: "Implementation complete and approved."
- If APPROVED_WITH_NITS: "Implementation complete. Optional improvements available: [list nits]. Address these? (yes/no)"
9. If the user wants nits fixed:
- Call @developer with the plan + nit list.
- Call @reviewer one final time.
10. Done.
**Rules:**
- Never do the work yourself - always delegate
- Pass information between agents clearly; do not leave out context from the previous agent
- On iteration 2+ of develop→review, always include both plan AND review feedback
- Keep user informed of which agent is working
- Nits are optional - don't require fixes
- Stop when code is approved or only nits remain

View File

@@ -0,0 +1,68 @@
---
description: Expert code reviewer providing structured feedback on implementations
mode: subagent
temperature: 0.2
permission:
"*": deny
bash:
"*": deny
"git diff *": allow
"git log *": allow
"git show *": allow
"git show": allow
"git status *": allow
"git status": allow
glob: allow
grep: allow
list: allow
lsp: allow
read: allow
---
You are an expert code reviewer. Review implementations and provide structured feedback.
**Your process:**
- Check for uncommitted changes first: `git status`
- If there are uncommitted changes, respond:
"ERROR: Found uncommitted changes. @developer must run `git add -A && git commit -m "type: description"` first."
- Otherwise, review the latest commit with `git show`
- Read full files for additional context only if needed
- Focus on the actual changes made by @developer
**You MUST start your response with a verdict line:**
VERDICT: [APPROVED | NEEDS_WORK | APPROVED_WITH_NITS]
**Then categorize all findings:**
**Critical Findings** (must fix):
- Security vulnerabilities
- Logical errors
- Data corruption risks
- Breaking changes
**Regular Findings** (should fix):
- Code quality issues
- Missing error handling
- Performance problems
- Maintainability concerns
**Nits** (optional):
- Style preferences
- Minor optimizations
- Documentation improvements
- Naming suggestions
**Verdict rules:**
- NEEDS_WORK: Any critical or regular findings exist
- APPROVED_WITH_NITS: Only nits remain
- APPROVED: No findings at all
If you list any critical or regular findings, your verdict MUST be NEEDS_WORK.
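A minimal example response (the findings and file references are illustrative):
```markdown
VERDICT: NEEDS_WORK
**Critical Findings** (must fix):
- `auth.py:52` - Password compared with `==` instead of a constant-time check
**Regular Findings** (should fix):
- `auth.py:60-67` - No error handling around the database call
**Nits** (optional):
- Rename `tmp` to something more descriptive
```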
Be thorough but fair. Don't bikeshed.

View File

@@ -14,63 +14,95 @@ in
};
config = mkIf cfg.enable {
# Enable OpenCode
programs.opencode = {
enable = true;
package = pkgs.reichard.opencode;
enableMcpIntegration = true;
settings = {
theme = "catppuccin";
model = "llama-swap/devstral-small-2-instruct";
permission = {
edit = "allow";
bash = "ask";
webfetch = "ask";
doom_loop = "ask";
external_directory = "ask";
};
provider = {
"llama-swap" = {
npm = "@ai-sdk/openai-compatible";
options = {
baseURL = "https://llm-api.va.reichard.io/v1";
};
models = {
nemotron-3-nano-30b-thinking = {
name = "Nemotron 3 Nano (30B) - Thinking";
agents = {
orchestrator = ./config/agents/orchestrator.md;
architect = ./config/agents/architect.md;
developer = ./config/agents/developer.md;
reviewer = ./config/agents/reviewer.md;
agent-creator = ./config/agents/agent-creator.md;
};
};
# Define OpenCode Configuration
sops = {
secrets.context7_apikey = {
sopsFile = lib.snowfall.fs.get-file "secrets/common/evanreichard.yaml";
};
templates."opencode.json" = {
path = ".config/opencode/opencode.json";
content = builtins.toJSON {
"$schema" = "https://opencode.ai/config.json";
theme = "catppuccin";
# model = "llama-swap/devstral-small-2-instruct";
provider = {
"llama-swap" = {
npm = "@ai-sdk/openai-compatible";
options = {
baseURL = "https://llm-api.va.reichard.io/v1";
};
gpt-oss-20b-thinking = {
name = "GPT OSS (20B)";
};
devstral-small-2-instruct = {
name = "Devstral Small 2 (24B)";
};
qwen3-coder-30b-instruct = {
name = "Qwen3 Coder (30B)";
};
qwen3-next-80b-instruct = {
name = "Qwen3 Next (80B) - Instruct";
};
qwen3-30b-2507-thinking = {
name = "Qwen3 2507 (30B) Thinking";
};
qwen3-30b-2507-instruct = {
name = "Qwen3 2507 (30B) Instruct";
};
qwen3-4b-2507-instruct = {
name = "Qwen3 2507 (4B) - Instruct";
models = {
"hf:Qwen/Qwen3-Coder-480B-A35B-Instruct" = {
name = "Qwen3 Coder (480B) Instruct";
};
"hf:zai-org/GLM-4.7" = {
name = "GLM 4.7";
};
"hf:MiniMaxAI/MiniMax-M2.1" = {
name = "MiniMax M2.1";
};
devstral-small-2-instruct = {
name = "Devstral Small 2 (24B)";
};
qwen3-coder-30b-instruct = {
name = "Qwen3 Coder (30B)";
};
nemotron-3-nano-30b-thinking = {
name = "Nemotron 3 Nano (30B) - Thinking";
};
gpt-oss-20b-thinking = {
name = "GPT OSS (20B)";
};
qwen3-next-80b-instruct = {
name = "Qwen3 Next (80B) - Instruct";
};
qwen3-30b-2507-thinking = {
name = "Qwen3 2507 (30B) Thinking";
};
qwen3-30b-2507-instruct = {
name = "Qwen3 2507 (30B) Instruct";
};
qwen3-4b-2507-instruct = {
name = "Qwen3 2507 (4B) - Instruct";
};
};
};
};
};
lsp = {
starlark = {
command = [
"${pkgs.pyright}/bin/pyright-langserver"
"--stdio"
];
extensions = [
".star"
];
lsp = {
biome = {
disabled = true;
};
starlark = {
command = [
"${pkgs.pyright}/bin/pyright-langserver"
"--stdio"
];
extensions = [ ".star" ];
};
};
mcp = {
context7 = {
type = "remote";
url = "https://mcp.context7.com/mcp";
headers = {
CONTEXT7_API_KEY = "${config.sops.placeholder.context7_apikey}";
};
enabled = true;
};
};
};
};

View File

@@ -1,15 +1,21 @@
{ config, lib, namespace, pkgs, ... }:
{ config
, lib
, namespace
, pkgs
, ...
}:
let
inherit (lib) mkIf types;
inherit (lib) mkIf mkEnableOption types;
inherit (lib.${namespace}) mkOpt;
getFile = lib.snowfall.fs.get-file;
cfg = config.${namespace}.services.sops;
in
{
options.${namespace}.services.sops = with types; {
enable = lib.mkEnableOption "sops";
defaultSopsFile = mkOpt path null "Default sops file.";
sshKeyPaths = mkOpt (listOf path) [ ] "SSH Key paths to use.";
enable = mkEnableOption "Enable sops";
defaultSopsFile = mkOpt str "secrets/common/evanreichard.yaml" "Default sops file.";
sshKeyPaths = mkOpt (listOf path) [ ] "Additional SSH key paths to use.";
};
config = mkIf cfg.enable {
@@ -20,11 +26,9 @@ in
];
sops = {
inherit (cfg) defaultSopsFile;
defaultSopsFormat = "yaml";
defaultSopsFile = getFile cfg.defaultSopsFile;
age = {
generateKey = true;
keyFile = "${config.home.homeDirectory}/.config/sops/age/keys.txt";
sshKeyPaths = [ "${config.home.homeDirectory}/.ssh/id_ed25519" ] ++ cfg.sshKeyPaths;
};

View File

@@ -1,31 +1,39 @@
{ config, lib, namespace, ... }:
{ config
, lib
, namespace
, ...
}:
let
inherit (lib) mkIf mkEnableOption types;
inherit (lib.${namespace}) mkOpt;
getFile = lib.snowfall.fs.get-file;
user = config.users.users.${config.${namespace}.user.name};
cfg = config.${namespace}.security.sops;
in
{
options.${namespace}.security.sops = {
enable = lib.mkEnableOption "sops";
defaultSopsFile = mkOpt lib.types.path null "Default sops file.";
sshKeyPaths = mkOpt (with lib.types; listOf path) [
# "/etc/ssh/ssh_host_ed25519_key"
] "SSH Key paths to use.";
options.${namespace}.security.sops = with types; {
enable = mkEnableOption "Enable sops";
defaultSopsFile = mkOpt str "secrets/systems/${config.system.name}.yaml" "Default sops file.";
sshKeyPaths = mkOpt (listOf path) [ ] "Additional SSH key paths to use.";
};
config = lib.mkIf cfg.enable {
config = mkIf cfg.enable {
sops = {
inherit (cfg) defaultSopsFile;
defaultSopsFile = getFile cfg.defaultSopsFile;
age = {
inherit (cfg) sshKeyPaths;
keyFile = "${config.users.users.${config.${namespace}.user.name}.home}/.config/sops/age/keys.txt";
keyFile = "${user.home}/.config/sops/age/keys.txt";
sshKeyPaths = [
"/etc/ssh/ssh_host_ed25519_key"
"${user.home}/.ssh/id_ed25519"
]
++ cfg.sshKeyPaths;
};
};
sops.secrets.builder_ssh_key = {
sopsFile = lib.snowfall.fs.get-file "secrets/default.yaml";
sopsFile = getFile "secrets/common/systems.yaml";
};
};
}

View File

@@ -1,123 +0,0 @@
{
config,
pkgs,
lib,
namespace,
...
}:
let
inherit (lib) types mkIf mkEnableOption;
inherit (lib.${namespace}) mkOpt;
cfg = config.${namespace}.services.llama-cpp;
modelDir = "/models";
availableModels = {
"qwen2.5-coder-7b-q8_0.gguf" = {
url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/qwen2.5-coder-7b-q8_0.gguf?download=true";
flag = "--fim-qwen-7b-default";
};
"qwen2.5-coder-3b-q8_0.gguf" = {
url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/qwen2.5-coder-3b-q8_0.gguf?download=true";
flag = "--fim-qwen-3b-default";
};
};
in
{
options.${namespace}.services.llama-cpp = with types; {
enable = mkEnableOption "llama-cpp support";
modelName = mkOpt str "qwen2.5-coder-3b-q8_0.gguf" "model to use";
};
config =
let
modelPath = "${modelDir}/${cfg.modelName}";
in
mkIf cfg.enable {
assertions = [
{
assertion = availableModels ? ${cfg.modelName};
message = "Invalid model '${cfg.modelName}'. Available models: ${lib.concatStringsSep ", " (lib.attrNames availableModels)}";
}
];
systemd.services = {
# LLama Download Model
download-model = {
description = "Download Model";
wantedBy = [ "multi-user.target" ];
before = [ "llama-cpp.service" ];
path = [
pkgs.curl
pkgs.coreutils
];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = "root";
Group = "root";
};
script =
let
modelURL = availableModels.${cfg.modelName}.url;
in
''
set -euo pipefail
if [ ! -f "${modelPath}" ]; then
mkdir -p "${modelDir}"
# Add -f flag to follow redirects and -L for location
# Add --fail flag to exit with error on HTTP errors
# Add -C - to resume interrupted downloads
curl -f -L -C - \
-H "Accept: application/octet-stream" \
--retry 3 \
--retry-delay 5 \
--max-time 1800 \
"${modelURL}" \
-o "${modelPath}.tmp" && \
mv "${modelPath}.tmp" "${modelPath}"
fi
'';
};
# Setup LLama API Service
llama-cpp = {
after = [ "download-model.service" ];
requires = [ "download-model.service" ];
};
};
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
port = 8012;
openFirewall = true;
model = "${modelPath}";
package =
(pkgs.llama-cpp.override {
cudaSupport = true;
blasSupport = true;
rocmSupport = false;
metalSupport = false;
}).overrideAttrs
(oldAttrs: {
cmakeFlags = oldAttrs.cmakeFlags ++ [
"-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
"-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070 / GTX-1080ti
"-DGGML_NATIVE=ON"
# Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
# "-DLLAMA_FMA=OFF"
# "-DLLAMA_AVX2=OFF"
# "-DLLAMA_AVX512=OFF"
# "-DGGML_FMA=OFF"
# "-DGGML_AVX2=OFF"
# "-DGGML_AVX512=OFF"
];
});
extraFlags = [ availableModels.${cfg.modelName}.flag ];
};
};
}

View File

@@ -0,0 +1,507 @@
{ config
, lib
, pkgs
, namespace
, ...
}:
let
inherit (lib) mkIf mkEnableOption;
cfg = config.${namespace}.services.llama-swap;
llama-swap = pkgs.reichard.llama-swap;
llama-cpp = pkgs.reichard.llama-cpp;
stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
cudaSupport = true;
};
in
{
options.${namespace}.services.llama-swap = {
enable = mkEnableOption "enable llama-swap service";
};
config = mkIf cfg.enable {
# Create User
users.groups.llama-swap = { };
users.users.llama-swap = {
isSystemUser = true;
group = "llama-swap";
};
# Create Service
systemd.services.llama-swap = {
description = "Model swapping for LLaMA C++ Server (or any local OpenAI compatible server)";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "exec";
ExecStart = "${lib.getExe llama-swap} --listen :8080 --config ${
config.sops.templates."llama-swap.json".path
}";
Restart = "on-failure";
RestartSec = 3;
# for GPU acceleration
PrivateDevices = false;
# hardening
User = "llama-swap";
Group = "llama-swap";
CapabilityBoundingSet = "";
RestrictAddressFamilies = [
"AF_INET"
"AF_INET6"
"AF_UNIX"
];
NoNewPrivileges = true;
PrivateMounts = true;
PrivateTmp = true;
PrivateUsers = true;
ProtectClock = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectKernelLogs = true;
ProtectKernelModules = true;
ProtectKernelTunables = true;
ProtectSystem = "strict";
MemoryDenyWriteExecute = true;
LimitMEMLOCK = "infinity";
LockPersonality = true;
RemoveIPC = true;
RestrictNamespaces = true;
RestrictRealtime = true;
RestrictSUIDSGID = true;
SystemCallArchitectures = "native";
SystemCallFilter = [
"@system-service"
"~@privileged"
];
SystemCallErrorNumber = "EPERM";
ProtectProc = "invisible";
ProtectHostname = true;
ProcSubset = "pid";
};
};
# Create Config
sops = {
secrets.synthetic_apikey = {
sopsFile = lib.snowfall.fs.get-file "secrets/common/systems.yaml";
};
templates."llama-swap.json" = {
owner = "llama-swap";
group = "llama-swap";
mode = "0400";
content = builtins.toJSON {
models = {
# https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
"devstral-small-2-instruct" = {
name = "Devstral Small 2 (24B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
--chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
--temp 0.15 \
-c 98304 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
"gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
-c 131072 \
--temp 1.0 \
--top-p 1.0 \
--top-k 40 \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main
"gpt-oss-csec-20b-thinking" = {
name = "GPT OSS CSEC (20B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
-c 131072 \
--temp 1.0 \
--top-p 1.0 \
--top-k 40 \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
"qwen3-next-80b-instruct" = {
name = "Qwen3 Next (80B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
-c 262144 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
"qwen3-30b-2507-instruct" = {
name = "Qwen3 2507 (30B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
-c 262144 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
"qwen3-coder-30b-instruct" = {
name = "Qwen3 Coder (30B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
-c 131072 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
"qwen3-30b-2507-thinking" = {
name = "Qwen3 2507 (30B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
-c 262144 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main
"nemotron-3-nano-30b-thinking" = {
name = "Nemotron 3 Nano (30B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
-c 1048576 \
--temp 1.1 \
--top-p 0.95 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
"qwen3-8b-vision" = {
name = "Qwen3 Vision (8B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
-c 65536 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
"qwen2.5-coder-7b-instruct" = {
name = "Qwen2.5 Coder (7B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
--fim-qwen-7b-default \
-c 131072 \
--port ''${PORT} \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
"qwen2.5-coder-3b-instruct" = {
name = "Qwen2.5 Coder (3B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
--fim-qwen-3b-default \
--port ''${PORT} \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
"qwen3-4b-2507-instruct" = {
name = "Qwen3 2507 (4B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
-c 98304 \
-fit off \
-ctk q8_0 \
-ctv q8_0 \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"z-image-turbo" = {
name = "Z-Image-Turbo";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
--vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
--llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
--cfg-scale 1.0 \
--steps 8 \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/tree/main
"qwen-image-edit-2511" = {
name = "Qwen Image Edit 2511";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--qwen-image-zero-cond-t \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 20 \
--rng cuda
'';
metadata = {
type = [
"image-edit"
"image-generation"
];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"qwen-image-2512" = {
name = "Qwen Image 2512";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 20 \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"chroma-radiance" = {
name = "Chroma Radiance";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa --chroma-disable-dit-mask \
--diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \
--t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \
--cfg-scale 4.0 \
--sampling-method euler \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
};
groups = {
shared = {
swap = true;
exclusive = false;
members = [
"nemotron-3-nano-30b-thinking"
"qwen3-30b-2507-instruct"
"qwen3-30b-2507-thinking"
"qwen3-coder-30b-instruct"
"qwen3-next-80b-instruct"
];
};
cuda0 = {
swap = true;
exclusive = false;
members = [
"devstral-small-2-instruct"
"gpt-oss-20b-thinking"
"gpt-oss-csec-20b-thinking"
];
};
cuda1 = {
swap = true;
exclusive = false;
members = [
"qwen2.5-coder-3b-instruct"
"qwen2.5-coder-7b-instruct"
"qwen3-4b-2507-instruct"
"qwen3-8b-vision"
];
};
};
peers = {
synthetic = {
proxy = "https://api.synthetic.new/openai/";
apiKey = "${config.sops.placeholder.synthetic_apikey}";
models = [
"hf:deepseek-ai/DeepSeek-R1-0528"
"hf:deepseek-ai/DeepSeek-V3"
"hf:deepseek-ai/DeepSeek-V3-0324"
"hf:deepseek-ai/DeepSeek-V3.1"
"hf:deepseek-ai/DeepSeek-V3.1-Terminus"
"hf:deepseek-ai/DeepSeek-V3.2"
"hf:meta-llama/Llama-3.3-70B-Instruct"
"hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
"hf:MiniMaxAI/MiniMax-M2"
"hf:MiniMaxAI/MiniMax-M2.1"
"hf:moonshotai/Kimi-K2-Instruct-0905"
"hf:moonshotai/Kimi-K2-Thinking"
"hf:openai/gpt-oss-120b"
"hf:Qwen/Qwen3-235B-A22B-Instruct-2507"
"hf:Qwen/Qwen3-235B-A22B-Thinking-2507"
"hf:Qwen/Qwen3-Coder-480B-A35B-Instruct"
"hf:Qwen/Qwen3-VL-235B-A22B-Instruct"
"hf:zai-org/GLM-4.5"
"hf:zai-org/GLM-4.6"
"hf:zai-org/GLM-4.7"
];
};
};
};
};
};
networking.firewall.allowedTCPPorts = [ 8080 ];
};
}

View File

@@ -14,16 +14,11 @@ let
cfg = config.${namespace}.services.openssh;
globalKeys = [
# evanreichard@lin-va-mbp-personal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILJJoyXQOv9cAjGUHrUcvsW7vY9W0PmuPMQSI9AMZvNY"
# evanreichard@mac-va-mbp-personal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMWj6rd6uDtHj/gGozgIEgxho/vBKebgN5Kce/N6vQWV"
# evanreichard@lin-va-thinkpad
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAq5JQr/6WJMIHhR434nK95FrDmf2ApW2Ahd2+cBKwDz"
# evanreichard@lin-va-terminal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM5e6Cty+7rX5BjIEHBTU6GnzfOxPJiHpSqin/BnsypO"
# evanreichard@mobile
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIARTNbl4lgQsp7SJEng7vprL0+ChC9e6iR7o/PiC4Jme"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILJJoyXQOv9cAjGUHrUcvsW7vY9W0PmuPMQSI9AMZvNY evanreichard@lin-va-mbp-personal"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMWj6rd6uDtHj/gGozgIEgxho/vBKebgN5Kce/N6vQWV evanreichard@mac-va-mbp-personal"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAq5JQr/6WJMIHhR434nK95FrDmf2ApW2Ahd2+cBKwDz evanreichard@lin-va-thinkpad"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM5e6Cty+7rX5BjIEHBTU6GnzfOxPJiHpSqin/BnsypO evanreichard@lin-va-terminal"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIARTNbl4lgQsp7SJEng7vprL0+ChC9e6iR7o/PiC4Jme evanreichard@mobile"
];
in
{