diff --git a/modules/home/programs/terminal/nvim/config/lua/llm-config.lua b/modules/home/programs/terminal/nvim/config/lua/llm-config.lua index 776de31..68b19c8 100755 --- a/modules/home/programs/terminal/nvim/config/lua/llm-config.lua +++ b/modules/home/programs/terminal/nvim/config/lua/llm-config.lua @@ -1,33 +1,16 @@ local llm_endpoint = "https://llm-api.va.reichard.io" -local llm_assistant_model = "devstral-small-2-instruct" -local llm_infill_model = "qwen2.5-coder-3b-instruct" +local llm_assistant_model = "qwen3-coder-30b-instruct" +local llm_infill_model = "qwen3-coder-30b-instruct" --- Default Llama - Toggle Llama & Copilot -local current_fim = "llama" -local function switch_llm_fim_provider(switch_to) - if switch_to == "llama" then - vim.g.copilot_filetypes = { ["*"] = true } - vim.cmd("Copilot disable") - vim.cmd("LlamaEnable") - current_fim = "llama" - vim.notify("Llama FIM enabled", vim.log.levels.INFO) - else - vim.g.copilot_filetypes = { ["*"] = true } - vim.cmd("Copilot enable") - vim.cmd("LlamaDisable") - current_fim = "copilot" - vim.notify("Copilot FIM enabled", vim.log.levels.INFO) - end -end +-- local llm_assistant_model = "devstral-small-2-instruct" +-- local llm_infill_model = "qwen2.5-coder-3b-instruct" -vim.api.nvim_create_autocmd("VimEnter", { - callback = function() - switch_llm_fim_provider(current_fim) - end, -}) + +local current_fim = "copilot" -- change this to switch default -- Copilot Configuration vim.g.copilot_no_tab_map = true +vim.g.copilot_filetypes = { ["*"] = true } -- LLama LLM FIM vim.g.llama_config = { @@ -35,9 +18,24 @@ vim.g.llama_config = { model = llm_infill_model, n_predict = 2048, ring_n_chunks = 32, - enable_at_startup = false, + enable_at_startup = (current_fim == "llama"), -- enable based on default } +-- Toggle function for manual switching +local function switch_llm_fim_provider(switch_to) + if switch_to == "llama" then + vim.cmd("Copilot disable") + vim.cmd("LlamaEnable") + current_fim = "llama" + vim.notify("Llama FIM enabled", vim.log.levels.INFO) + else + vim.cmd("Copilot enable") + vim.cmd("LlamaDisable") + current_fim = "copilot" + vim.notify("Copilot FIM enabled", vim.log.levels.INFO) + end +end + -- Configure Code Companion require("plugins.codecompanion.fidget-spinner"):init() local codecompanion = require("codecompanion") diff --git a/modules/home/programs/terminal/opencode/config/agents/architect.md b/modules/home/programs/terminal/opencode/config/agents/architect.md deleted file mode 100644 index f92d36e..0000000 --- a/modules/home/programs/terminal/opencode/config/agents/architect.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -description: Discovers relevant code and builds a focused implementation plan with exact file references -mode: subagent -temperature: 0.4 -permission: - "*": deny - context7_*: allow - glob: allow - grep: allow - list: allow - lsp: allow - read: allow - todoread: allow - todowrite: allow ---- - -You analyze requirements and discover the relevant code context needed for implementation. - -**Your job:** - -1. Read through the codebase to understand what exists -2. Identify specific files and line ranges relevant to the task -3. Create a focused plan with exact references for the @developer agent -4. Describe what needs to change and why - -**Deliver a compressed context map:** - -For each relevant file section, use this format: -`path/file.py:10-25` - Current behavior. Needed change. - -Keep it to ONE sentence per part (what it does, what needs changing). 
- -**Example:** -`auth.py:45-67` - Login function with basic validation. Add rate limiting using existing middleware pattern. -`middleware/rate_limit.py:10-35` - Rate limiter for API endpoints. Reference this implementation. -`config.py:78` - Rate limit config (5 req/min). Use these values. - -**Don't include:** - -- Full code snippets (developer will read the files) -- Detailed explanations (just pointers) -- Implementation details (that's developer's job) - -**Do include:** - -- Exact line ranges so developer reads only what's needed -- Key constraints or patterns to follow -- Dependencies between files - -**Examples of good references:** - -- "`auth.py:45-67` - login function, needs error handling" -- "`db.py:12-30` - connection logic, check timeout handling" -- "`api/routes.py:89` - endpoint definition to modify" -- "`tests/test_auth.py:23-45` - existing tests to update" - -**Examples of good plans:** - -"Add rate limiting to login: - -- `auth.py:45-67` - Current login function with no rate limiting -- `middleware/rate_limit.py:10-35` - Existing rate limiter for API -- Need: Apply same pattern to login endpoint -- Related: `config.py:78` - Rate limit settings" - -You're the context scout - provide precise pointers so @developer doesn't waste context searching. diff --git a/modules/home/programs/terminal/opencode/config/agents/developer.md b/modules/home/programs/terminal/opencode/config/agents/developer.md index c6b0819..d5d5463 100644 --- a/modules/home/programs/terminal/opencode/config/agents/developer.md +++ b/modules/home/programs/terminal/opencode/config/agents/developer.md @@ -1,5 +1,5 @@ --- -description: Implements code based on plans and addresses review feedback +description: Implements code from plans and review feedback mode: subagent temperature: 0.3 permission: @@ -16,61 +16,29 @@ permission: todowrite: allow --- -You implement code. You are the only agent that modifies files. +You implement code. You're the only agent that modifies files. -**DO NOT re-analyze or re-plan.** @architect already did discovery and planning. You execute. +**Input:** -**When building from a plan:** +- Plan file path from @planner +- Optional: Review feedback from @reviewer -- Start with the specific files and lines mentioned in the plan -- Read incrementally if you need to understand: - - Function/class definitions referenced in those lines - - Import sources or dependencies - - Related code that must be updated together -- Stop reading once you understand what to change and how -- Don't search the entire codebase or read files "just in case" -- Trust the plan's pointers as your starting point - -**Example workflow:** - -1. Plan says: `auth.py:45-67` - Read lines 45-67 -2. See it calls `validate_user()` - Read that function definition -3. Realize validate_user is imported from `utils.py` - Read that too -4. Implement changes across both files -5. Done - -**When addressing review feedback:** - -- **Critical findings** (security, logic errors): Must fix -- **Regular findings** (quality, errors): Must fix -- **Nits** (style, minor): Optional, use judgment - -**Your workflow:** - -1. Read the specific files mentioned in the plan -2. Implement the changes described -3. **When done, commit your work:** +**Workflow:** +1. Read the plan file +2. Read the specific files/lines mentioned in context maps +3. Read incrementally if needed (imports, function definitions, etc.) +4. Implement changes +5. 
Commit: ```bash git add -A - git commit -m "type: what you implemented" + git commit -m "type: description" ``` + Types: `feat`, `fix`, `refactor`, `docs`, `test`, `chore` - **Conventional commit types:** - - `feat:` - New feature - - `fix:` - Bug fix - - `refactor:` - Code restructuring - - `docs:` - Documentation only - - `test:` - Adding/updating tests - - `chore:` - Maintenance tasks +**Rules:** -4. Done - -**Do NOT:** - -- Re-read the entire codebase -- Search for additional context -- Second-guess the plan -- Do your own discovery phase - -Be efficient. Trust @architect's context work. Just code. +- Trust the plan - don't re-analyze or re-plan +- Start with context map locations, expand only as needed +- Fix all critical/regular findings, use judgment on nits +- Stop reading once you understand the change diff --git a/modules/home/programs/terminal/opencode/config/agents/orchestrator.md b/modules/home/programs/terminal/opencode/config/agents/orchestrator.md index d125510..0cd0a49 100644 --- a/modules/home/programs/terminal/opencode/config/agents/orchestrator.md +++ b/modules/home/programs/terminal/opencode/config/agents/orchestrator.md @@ -1,46 +1,37 @@ --- -description: Orchestrates features or bug fixes by delegating to subagents +description: Orchestrates development by delegating to subagents mode: primary temperature: 0.2 maxSteps: 50 permission: "*": deny - task: allow + task: + "*": deny + planner: allow + developer: allow + reviewer: allow --- -You are a workflow orchestrator. You ONLY call subagents - you never analyze, plan, code, or review yourself. Your high level flow is @architect -> @developer -> @reviewer +You orchestrate development by delegating to subagents. Never code yourself. -**Your subagents:** +**Subagents:** -- **@architect** - Analyzes requirements and creates plans -- **@developer** - Implements the plan from @architect -- **@reviewer** - Reviews the implementation from @developer +- **@planner** - Creates implementation plans in `./plans/` +- **@developer** - Implements from plan files +- **@reviewer** - Reviews implementations -**Your workflow:** +**Workflow:** -1. Call @architect with user requirements. -2. Present the plan to the user for approval or changes. -3. If the user requests changes: - - Call @architect again with the feedback. - - Repeat step 2. -4. Once the plan is approved, call @developer with the full, unmodified plan. -5. Call @reviewer with the @developer output. -6. If the verdict is NEEDS_WORK: - - Call @developer with the plan + review feedback. -7. Repeat steps 5-6 until the implementation is APPROVED or APPROVED_WITH_NITS. -8. Report completion to the user: - - If APPROVED: "Implementation complete and approved." - - If APPROVED_WITH_NITS: "Implementation complete. Optional improvements available: [list nits]. Address these? (yes/no)" -9. If the user wants nits fixed: - - Call @developer with the plan + nit list. - - Call @reviewer one final time. -10. Done. +1. **Plan**: Call @planner with requirements +2. **Review Plan**: Show user the plan path, ask for approval +3. **Develop**: Call @developer with plan file path +4. **Review Code**: Call @reviewer with implementation +5. **Iterate**: If NEEDS_WORK, call @developer with plan + feedback +6. 
**Done**: When APPROVED or APPROVED_WITH_NITS

 **Rules:**

-- Never do the work yourself - always delegate
-- Pass information between agents clearly, do not leave out context from the previous agent
-- On iteration 2+ of develop→review, always include both plan AND review feedback
-- Keep user informed of which agent is working
-- Nits are optional - don't require fixes
-- Stop when code is approved or only nits remain
+- Always pass plan file path to @developer (not plan content)
+- Include review feedback on iterations
+- Nits are optional - ask user if they want them fixed
+- Keep user informed of current step
diff --git a/modules/home/programs/terminal/opencode/config/agents/planner.md b/modules/home/programs/terminal/opencode/config/agents/planner.md
new file mode 100644
index 0000000..029f523
--- /dev/null
+++ b/modules/home/programs/terminal/opencode/config/agents/planner.md
@@ -0,0 +1,100 @@
+---
+description: Explores codebase and breaks features into ordered implementation tasks. Writes plans to ./plans/
+mode: subagent
+temperature: 0.3
+permission:
+  "*": deny
+  context7_*: allow
+  edit: allow
+  glob: allow
+  grep: allow
+  list: allow
+  lsp: allow
+  read: allow
+---
+
+# Code Task Planner Agent
+
+You are a code analysis agent that breaks down feature requests into implementable, independent tasks.
+
+## Your Task
+
+1. **Analyze the codebase** using available tools (grep, lsp, read, etc.)
+2. **Identify dependencies** between components
+3. **Create ordered tasks** where each task can be implemented independently
+4. **Generate context maps** showing exact files and line numbers that need changes
+5. **Write the plan** to `./plans/<feature-name>.md`
+
+## Task Requirements
+
+- **Independent**: Each task should be implementable without future tasks
+- **Hierarchical**: Dependencies must come before dependents
+- **Specific**: Include exact file paths and line numbers
+- **Contextual**: Explain WHY each file matters (1-2 lines max)
+
+## Output Format
+
+Write to `./plans/<feature-name>.md` with this structure:
+
+```markdown
+# Plan: <Feature Name>
+
+## Feature Overview
+
+<Brief description of the feature>
+
+## Implementation Tasks
+
+### Task 1: <Task Title>
+
+**Context Map:**
+
+- `<file>:<line>` - <what needs to change and why>
+- `<file>:<line>` - <what needs to change and why>
+
+---
+
+### Task 2: <Task Title>
+
+**Context Map:**
+
+- `<file>:<line>` - <what needs to change and why>
+
+---
+```
+
+## Analysis Strategy
+
+1. **Start with interfaces/contracts** - these are foundational
+2. **Then implementations** - concrete types that satisfy interfaces
+3. **Then handlers/controllers** - code that uses the implementations
+4. **Finally integrations** - wiring everything together
+
+## Context Map Guidelines
+
+- Use exact line numbers from actual code analysis
+- Be specific: "Add AddChat method" not "modify file"
+- Include both new additions AND modifications to existing code
+- If a file doesn't exist yet, use line 0 and note "new file"
+
+## Example
+
+```markdown
+### Task 1: Add Store Interface Methods
+
+**Context Map:**
+
+- `./internal/store/interface.go:15` - Add Conversation struct definition
+- `./internal/store/interface.go:28` - Add AddConversation method to Store interface
+- `./internal/store/interface.go:32` - Add AddMessage method to Store interface
+```
+
+Remember: The context map is what developers see FIRST, so make it count!
+
+
+## Completion
+
+After writing the plan file, respond with:
+
+**Plan created:** `<feature-name>`
+**Path:** `./plans/<feature-name>.md`
+**Tasks:** <number of tasks>
diff --git a/modules/home/programs/terminal/opencode/config/agents/reviewer.md b/modules/home/programs/terminal/opencode/config/agents/reviewer.md
index 0c891e4..a7247e1 100644
--- a/modules/home/programs/terminal/opencode/config/agents/reviewer.md
+++ b/modules/home/programs/terminal/opencode/config/agents/reviewer.md
@@ -1,5 +1,5 @@
 ---
-description: Expert code reviewer providing structured feedback on implementations
+description: Reviews implementations and provides structured feedback
 mode: subagent
 temperature: 0.2
 permission:
@@ -19,50 +19,35 @@ permission:
   read: allow
 ---

-You are an expert code reviewer. Review implementations and provide structured feedback.
+You review code implementations.

-**Your process:**
+**Process:**

-- Check for uncommitted changes first: `git status`
-- If there are uncommitted changes, respond:
-  "ERROR: Found uncommitted changes. @developer must run `git add -A && git commit -m "type: description"` first."
-- Otherwise, review the latest commit with `git show`
-- Read full files for additional context only if needed
-- Focus on the actual changes made by @developer
+1. Check `git status` - if uncommitted changes, stop and tell @developer to commit
+2. Review latest commit with `git show`
+3. Read full files only if needed for context

-**You MUST start your response with a verdict line:**
+**Response format:**

 VERDICT: [APPROVED | NEEDS_WORK | APPROVED_WITH_NITS]

-**Then categorize all findings:**
+**Critical:** (security, logic errors, data corruption)

-**Critical Findings** (must fix):
+- Finding 1
+- Finding 2

-- Security vulnerabilities
-- Logical errors
-- Data corruption risks
-- Breaking changes
+**Regular:** (quality, error handling, performance)

-**Regular Findings** (should fix):
+- Finding 1

-- Code quality issues
-- Missing error handling
-- Performance problems
-- Maintainability concerns
+**Nits:** (style, minor improvements)

-**Nits** (optional):
-
-- Style preferences
-- Minor optimizations
-- Documentation improvements
-- Naming suggestions
+- Finding 1

 **Verdict rules:**

-- NEEDS_WORK: Any critical or regular findings exist
-- APPROVED_WITH_NITS: Only nits remain
-- APPROVED: No findings at all
+- NEEDS_WORK: Any critical or regular findings
+- APPROVED_WITH_NITS: Only nits
+- APPROVED: No findings

-If you list any critical or regular findings, your verdict MUST be NEEDS_WORK.
-
-Be thorough but fair. Don't bikeshed.
+Be thorough, not pedantic.
diff --git a/modules/home/programs/terminal/opencode/default.nix b/modules/home/programs/terminal/opencode/default.nix
index 7f41e97..ebdc83a 100755
--- a/modules/home/programs/terminal/opencode/default.nix
+++ b/modules/home/programs/terminal/opencode/default.nix
@@ -2,10 +2,15 @@
 , pkgs
 , config
 , namespace
+, osConfig
 , ...
}: let inherit (lib) mkIf; + + helpers = import ./lib.nix { inherit lib; }; + llamaSwapConfig = osConfig.${namespace}.services.llama-swap.config or { }; + cfg = config.${namespace}.programs.terminal.opencode; in { @@ -21,7 +26,7 @@ in enableMcpIntegration = true; agents = { orchestrator = ./config/agents/orchestrator.md; - architect = ./config/agents/architect.md; + planner = ./config/agents/planner.md; developer = ./config/agents/developer.md; reviewer = ./config/agents/reviewer.md; agent-creator = ./config/agents/agent-creator.md; @@ -38,48 +43,13 @@ in content = builtins.toJSON { "$schema" = "https://opencode.ai/config.json"; theme = "catppuccin"; - # model = "llama-swap/devstral-small-2-instruct"; provider = { "llama-swap" = { npm = "@ai-sdk/openai-compatible"; options = { baseURL = "https://llm-api.va.reichard.io/v1"; }; - models = { - "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct" = { - name = "Qwen3 Coder (480B) Instruct"; - }; - "hf:zai-org/GLM-4.7" = { - name = "GLM 4.7"; - }; - "hf:MiniMaxAI/MiniMax-M2.1" = { - name = "MiniMax M2.1"; - }; - devstral-small-2-instruct = { - name = "Devstral Small 2 (24B)"; - }; - qwen3-coder-30b-instruct = { - name = "Qwen3 Coder (30B)"; - }; - nemotron-3-nano-30b-thinking = { - name = "Nemotron 3 Nano (30B) - Thinking"; - }; - gpt-oss-20b-thinking = { - name = "GPT OSS (20B)"; - }; - qwen3-next-80b-instruct = { - name = "Qwen3 Next (80B) - Instruct"; - }; - qwen3-30b-2507-thinking = { - name = "Qwen3 2507 (30B) Thinking"; - }; - qwen3-30b-2507-instruct = { - name = "Qwen3 2507 (30B) Instruct"; - }; - qwen3-4b-2507-instruct = { - name = "Qwen3 2507 (4B) - Instruct"; - }; - }; + models = helpers.toOpencodeModels llamaSwapConfig; }; }; lsp = { diff --git a/modules/home/programs/terminal/opencode/lib.nix b/modules/home/programs/terminal/opencode/lib.nix new file mode 100644 index 0000000..95572f7 --- /dev/null +++ b/modules/home/programs/terminal/opencode/lib.nix @@ -0,0 +1,53 @@ +{ lib }: +let + inherit (lib) + mapAttrs + filterAttrs + any + flatten + listToAttrs + nameValuePair + ; +in +{ + # Convert llama-swap models to opencode format + toOpencodeModels = + llamaSwapConfig: + let + textGenModels = filterAttrs + ( + name: model: any (t: t == "text-generation") (model.metadata.type or [ ]) + ) + (llamaSwapConfig.models or { }); + + localModels = mapAttrs + ( + name: model: + { + inherit (model) name; + } + // ( + if model.macros.ctx or null != null then + { + limit = { + context = lib.toInt model.macros.ctx; + input = lib.toInt model.macros.ctx; + output = lib.toInt model.macros.ctx; + }; + } + else + { } + ) + ) + textGenModels; + + peerModels = listToAttrs ( + flatten ( + map (peer: map (modelName: nameValuePair modelName { name = modelName; }) peer.models) ( + builtins.attrValues (llamaSwapConfig.peers or { }) + ) + ) + ); + in + localModels // peerModels; +} diff --git a/modules/home/services/swww/default.nix b/modules/home/services/swww/default.nix index 5c0a075..83d27ce 100644 --- a/modules/home/services/swww/default.nix +++ b/modules/home/services/swww/default.nix @@ -1,4 +1,9 @@ -{ config, lib, pkgs, namespace, ... }: +{ config +, lib +, pkgs +, namespace +, ... 
+}: let cfg = config.${namespace}.services.swww; in diff --git a/modules/nixos/services/llama-swap/config.nix b/modules/nixos/services/llama-swap/config.nix new file mode 100644 index 0000000..5404c87 --- /dev/null +++ b/modules/nixos/services/llama-swap/config.nix @@ -0,0 +1,454 @@ +{ pkgs }: +let + llama-cpp = pkgs.reichard.llama-cpp; + stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override { + cudaSupport = true; + }; +in +{ + models = { + # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main + "devstral-small-2-instruct" = { + name = "Devstral Small 2 (24B) - Instruct"; + macros.ctx = "98304"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \ + --chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \ + --temp 0.15 \ + -c ''${ctx} \ + -ctk q8_0 \ + -ctv q8_0 \ + -fit off \ + -dev CUDA0 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/GLM-4-32B-0414-GGUF/tree/main + "glm-4-32b-instruct" = { + name = "GLM 4 (32B) - Instruct"; + macros.ctx = "32768"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/GLM/GLM-4-32B-0414-Q4_K_M.gguf \ + -c ''${ctx} \ + --temp 0.6 \ + --top-k 40 \ + --top-p 0.95 \ + --min-p 0.0 \ + -fit off \ + -dev CUDA0 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main + "gpt-oss-20b-thinking" = { + name = "GPT OSS (20B) - Thinking"; + macros.ctx = "131072"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \ + -c ''${ctx} \ + --temp 1.0 \ + --top-p 1.0 \ + --top-k 40 \ + -dev CUDA0 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main + "gpt-oss-csec-20b-thinking" = { + name = "GPT OSS CSEC (20B) - Thinking"; + macros.ctx = "131072"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \ + -c ''${ctx} \ + --temp 1.0 \ + --top-p 1.0 \ + --top-k 40 \ + -dev CUDA0 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main + "qwen3-next-80b-instruct" = { + name = "Qwen3 Next (80B) - Instruct"; + macros.ctx = "262144"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \ + -c ''${ctx} \ + --temp 0.7 \ + --min-p 0.0 \ + --top-p 0.8 \ + --top-k 20 \ + --repeat-penalty 1.05 \ + -ctk q8_0 \ + -ctv q8_0 \ + -fit off + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main + "qwen3-30b-2507-instruct" = { + name = "Qwen3 2507 (30B) - Instruct"; + macros.ctx = "262144"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \ + -c ''${ctx} \ + --temp 0.7 \ + --min-p 0.0 \ + 
--top-p 0.8 \ + --top-k 20 \ + --presence-penalty 1.0 \ + --repeat-penalty 1.0 \ + -ctk q8_0 \ + -ctv q8_0 \ + -ts 70,30 \ + -fit off + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main + "qwen3-coder-30b-instruct" = { + name = "Qwen3 Coder (30B) - Instruct"; + macros.ctx = "131072"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \ + -c ''${ctx} \ + --temp 0.7 \ + --min-p 0.0 \ + --top-p 0.8 \ + --top-k 20 \ + --repeat-penalty 1.05 \ + -ctk q8_0 \ + -ctv q8_0 \ + -ts 70,30 \ + -fit off + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main + "qwen3-30b-2507-thinking" = { + name = "Qwen3 2507 (30B) - Thinking"; + macros.ctx = "262144"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \ + -c ''${ctx} \ + --temp 0.6 \ + --min-p 0.0 \ + --top-p 0.95 \ + --top-k 20 \ + --presence-penalty 1.0 \ + --repeat-penalty 1.0 \ + -ctk q8_0 \ + -ctv q8_0 \ + -ts 70,30 \ + -fit off + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main + "nemotron-3-nano-30b-thinking" = { + name = "Nemotron 3 Nano (30B) - Thinking"; + macros.ctx = "1048576"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \ + -c ''${ctx} \ + --temp 1.1 \ + --top-p 0.95 \ + -fit off + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main + "qwen3-8b-vision" = { + name = "Qwen3 Vision (8B) - Thinking"; + macros.ctx = "65536"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \ + --mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \ + -c ''${ctx} \ + --temp 0.7 \ + --min-p 0.0 \ + --top-p 0.8 \ + --top-k 20 \ + -ctk q8_0 \ + -ctv q8_0 \ + -fit off \ + -dev CUDA1 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main + "qwen2.5-coder-7b-instruct" = { + name = "Qwen2.5 Coder (7B) - Instruct"; + macros.ctx = "131072"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \ + --fim-qwen-7b-default \ + -c ''${ctx} \ + --port ''${PORT} \ + -fit off \ + -dev CUDA1 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main + "qwen2.5-coder-3b-instruct" = { + name = "Qwen2.5 Coder (3B) - Instruct"; + macros.ctx = "131072"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \ + --fim-qwen-3b-default \ + --port ''${PORT} \ + -c ''${ctx} \ + -fit off \ + -dev CUDA1 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; 
+ + # https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main + "qwen3-4b-2507-instruct" = { + name = "Qwen3 2507 (4B) - Instruct"; + macros.ctx = "98304"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ + -c ''${ctx} \ + -fit off \ + -ctk q8_0 \ + -ctv q8_0 \ + -dev CUDA1 + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + # --------------------------------------- + # ---------- Stable Diffussion ---------- + # --------------------------------------- + + "z-image-turbo" = { + name = "Z-Image-Turbo"; + checkEndpoint = "/"; + cmd = '' + ${stable-diffusion-cpp}/bin/sd-server \ + --listen-port ''${PORT} \ + --diffusion-fa \ + --diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \ + --vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \ + --llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ + --cfg-scale 1.0 \ + --steps 8 \ + --rng cuda + ''; + metadata = { + type = [ "image-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + "qwen-image-edit-2511" = { + name = "Qwen Image Edit 2511"; + checkEndpoint = "/"; + cmd = '' + ${stable-diffusion-cpp}/bin/sd-server \ + --listen-port ''${PORT} \ + --diffusion-fa \ + --qwen-image-zero-cond-t \ + --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \ + --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \ + --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \ + --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \ + --cfg-scale 2.5 \ + --sampling-method euler \ + --flow-shift 3 \ + --steps 20 \ + --rng cuda + ''; + metadata = { + type = [ + "image-edit" + "image-generation" + ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + "qwen-image-2512" = { + name = "Qwen Image 2512"; + checkEndpoint = "/"; + cmd = '' + ${stable-diffusion-cpp}/bin/sd-server \ + --listen-port ''${PORT} \ + --diffusion-fa \ + --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \ + --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \ + --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \ + --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \ + --cfg-scale 2.5 \ + --sampling-method euler \ + --flow-shift 3 \ + --steps 20 \ + --rng cuda + ''; + metadata = { + type = [ "image-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + + "chroma-radiance" = { + name = "Chroma Radiance"; + checkEndpoint = "/"; + cmd = '' + ${stable-diffusion-cpp}/bin/sd-server \ + --listen-port ''${PORT} \ + --diffusion-fa --chroma-disable-dit-mask \ + --diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \ + --t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \ + --cfg-scale 4.0 \ + --sampling-method euler \ + --rng cuda + ''; + metadata = { + type = [ "image-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + }; + + groups = { + shared = { + swap = true; + exclusive = false; + members = [ + "nemotron-3-nano-30b-thinking" + "qwen3-30b-2507-instruct" + "qwen3-30b-2507-thinking" + "qwen3-coder-30b-instruct" + "qwen3-next-80b-instruct" + ]; + }; + + cuda0 = { + swap = true; + exclusive = false; + members = [ + "devstral-small-2-instruct" + "glm-4-32b-instruct" + "gpt-oss-20b-thinking" + "gpt-oss-csec-20b-thinking" + ]; + }; + + cuda1 = { + swap = true; + 
exclusive = false; + members = [ + "qwen2.5-coder-3b-instruct" + "qwen2.5-coder-7b-instruct" + "qwen3-4b-2507-instruct" + "qwen3-8b-vision" + ]; + }; + }; + + peers = { + synthetic = { + proxy = "https://api.synthetic.new/openai/"; + models = [ + "hf:deepseek-ai/DeepSeek-R1-0528" + "hf:deepseek-ai/DeepSeek-V3" + "hf:deepseek-ai/DeepSeek-V3-0324" + "hf:deepseek-ai/DeepSeek-V3.1" + "hf:deepseek-ai/DeepSeek-V3.1-Terminus" + "hf:deepseek-ai/DeepSeek-V3.2" + "hf:meta-llama/Llama-3.3-70B-Instruct" + "hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" + "hf:MiniMaxAI/MiniMax-M2" + "hf:MiniMaxAI/MiniMax-M2.1" + "hf:moonshotai/Kimi-K2-Instruct-0905" + "hf:moonshotai/Kimi-K2-Thinking" + "hf:openai/gpt-oss-120b" + "hf:Qwen/Qwen3-235B-A22B-Instruct-2507" + "hf:Qwen/Qwen3-235B-A22B-Thinking-2507" + "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct" + "hf:Qwen/Qwen3-VL-235B-A22B-Instruct" + "hf:zai-org/GLM-4.5" + "hf:zai-org/GLM-4.6" + "hf:zai-org/GLM-4.7" + ]; + }; + }; +} diff --git a/modules/nixos/services/llama-swap/default.nix b/modules/nixos/services/llama-swap/default.nix index 2aebe5e..53c388d 100644 --- a/modules/nixos/services/llama-swap/default.nix +++ b/modules/nixos/services/llama-swap/default.nix @@ -5,18 +5,20 @@ , ... }: let - inherit (lib) mkIf mkEnableOption; + inherit (lib) mkIf mkEnableOption recursiveUpdate; cfg = config.${namespace}.services.llama-swap; llama-swap = pkgs.reichard.llama-swap; - llama-cpp = pkgs.reichard.llama-cpp; - stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override { - cudaSupport = true; - }; in { options.${namespace}.services.llama-swap = { enable = mkEnableOption "enable llama-swap service"; + config = lib.mkOption { + type = lib.types.unspecified; + default = import ./config.nix { inherit pkgs; }; + readOnly = true; + description = "The llama-swap configuration data"; + }; }; config = mkIf cfg.enable { @@ -92,413 +94,11 @@ in owner = "llama-swap"; group = "llama-swap"; mode = "0400"; - content = builtins.toJSON { - models = { - # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main - "devstral-small-2-instruct" = { - name = "Devstral Small 2 (24B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \ - --chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \ - --temp 0.15 \ - -c 98304 \ - -ctk q8_0 \ - -ctv q8_0 \ - -fit off \ - -dev CUDA0 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main - "gpt-oss-20b-thinking" = { - name = "GPT OSS (20B) - Thinking"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \ - -c 131072 \ - --temp 1.0 \ - --top-p 1.0 \ - --top-k 40 \ - -dev CUDA0 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main - "gpt-oss-csec-20b-thinking" = { - name = "GPT OSS CSEC (20B) - Thinking"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \ - -c 131072 \ - --temp 1.0 \ - --top-p 1.0 \ - --top-k 40 \ - -dev CUDA0 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ 
"GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main - "qwen3-next-80b-instruct" = { - name = "Qwen3 Next (80B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \ - -c 262144 \ - --temp 0.7 \ - --min-p 0.0 \ - --top-p 0.8 \ - --top-k 20 \ - --repeat-penalty 1.05 \ - -ctk q8_0 \ - -ctv q8_0 \ - -fit off - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main - "qwen3-30b-2507-instruct" = { - name = "Qwen3 2507 (30B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \ - -c 262144 \ - --temp 0.7 \ - --min-p 0.0 \ - --top-p 0.8 \ - --top-k 20 \ - --repeat-penalty 1.05 \ - -ctk q8_0 \ - -ctv q8_0 \ - -ts 70,30 \ - -fit off - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main - "qwen3-coder-30b-instruct" = { - name = "Qwen3 Coder (30B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \ - -c 131072 \ - --temp 0.7 \ - --min-p 0.0 \ - --top-p 0.8 \ - --top-k 20 \ - --repeat-penalty 1.05 \ - -ctk q8_0 \ - -ctv q8_0 \ - -ts 70,30 \ - -fit off - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main - "qwen3-30b-2507-thinking" = { - name = "Qwen3 2507 (30B) - Thinking"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \ - -c 262144 \ - --temp 0.7 \ - --min-p 0.0 \ - --top-p 0.8 \ - --top-k 20 \ - --repeat-penalty 1.05 \ - -ctk q8_0 \ - -ctv q8_0 \ - -ts 70,30 \ - -fit off - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main - "nemotron-3-nano-30b-thinking" = { - name = "Nemotron 3 Nano (30B) - Thinking"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \ - -c 1048576 \ - --temp 1.1 \ - --top-p 0.95 \ - -fit off - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main - "qwen3-8b-vision" = { - name = "Qwen3 Vision (8B) - Thinking"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \ - --mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \ - -c 65536 \ - --temp 0.7 \ - --min-p 0.0 \ - --top-p 0.8 \ - --top-k 20 \ - -ctk q8_0 \ - -ctv q8_0 \ - -fit off \ - -dev CUDA1 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main - "qwen2.5-coder-7b-instruct" = { - name = "Qwen2.5 Coder (7B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - -m 
/mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \ - --fim-qwen-7b-default \ - -c 131072 \ - --port ''${PORT} \ - -fit off \ - -dev CUDA1 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main - "qwen2.5-coder-3b-instruct" = { - name = "Qwen2.5 Coder (3B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \ - --fim-qwen-3b-default \ - --port ''${PORT} \ - -fit off \ - -dev CUDA1 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main - "qwen3-4b-2507-instruct" = { - name = "Qwen3 2507 (4B) - Instruct"; - cmd = '' - ${llama-cpp}/bin/llama-server \ - --port ''${PORT} \ - -m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ - -c 98304 \ - -fit off \ - -ctk q8_0 \ - -ctv q8_0 \ - -dev CUDA1 - ''; - metadata = { - type = [ "text-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - "z-image-turbo" = { - name = "Z-Image-Turbo"; - checkEndpoint = "/"; - cmd = '' - ${stable-diffusion-cpp}/bin/sd-server \ - --listen-port ''${PORT} \ - --diffusion-fa \ - --diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \ - --vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \ - --llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ - --cfg-scale 1.0 \ - --steps 8 \ - --rng cuda - ''; - metadata = { - type = [ "image-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - # https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/tree/main - "qwen-image-edit-2511" = { - name = "Qwen Image Edit 2511"; - checkEndpoint = "/"; - cmd = '' - ${stable-diffusion-cpp}/bin/sd-server \ - --listen-port ''${PORT} \ - --diffusion-fa \ - --qwen-image-zero-cond-t \ - --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \ - --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \ - --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \ - --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \ - --cfg-scale 2.5 \ - --sampling-method euler \ - --flow-shift 3 \ - --steps 20 \ - --rng cuda - ''; - metadata = { - type = [ - "image-edit" - "image-generation" - ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - "qwen-image-2512" = { - name = "Qwen Image 2512"; - checkEndpoint = "/"; - cmd = '' - ${stable-diffusion-cpp}/bin/sd-server \ - --listen-port ''${PORT} \ - --diffusion-fa \ - --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \ - --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \ - --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \ - --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \ - --cfg-scale 2.5 \ - --sampling-method euler \ - --flow-shift 3 \ - --steps 20 \ - --rng cuda - ''; - metadata = { - type = [ "image-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - - "chroma-radiance" = { - name = "Chroma Radiance"; - checkEndpoint = "/"; - cmd = '' - ${stable-diffusion-cpp}/bin/sd-server \ - --listen-port ''${PORT} \ - --diffusion-fa --chroma-disable-dit-mask \ - --diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \ - --t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \ - 
--cfg-scale 4.0 \ - --sampling-method euler \ - --rng cuda - ''; - metadata = { - type = [ "image-generation" ]; - }; - env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; - }; - }; - - groups = { - shared = { - swap = true; - exclusive = false; - members = [ - "nemotron-3-nano-30b-thinking" - "qwen3-30b-2507-instruct" - "qwen3-30b-2507-thinking" - "qwen3-coder-30b-instruct" - "qwen3-next-80b-instruct" - ]; - }; - - cuda0 = { - swap = true; - exclusive = false; - members = [ - "devstral-small-2-instruct" - "gpt-oss-20b-thinking" - "gpt-oss-csec-20b-thinking" - ]; - }; - - cuda1 = { - swap = true; - exclusive = false; - members = [ - "qwen2.5-coder-3b-instruct" - "qwen2.5-coder-7b-instruct" - "qwen3-4b-2507-instruct" - "qwen3-8b-vision" - ]; - }; - }; - - peers = { - synthetic = { - proxy = "https://api.synthetic.new/openai/"; - apiKey = "${config.sops.placeholder.synthetic_apikey}"; - models = [ - "hf:deepseek-ai/DeepSeek-R1-0528" - "hf:deepseek-ai/DeepSeek-V3" - "hf:deepseek-ai/DeepSeek-V3-0324" - "hf:deepseek-ai/DeepSeek-V3.1" - "hf:deepseek-ai/DeepSeek-V3.1-Terminus" - "hf:deepseek-ai/DeepSeek-V3.2" - "hf:meta-llama/Llama-3.3-70B-Instruct" - "hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" - "hf:MiniMaxAI/MiniMax-M2" - "hf:MiniMaxAI/MiniMax-M2.1" - "hf:moonshotai/Kimi-K2-Instruct-0905" - "hf:moonshotai/Kimi-K2-Thinking" - "hf:openai/gpt-oss-120b" - "hf:Qwen/Qwen3-235B-A22B-Instruct-2507" - "hf:Qwen/Qwen3-235B-A22B-Thinking-2507" - "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct" - "hf:Qwen/Qwen3-VL-235B-A22B-Instruct" - "hf:zai-org/GLM-4.5" - "hf:zai-org/GLM-4.6" - "hf:zai-org/GLM-4.7" - ]; - }; - }; - }; + content = builtins.toJSON ( + recursiveUpdate cfg.config { + peers.synthetic.apiKey = config.sops.placeholder.synthetic_apikey; + } + ); }; };