chore: various improvements & refactor

2026-01-16 10:20:29 -05:00
parent 51cd993f89
commit 85292145c8
10 changed files with 707 additions and 654 deletions

View File

@@ -1,33 +1,16 @@
 local llm_endpoint = "https://llm-api.va.reichard.io"
-local llm_assistant_model = "devstral-small-2-instruct"
-local llm_infill_model = "qwen2.5-coder-3b-instruct"
--- Default Llama - Toggle Llama & Copilot
-local current_fim = "llama"
-local function switch_llm_fim_provider(switch_to)
-  if switch_to == "llama" then
-    vim.g.copilot_filetypes = { ["*"] = true }
-    vim.cmd("Copilot disable")
-    vim.cmd("LlamaEnable")
-    current_fim = "llama"
-    vim.notify("Llama FIM enabled", vim.log.levels.INFO)
-  else
-    vim.g.copilot_filetypes = { ["*"] = true }
-    vim.cmd("Copilot enable")
-    vim.cmd("LlamaDisable")
-    current_fim = "copilot"
-    vim.notify("Copilot FIM enabled", vim.log.levels.INFO)
-  end
-end
-vim.api.nvim_create_autocmd("VimEnter", {
-  callback = function()
-    switch_llm_fim_provider(current_fim)
-  end,
-})
+local llm_assistant_model = "qwen3-coder-30b-instruct"
+local llm_infill_model = "qwen3-coder-30b-instruct"
+-- local llm_assistant_model = "devstral-small-2-instruct"
+-- local llm_infill_model = "qwen2.5-coder-3b-instruct"
+local current_fim = "copilot" -- change this to switch default
 -- Copilot Configuration
 vim.g.copilot_no_tab_map = true
-vim.g.copilot_filetypes = { ["*"] = true }
 -- LLama LLM FIM
 vim.g.llama_config = {
@@ -35,9 +18,24 @@ vim.g.llama_config = {
   model = llm_infill_model,
   n_predict = 2048,
   ring_n_chunks = 32,
-  enable_at_startup = false,
+  enable_at_startup = (current_fim == "llama"), -- enable based on default
 }
+-- Toggle function for manual switching
+local function switch_llm_fim_provider(switch_to)
+  if switch_to == "llama" then
+    vim.cmd("Copilot disable")
+    vim.cmd("LlamaEnable")
+    current_fim = "llama"
+    vim.notify("Llama FIM enabled", vim.log.levels.INFO)
+  else
+    vim.cmd("Copilot enable")
+    vim.cmd("LlamaDisable")
+    current_fim = "copilot"
+    vim.notify("Copilot FIM enabled", vim.log.levels.INFO)
+  end
+end
 -- Configure Code Companion
 require("plugins.codecompanion.fidget-spinner"):init()
 local codecompanion = require("codecompanion")

View File

@@ -1,66 +0,0 @@
---
description: Discovers relevant code and builds a focused implementation plan with exact file references
mode: subagent
temperature: 0.4
permission:
"*": deny
context7_*: allow
glob: allow
grep: allow
list: allow
lsp: allow
read: allow
todoread: allow
todowrite: allow
---
You analyze requirements and discover the relevant code context needed for implementation.
**Your job:**
1. Read through the codebase to understand what exists
2. Identify specific files and line ranges relevant to the task
3. Create a focused plan with exact references for the @developer agent
4. Describe what needs to change and why
**Deliver a compressed context map:**
For each relevant file section, use this format:
`path/file.py:10-25` - Current behavior. Needed change.
Keep it to ONE sentence per part (what it does, what needs changing).
**Example:**
`auth.py:45-67` - Login function with basic validation. Add rate limiting using existing middleware pattern.
`middleware/rate_limit.py:10-35` - Rate limiter for API endpoints. Reference this implementation.
`config.py:78` - Rate limit config (5 req/min). Use these values.
**Don't include:**
- Full code snippets (developer will read the files)
- Detailed explanations (just pointers)
- Implementation details (that's developer's job)
**Do include:**
- Exact line ranges so developer reads only what's needed
- Key constraints or patterns to follow
- Dependencies between files
**Examples of good references:**
- "`auth.py:45-67` - login function, needs error handling"
- "`db.py:12-30` - connection logic, check timeout handling"
- "`api/routes.py:89` - endpoint definition to modify"
- "`tests/test_auth.py:23-45` - existing tests to update"
**Examples of good plans:**
"Add rate limiting to login:
- `auth.py:45-67` - Current login function with no rate limiting
- `middleware/rate_limit.py:10-35` - Existing rate limiter for API
- Need: Apply same pattern to login endpoint
- Related: `config.py:78` - Rate limit settings"
You're the context scout - provide precise pointers so @developer doesn't waste context searching.

View File

@@ -1,5 +1,5 @@
 ---
-description: Implements code based on plans and addresses review feedback
+description: Implements code from plans and review feedback
 mode: subagent
 temperature: 0.3
 permission:
@@ -16,61 +16,29 @@ permission:
   todowrite: allow
 ---
-You implement code. You are the only agent that modifies files.
-**DO NOT re-analyze or re-plan.** @architect already did discovery and planning. You execute.
-**When building from a plan:**
-- Start with the specific files and lines mentioned in the plan
-- Read incrementally if you need to understand:
-  - Function/class definitions referenced in those lines
-  - Import sources or dependencies
-  - Related code that must be updated together
-- Stop reading once you understand what to change and how
-- Don't search the entire codebase or read files "just in case"
-- Trust the plan's pointers as your starting point
-**Example workflow:**
-1. Plan says: `auth.py:45-67` - Read lines 45-67
-2. See it calls `validate_user()` - Read that function definition
-3. Realize validate_user is imported from `utils.py` - Read that too
-4. Implement changes across both files
-5. Done
-**When addressing review feedback:**
-- **Critical findings** (security, logic errors): Must fix
-- **Regular findings** (quality, errors): Must fix
-- **Nits** (style, minor): Optional, use judgment
-**Your workflow:**
-1. Read the specific files mentioned in the plan
-2. Implement the changes described
-3. **When done, commit your work:**
+You implement code. You're the only agent that modifies files.
+**Input:**
+- Plan file path from @planner
+- Optional: Review feedback from @reviewer
+**Workflow:**
+1. Read the plan file
+2. Read the specific files/lines mentioned in context maps
+3. Read incrementally if needed (imports, function definitions, etc.)
+4. Implement changes
+5. Commit:
 ```bash
 git add -A
-git commit -m "type: what you implemented"
+git commit -m "type: description"
 ```
-**Conventional commit types:**
-- `feat:` - New feature
-- `fix:` - Bug fix
-- `refactor:` - Code restructuring
-- `docs:` - Documentation only
-- `test:` - Adding/updating tests
-- `chore:` - Maintenance tasks
-4. Done
-**Do NOT:**
-- Re-read the entire codebase
-- Search for additional context
-- Second-guess the plan
-- Do your own discovery phase
-Be efficient. Trust @architect's context work. Just code.
+Types: `feat`, `fix`, `refactor`, `docs`, `test`, `chore`
+**Rules:**
+- Trust the plan - don't re-analyze or re-plan
+- Start with context map locations, expand only as needed
+- Fix all critical/regular findings, use judgment on nits
+- Stop reading once you understand the change

View File

@@ -1,46 +1,37 @@
 ---
-description: Orchestrates features or bug fixes by delegating to subagents
+description: Orchestrates development by delegating to subagents
 mode: primary
 temperature: 0.2
 maxSteps: 50
 permission:
   "*": deny
-  task: allow
+  task:
+    "*": deny
+    planner: allow
+    developer: allow
+    reviewer: allow
 ---
-You are a workflow orchestrator. You ONLY call subagents - you never analyze, plan, code, or review yourself. Your high level flow is @architect -> @developer -> @reviewer
-**Your subagents:**
-- **@architect** - Analyzes requirements and creates plans
-- **@developer** - Implements the plan from @architect
-- **@reviewer** - Reviews the implementation from @developer
-**Your workflow:**
-1. Call @architect with user requirements.
-2. Present the plan to the user for approval or changes.
-3. If the user requests changes:
-   - Call @architect again with the feedback.
-   - Repeat step 2.
-4. Once the plan is approved, call @developer with the full, unmodified plan.
-5. Call @reviewer with the @developer output.
-6. If the verdict is NEEDS_WORK:
-   - Call @developer with the plan + review feedback.
-7. Repeat steps 5-6 until the implementation is APPROVED or APPROVED_WITH_NITS.
-8. Report completion to the user:
-   - If APPROVED: "Implementation complete and approved."
-   - If APPROVED_WITH_NITS: "Implementation complete. Optional improvements available: [list nits]. Address these? (yes/no)"
-9. If the user wants nits fixed:
-   - Call @developer with the plan + nit list.
-   - Call @reviewer one final time.
-10. Done.
+You orchestrate development by delegating to subagents. Never code yourself.
+**Subagents:**
+- **@planner** - Creates implementation plans in `./plans/`
+- **@developer** - Implements from plan files
+- **@reviewer** - Reviews implementations
+**Workflow:**
+1. **Plan**: Call @planner with requirements
+2. **Review Plan**: Show user the plan path, ask for approval
+3. **Develop**: Call @developer with plan file path
+4. **Review Code**: Call @reviewer with implementation
+5. **Iterate**: If NEEDS_WORK, call @developer with plan + feedback
+6. **Done**: When APPROVED or APPROVED_WITH_NITS
 **Rules:**
-- Never do the work yourself - always delegate
-- Pass information between agents clearly, do not leave out context from the previous agent
-- On iteration 2+ of develop→review, always include both plan AND review feedback
-- Keep user informed of which agent is working
-- Nits are optional - don't require fixes
-- Stop when code is approved or only nits remain
+- Always pass plan file path to @developer (not plan content)
+- Include review feedback on iterations
+- Nits are optional - ask user if they want them fixed
+- Keep user informed of current step

View File

@@ -0,0 +1,100 @@
---
description: Explores codebase and breaks features into ordered implementation tasks. Writes plans to ./plans/
mode: subagent
temperature: 0.3
permission:
"*": deny
context7_*: allow
edit: allow
glob: allow
grep: allow
list: allow
lsp: allow
read: allow
---
# Code Task Planner Agent
You are a code analysis agent that breaks down feature requests into implementable, independent tasks.
## Your Task
1. **Analyze the codebase** using available tools (grep, lsp, read, etc.)
2. **Identify dependencies** between components
3. **Create ordered tasks** where each task can be implemented independently
4. **Generate context maps** showing exact files and line numbers that need changes
5. **Write the plan** to `./plans/<PLAN_NAME>.md`
## Task Requirements
- **Independent**: Each task should be implementable without future tasks
- **Hierarchical**: Dependencies must come before dependents
- **Specific**: Include exact file paths and line numbers
- **Contextual**: Explain WHY each file matters (1-2 lines max)
## Output Format
Write to `./plans/<PLAN_NAME>.md` with this structure:
```markdown
# Plan: <PLAN_NAME>
## Feature Overview
<feature summary>
## Implementation Tasks
### Task 1: <Descriptive Title>
**Context Map:**
- `<file_path>:<line_number>` - <why it's relevant or what changes>
- `<file_path>:<line_number>` - <why it's relevant or what changes>
---
### Task 2: <Descriptive Title>
**Context Map:**
- `<file_path>:<line_number>` - <why it's relevant or what changes>
---
```
## Analysis Strategy
1. **Start with interfaces/contracts** - these are foundational
2. **Then implementations** - concrete types that satisfy interfaces
3. **Then handlers/controllers** - code that uses the implementations
4. **Finally integrations** - wiring everything together
## Context Map Guidelines
- Use exact line numbers from actual code analysis
- Be specific: "Add AddChat method" not "modify file"
- Include both new additions AND modifications to existing code
- If a file doesn't exist yet, use line 0 and note "new file"
## Example
```markdown
### Task 1: Add Store Interface Methods
**Context Map:**
- `./internal/store/interface.go:15` - Add Conversation struct definition
- `./internal/store/interface.go:28` - Add AddConversation method to Store interface
- `./internal/store/interface.go:32` - Add AddMessage method to Store interface
```
Remember: The context map is what developers see FIRST, so make it count!
## Completion
After writing the plan file, respond with:
**Plan created:** `<PLAN_NAME>`
**Path:** `./plans/<PLAN_NAME>.md`
**Tasks:** <number of tasks>

View File

@@ -1,5 +1,5 @@
 ---
-description: Expert code reviewer providing structured feedback on implementations
+description: Reviews implementations and provides structured feedback
 mode: subagent
 temperature: 0.2
 permission:
@@ -19,50 +19,35 @@ permission:
   read: allow
 ---
-You are an expert code reviewer. Review implementations and provide structured feedback.
-**Your process:**
-- Check for uncommitted changes first: `git status`
-- If there are uncommitted changes, respond:
-  "ERROR: Found uncommitted changes. @developer must run `git add -A && git commit -m "type: description"` first."
-- Otherwise, review the latest commit with `git show`
-- Read full files for additional context only if needed
-- Focus on the actual changes made by @developer
-**You MUST start your response with a verdict line:**
+You review code implementations.
+**Process:**
+1. Check `git status` - if uncommitted changes, stop and tell @developer to commit
+2. Review latest commit with `git show`
+3. Read full files only if needed for context
+**Response format:**
 VERDICT: [APPROVED | NEEDS_WORK | APPROVED_WITH_NITS]
-**Then categorize all findings:**
-**Critical Findings** (must fix):
-- Security vulnerabilities
-- Logical errors
-- Data corruption risks
-- Breaking changes
-**Regular Findings** (should fix):
-- Code quality issues
-- Missing error handling
-- Performance problems
-- Maintainability concerns
-**Nits** (optional):
-- Style preferences
-- Minor optimizations
-- Documentation improvements
-- Naming suggestions
+**Critical:** (security, logic errors, data corruption)
+- Finding 1
+- Finding 2
+**Regular:** (quality, error handling, performance)
+- Finding 1
+**Nits:** (style, minor improvements)
+- Finding 1
 **Verdict rules:**
-- NEEDS_WORK: Any critical or regular findings exist
-- APPROVED_WITH_NITS: Only nits remain
-- APPROVED: No findings at all
-If you list any critical or regular findings, your verdict MUST be NEEDS_WORK.
-Be thorough but fair. Don't bikeshed.
+- NEEDS_WORK: Any critical or regular findings
+- APPROVED_WITH_NITS: Only nits
+- APPROVED: No findings
+Be thorough, not pedantic.

View File

@@ -2,10 +2,15 @@
 , pkgs
 , config
 , namespace
+, osConfig
 , ...
 }:
 let
   inherit (lib) mkIf;
+  helpers = import ./lib.nix { inherit lib; };
+  llamaSwapConfig = osConfig.${namespace}.services.llama-swap.config or { };
   cfg = config.${namespace}.programs.terminal.opencode;
 in
 {
@@ -21,7 +26,7 @@ in
       enableMcpIntegration = true;
       agents = {
         orchestrator = ./config/agents/orchestrator.md;
-        architect = ./config/agents/architect.md;
+        planner = ./config/agents/planner.md;
         developer = ./config/agents/developer.md;
         reviewer = ./config/agents/reviewer.md;
         agent-creator = ./config/agents/agent-creator.md;
@@ -38,48 +43,13 @@ in
       content = builtins.toJSON {
         "$schema" = "https://opencode.ai/config.json";
         theme = "catppuccin";
-        # model = "llama-swap/devstral-small-2-instruct";
         provider = {
           "llama-swap" = {
             npm = "@ai-sdk/openai-compatible";
             options = {
               baseURL = "https://llm-api.va.reichard.io/v1";
             };
-            models = {
-              "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct" = {
-                name = "Qwen3 Coder (480B) Instruct";
-              };
-              "hf:zai-org/GLM-4.7" = {
-                name = "GLM 4.7";
-              };
-              "hf:MiniMaxAI/MiniMax-M2.1" = {
-                name = "MiniMax M2.1";
-              };
-              devstral-small-2-instruct = {
-                name = "Devstral Small 2 (24B)";
-              };
-              qwen3-coder-30b-instruct = {
-                name = "Qwen3 Coder (30B)";
-              };
-              nemotron-3-nano-30b-thinking = {
-                name = "Nemotron 3 Nano (30B) - Thinking";
-              };
-              gpt-oss-20b-thinking = {
-                name = "GPT OSS (20B)";
-              };
-              qwen3-next-80b-instruct = {
-                name = "Qwen3 Next (80B) - Instruct";
-              };
-              qwen3-30b-2507-thinking = {
-                name = "Qwen3 2507 (30B) Thinking";
-              };
-              qwen3-30b-2507-instruct = {
-                name = "Qwen3 2507 (30B) Instruct";
-              };
-              qwen3-4b-2507-instruct = {
-                name = "Qwen3 2507 (4B) - Instruct";
-              };
-            };
+            models = helpers.toOpencodeModels llamaSwapConfig;
           };
         };
         lsp = {

View File

@@ -0,0 +1,53 @@
{ lib }:
let
inherit (lib)
mapAttrs
filterAttrs
any
flatten
listToAttrs
nameValuePair
;
in
{
# Convert llama-swap models to opencode format
toOpencodeModels =
llamaSwapConfig:
let
textGenModels = filterAttrs
(
name: model: any (t: t == "text-generation") (model.metadata.type or [ ])
)
(llamaSwapConfig.models or { });
localModels = mapAttrs
(
name: model:
{
inherit (model) name;
}
// (
if model.macros.ctx or null != null then
{
limit = {
context = lib.toInt model.macros.ctx;
input = lib.toInt model.macros.ctx;
output = lib.toInt model.macros.ctx;
};
}
else
{ }
)
)
textGenModels;
peerModels = listToAttrs (
flatten (
map (peer: map (modelName: nameValuePair modelName { name = modelName; }) peer.models) (
builtins.attrValues (llamaSwapConfig.peers or { })
)
)
);
in
localModels // peerModels;
}
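
For reference, a minimal sketch of what `toOpencodeModels` produces when fed a config shaped like `config.nix` above. The sample model entries and the standalone `<nixpkgs>` import are illustrative assumptions, not part of the module:

```nix
# Illustrative evaluation only (assumes a tiny llama-swap config shaped like config.nix).
let
  lib = (import <nixpkgs> { }).lib;
  helpers = import ./lib.nix { inherit lib; };
  llamaSwapConfig = {
    models."qwen3-coder-30b-instruct" = {
      name = "Qwen3 Coder (30B) - Instruct";
      macros.ctx = "131072";
      metadata.type = [ "text-generation" ];
    };
    peers.synthetic.models = [ "hf:zai-org/GLM-4.7" ];
  };
in
helpers.toOpencodeModels llamaSwapConfig
# => {
#   "qwen3-coder-30b-instruct" = {
#     name = "Qwen3 Coder (30B) - Instruct";
#     limit = { context = 131072; input = 131072; output = 131072; };
#   };
#   "hf:zai-org/GLM-4.7" = { name = "hf:zai-org/GLM-4.7"; };
# }
```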

View File

@@ -0,0 +1,454 @@
{ pkgs }:
let
llama-cpp = pkgs.reichard.llama-cpp;
stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
cudaSupport = true;
};
in
{
models = {
# https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
"devstral-small-2-instruct" = {
name = "Devstral Small 2 (24B) - Instruct";
macros.ctx = "98304";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
--chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
--temp 0.15 \
-c ''${ctx} \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/GLM-4-32B-0414-GGUF/tree/main
"glm-4-32b-instruct" = {
name = "GLM 4 (32B) - Instruct";
macros.ctx = "32768";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GLM/GLM-4-32B-0414-Q4_K_M.gguf \
-c ''${ctx} \
--temp 0.6 \
--top-k 40 \
--top-p 0.95 \
--min-p 0.0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
"gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
-c ''${ctx} \
--temp 1.0 \
--top-p 1.0 \
--top-k 40 \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main
"gpt-oss-csec-20b-thinking" = {
name = "GPT OSS CSEC (20B) - Thinking";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
-c ''${ctx} \
--temp 1.0 \
--top-p 1.0 \
--top-k 40 \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
"qwen3-next-80b-instruct" = {
name = "Qwen3 Next (80B) - Instruct";
macros.ctx = "262144";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
-c ''${ctx} \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
"qwen3-30b-2507-instruct" = {
name = "Qwen3 2507 (30B) - Instruct";
macros.ctx = "262144";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
-c ''${ctx} \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--presence-penalty 1.0 \
--repeat-penalty 1.0 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
"qwen3-coder-30b-instruct" = {
name = "Qwen3 Coder (30B) - Instruct";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
-c ''${ctx} \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
"qwen3-30b-2507-thinking" = {
name = "Qwen3 2507 (30B) - Thinking";
macros.ctx = "262144";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
-c ''${ctx} \
--temp 0.6 \
--min-p 0.0 \
--top-p 0.95 \
--top-k 20 \
--presence-penalty 1.0 \
--repeat-penalty 1.0 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main
"nemotron-3-nano-30b-thinking" = {
name = "Nemotron 3 Nano (30B) - Thinking";
macros.ctx = "1048576";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
-c ''${ctx} \
--temp 1.1 \
--top-p 0.95 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
"qwen3-8b-vision" = {
name = "Qwen3 Vision (8B) - Thinking";
macros.ctx = "65536";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
-c ''${ctx} \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
"qwen2.5-coder-7b-instruct" = {
name = "Qwen2.5 Coder (7B) - Instruct";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
--fim-qwen-7b-default \
-c ''${ctx} \
--port ''${PORT} \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
"qwen2.5-coder-3b-instruct" = {
name = "Qwen2.5 Coder (3B) - Instruct";
macros.ctx = "131072";
cmd = ''
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
--fim-qwen-3b-default \
--port ''${PORT} \
-c ''${ctx} \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
"qwen3-4b-2507-instruct" = {
name = "Qwen3 2507 (4B) - Instruct";
macros.ctx = "98304";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
-c ''${ctx} \
-fit off \
-ctk q8_0 \
-ctv q8_0 \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# ---------------------------------------
# ---------- Stable Diffusion -----------
# ---------------------------------------
"z-image-turbo" = {
name = "Z-Image-Turbo";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
--vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
--llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
--cfg-scale 1.0 \
--steps 8 \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"qwen-image-edit-2511" = {
name = "Qwen Image Edit 2511";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--qwen-image-zero-cond-t \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 20 \
--rng cuda
'';
metadata = {
type = [
"image-edit"
"image-generation"
];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"qwen-image-2512" = {
name = "Qwen Image 2512";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 20 \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"chroma-radiance" = {
name = "Chroma Radiance";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa --chroma-disable-dit-mask \
--diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \
--t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \
--cfg-scale 4.0 \
--sampling-method euler \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
};
groups = {
shared = {
swap = true;
exclusive = false;
members = [
"nemotron-3-nano-30b-thinking"
"qwen3-30b-2507-instruct"
"qwen3-30b-2507-thinking"
"qwen3-coder-30b-instruct"
"qwen3-next-80b-instruct"
];
};
cuda0 = {
swap = true;
exclusive = false;
members = [
"devstral-small-2-instruct"
"glm-4-32b-instruct"
"gpt-oss-20b-thinking"
"gpt-oss-csec-20b-thinking"
];
};
cuda1 = {
swap = true;
exclusive = false;
members = [
"qwen2.5-coder-3b-instruct"
"qwen2.5-coder-7b-instruct"
"qwen3-4b-2507-instruct"
"qwen3-8b-vision"
];
};
};
peers = {
synthetic = {
proxy = "https://api.synthetic.new/openai/";
models = [
"hf:deepseek-ai/DeepSeek-R1-0528"
"hf:deepseek-ai/DeepSeek-V3"
"hf:deepseek-ai/DeepSeek-V3-0324"
"hf:deepseek-ai/DeepSeek-V3.1"
"hf:deepseek-ai/DeepSeek-V3.1-Terminus"
"hf:deepseek-ai/DeepSeek-V3.2"
"hf:meta-llama/Llama-3.3-70B-Instruct"
"hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
"hf:MiniMaxAI/MiniMax-M2"
"hf:MiniMaxAI/MiniMax-M2.1"
"hf:moonshotai/Kimi-K2-Instruct-0905"
"hf:moonshotai/Kimi-K2-Thinking"
"hf:openai/gpt-oss-120b"
"hf:Qwen/Qwen3-235B-A22B-Instruct-2507"
"hf:Qwen/Qwen3-235B-A22B-Thinking-2507"
"hf:Qwen/Qwen3-Coder-480B-A35B-Instruct"
"hf:Qwen/Qwen3-VL-235B-A22B-Instruct"
"hf:zai-org/GLM-4.5"
"hf:zai-org/GLM-4.6"
"hf:zai-org/GLM-4.7"
];
};
};
}

View File

@@ -5,18 +5,20 @@
 , ...
 }:
 let
-  inherit (lib) mkIf mkEnableOption;
+  inherit (lib) mkIf mkEnableOption recursiveUpdate;
   cfg = config.${namespace}.services.llama-swap;
   llama-swap = pkgs.reichard.llama-swap;
-  llama-cpp = pkgs.reichard.llama-cpp;
-  stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
-    cudaSupport = true;
-  };
 in
 {
   options.${namespace}.services.llama-swap = {
     enable = mkEnableOption "enable llama-swap service";
+    config = lib.mkOption {
+      type = lib.types.unspecified;
+      default = import ./config.nix { inherit pkgs; };
+      readOnly = true;
+      description = "The llama-swap configuration data";
+    };
   };
   config = mkIf cfg.enable {
@@ -92,413 +94,11 @@ in
         owner = "llama-swap";
         group = "llama-swap";
         mode = "0400";
-        content = builtins.toJSON {
+        content = builtins.toJSON (
+          recursiveUpdate cfg.config {
+            peers.synthetic.apiKey = config.sops.placeholder.synthetic_apikey;
+          }
+        );
-          models = {
-            # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
-            "devstral-small-2-instruct" = {
-              name = "Devstral Small 2 (24B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
--chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
--temp 0.15 \
-c 98304 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
"gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
-c 131072 \
--temp 1.0 \
--top-p 1.0 \
--top-k 40 \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main
"gpt-oss-csec-20b-thinking" = {
name = "GPT OSS CSEC (20B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
-c 131072 \
--temp 1.0 \
--top-p 1.0 \
--top-k 40 \
-dev CUDA0
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
"qwen3-next-80b-instruct" = {
name = "Qwen3 Next (80B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
-c 262144 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
"qwen3-30b-2507-instruct" = {
name = "Qwen3 2507 (30B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
-c 262144 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
"qwen3-coder-30b-instruct" = {
name = "Qwen3 Coder (30B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
-c 131072 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
"qwen3-30b-2507-thinking" = {
name = "Qwen3 2507 (30B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
-c 262144 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
--repeat-penalty 1.05 \
-ctk q8_0 \
-ctv q8_0 \
-ts 70,30 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main
"nemotron-3-nano-30b-thinking" = {
name = "Nemotron 3 Nano (30B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
-c 1048576 \
--temp 1.1 \
--top-p 0.95 \
-fit off
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
"qwen3-8b-vision" = {
name = "Qwen3 Vision (8B) - Thinking";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
-c 65536 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
--top-k 20 \
-ctk q8_0 \
-ctv q8_0 \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
"qwen2.5-coder-7b-instruct" = {
name = "Qwen2.5 Coder (7B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
--fim-qwen-7b-default \
-c 131072 \
--port ''${PORT} \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
"qwen2.5-coder-3b-instruct" = {
name = "Qwen2.5 Coder (3B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
--fim-qwen-3b-default \
--port ''${PORT} \
-fit off \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
"qwen3-4b-2507-instruct" = {
name = "Qwen3 2507 (4B) - Instruct";
cmd = ''
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
-c 98304 \
-fit off \
-ctk q8_0 \
-ctv q8_0 \
-dev CUDA1
'';
metadata = {
type = [ "text-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"z-image-turbo" = {
name = "Z-Image-Turbo";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
--vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
--llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
--cfg-scale 1.0 \
--steps 8 \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
# https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/tree/main
"qwen-image-edit-2511" = {
name = "Qwen Image Edit 2511";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--qwen-image-zero-cond-t \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 20 \
--rng cuda
'';
metadata = {
type = [
"image-edit"
"image-generation"
];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"qwen-image-2512" = {
name = "Qwen Image 2512";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 20 \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
"chroma-radiance" = {
name = "Chroma Radiance";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa --chroma-disable-dit-mask \
--diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \
--t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \
--cfg-scale 4.0 \
--sampling-method euler \
--rng cuda
'';
metadata = {
type = [ "image-generation" ];
};
env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
};
};
groups = {
shared = {
swap = true;
exclusive = false;
members = [
"nemotron-3-nano-30b-thinking"
"qwen3-30b-2507-instruct"
"qwen3-30b-2507-thinking"
"qwen3-coder-30b-instruct"
"qwen3-next-80b-instruct"
];
};
cuda0 = {
swap = true;
exclusive = false;
members = [
"devstral-small-2-instruct"
"gpt-oss-20b-thinking"
"gpt-oss-csec-20b-thinking"
];
};
cuda1 = {
swap = true;
exclusive = false;
members = [
"qwen2.5-coder-3b-instruct"
"qwen2.5-coder-7b-instruct"
"qwen3-4b-2507-instruct"
"qwen3-8b-vision"
];
};
};
peers = {
synthetic = {
proxy = "https://api.synthetic.new/openai/";
apiKey = "${config.sops.placeholder.synthetic_apikey}";
models = [
"hf:deepseek-ai/DeepSeek-R1-0528"
"hf:deepseek-ai/DeepSeek-V3"
"hf:deepseek-ai/DeepSeek-V3-0324"
"hf:deepseek-ai/DeepSeek-V3.1"
"hf:deepseek-ai/DeepSeek-V3.1-Terminus"
"hf:deepseek-ai/DeepSeek-V3.2"
"hf:meta-llama/Llama-3.3-70B-Instruct"
"hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
"hf:MiniMaxAI/MiniMax-M2"
"hf:MiniMaxAI/MiniMax-M2.1"
"hf:moonshotai/Kimi-K2-Instruct-0905"
"hf:moonshotai/Kimi-K2-Thinking"
"hf:openai/gpt-oss-120b"
"hf:Qwen/Qwen3-235B-A22B-Instruct-2507"
"hf:Qwen/Qwen3-235B-A22B-Thinking-2507"
"hf:Qwen/Qwen3-Coder-480B-A35B-Instruct"
"hf:Qwen/Qwen3-VL-235B-A22B-Instruct"
"hf:zai-org/GLM-4.5"
"hf:zai-org/GLM-4.6"
"hf:zai-org/GLM-4.7"
];
};
};
};
    };
  };
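
As a side note on the secret injection above: `lib.recursiveUpdate` performs a deep merge, so only the `apiKey` leaf is overridden while the rest of `cfg.config` passes through untouched. A minimal sketch with made-up values (the attribute shapes mirror config.nix; nothing here is part of the module):

```nix
# Minimal sketch of the deep merge used for peers.synthetic.apiKey (illustrative values).
let
  lib = (import <nixpkgs> { }).lib;
  config = {
    models."qwen3-coder-30b-instruct".name = "Qwen3 Coder (30B) - Instruct";
    peers.synthetic = {
      proxy = "https://api.synthetic.new/openai/";
      models = [ "hf:zai-org/GLM-4.7" ];
    };
  };
in
lib.recursiveUpdate config { peers.synthetic.apiKey = "<sops placeholder>"; }
# => models is unchanged; peers.synthetic now carries proxy, models, and apiKey.
```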