chore: various improvements & refactor
Neovim LLM configuration (Lua):

@@ -1,33 +1,16 @@
 local llm_endpoint = "https://llm-api.va.reichard.io"
-local llm_assistant_model = "devstral-small-2-instruct"
-local llm_infill_model = "qwen2.5-coder-3b-instruct"
+local llm_assistant_model = "qwen3-coder-30b-instruct"
+local llm_infill_model = "qwen3-coder-30b-instruct"
 
--- Default Llama - Toggle Llama & Copilot
-local current_fim = "llama"
-local function switch_llm_fim_provider(switch_to)
-  if switch_to == "llama" then
-    vim.g.copilot_filetypes = { ["*"] = true }
-    vim.cmd("Copilot disable")
-    vim.cmd("LlamaEnable")
-    current_fim = "llama"
-    vim.notify("Llama FIM enabled", vim.log.levels.INFO)
-  else
-    vim.g.copilot_filetypes = { ["*"] = true }
-    vim.cmd("Copilot enable")
-    vim.cmd("LlamaDisable")
-    current_fim = "copilot"
-    vim.notify("Copilot FIM enabled", vim.log.levels.INFO)
-  end
-end
+-- local llm_assistant_model = "devstral-small-2-instruct"
+-- local llm_infill_model = "qwen2.5-coder-3b-instruct"
 
-vim.api.nvim_create_autocmd("VimEnter", {
-  callback = function()
-    switch_llm_fim_provider(current_fim)
-  end,
-})
+local current_fim = "copilot" -- change this to switch default
 
 -- Copilot Configuration
 vim.g.copilot_no_tab_map = true
+vim.g.copilot_filetypes = { ["*"] = true }
 
 -- LLama LLM FIM
 vim.g.llama_config = {
@@ -35,9 +18,24 @@ vim.g.llama_config = {
   model = llm_infill_model,
   n_predict = 2048,
   ring_n_chunks = 32,
-  enable_at_startup = false,
+  enable_at_startup = (current_fim == "llama"), -- enable based on default
 }
 
+-- Toggle function for manual switching
+local function switch_llm_fim_provider(switch_to)
+  if switch_to == "llama" then
+    vim.cmd("Copilot disable")
+    vim.cmd("LlamaEnable")
+    current_fim = "llama"
+    vim.notify("Llama FIM enabled", vim.log.levels.INFO)
+  else
+    vim.cmd("Copilot enable")
+    vim.cmd("LlamaDisable")
+    current_fim = "copilot"
+    vim.notify("Copilot FIM enabled", vim.log.levels.INFO)
+  end
+end
+
 -- Configure Code Companion
 require("plugins.codecompanion.fidget-spinner"):init()
 local codecompanion = require("codecompanion")
modules/home/programs/terminal/opencode/config/agents/architect.md (deleted):

@@ -1,66 +0,0 @@
----
-description: Discovers relevant code and builds a focused implementation plan with exact file references
-mode: subagent
-temperature: 0.4
-permission:
-  "*": deny
-  context7_*: allow
-  glob: allow
-  grep: allow
-  list: allow
-  lsp: allow
-  read: allow
-  todoread: allow
-  todowrite: allow
----
-
-You analyze requirements and discover the relevant code context needed for implementation.
-
-**Your job:**
-
-1. Read through the codebase to understand what exists
-2. Identify specific files and line ranges relevant to the task
-3. Create a focused plan with exact references for the @developer agent
-4. Describe what needs to change and why
-
-**Deliver a compressed context map:**
-
-For each relevant file section, use this format:
-`path/file.py:10-25` - Current behavior. Needed change.
-
-Keep it to ONE sentence per part (what it does, what needs changing).
-
-**Example:**
-`auth.py:45-67` - Login function with basic validation. Add rate limiting using existing middleware pattern.
-`middleware/rate_limit.py:10-35` - Rate limiter for API endpoints. Reference this implementation.
-`config.py:78` - Rate limit config (5 req/min). Use these values.
-
-**Don't include:**
-
-- Full code snippets (developer will read the files)
-- Detailed explanations (just pointers)
-- Implementation details (that's developer's job)
-
-**Do include:**
-
-- Exact line ranges so developer reads only what's needed
-- Key constraints or patterns to follow
-- Dependencies between files
-
-**Examples of good references:**
-
-- "`auth.py:45-67` - login function, needs error handling"
-- "`db.py:12-30` - connection logic, check timeout handling"
-- "`api/routes.py:89` - endpoint definition to modify"
-- "`tests/test_auth.py:23-45` - existing tests to update"
-
-**Examples of good plans:**
-
-"Add rate limiting to login:
-
-- `auth.py:45-67` - Current login function with no rate limiting
-- `middleware/rate_limit.py:10-35` - Existing rate limiter for API
-- Need: Apply same pattern to login endpoint
-- Related: `config.py:78` - Rate limit settings"
-
-You're the context scout - provide precise pointers so @developer doesn't waste context searching.
modules/home/programs/terminal/opencode/config/agents/developer.md:

@@ -1,5 +1,5 @@
 ---
-description: Implements code based on plans and addresses review feedback
+description: Implements code from plans and review feedback
 mode: subagent
 temperature: 0.3
 permission:
@@ -16,61 +16,29 @@ permission:
   todowrite: allow
 ---
 
-You implement code. You are the only agent that modifies files.
+You implement code. You're the only agent that modifies files.
 
-**DO NOT re-analyze or re-plan.** @architect already did discovery and planning. You execute.
+**Input:**
 
-**When building from a plan:**
+- Plan file path from @planner
+- Optional: Review feedback from @reviewer
 
-- Start with the specific files and lines mentioned in the plan
-- Read incrementally if you need to understand:
-  - Function/class definitions referenced in those lines
-  - Import sources or dependencies
-  - Related code that must be updated together
-- Stop reading once you understand what to change and how
-- Don't search the entire codebase or read files "just in case"
-- Trust the plan's pointers as your starting point
-
-**Example workflow:**
-
-1. Plan says: `auth.py:45-67` - Read lines 45-67
-2. See it calls `validate_user()` - Read that function definition
-3. Realize validate_user is imported from `utils.py` - Read that too
-4. Implement changes across both files
-5. Done
-
-**When addressing review feedback:**
-
-- **Critical findings** (security, logic errors): Must fix
-- **Regular findings** (quality, errors): Must fix
-- **Nits** (style, minor): Optional, use judgment
-
-**Your workflow:**
+**Workflow:**
 
-1. Read the specific files mentioned in the plan
-2. Implement the changes described
-3. **When done, commit your work:**
+1. Read the plan file
+2. Read the specific files/lines mentioned in context maps
+3. Read incrementally if needed (imports, function definitions, etc.)
+4. Implement changes
+5. Commit:
 
 ```bash
 git add -A
-git commit -m "type: what you implemented"
+git commit -m "type: description"
 ```
 
-**Conventional commit types:**
+Types: `feat`, `fix`, `refactor`, `docs`, `test`, `chore`
 
-- `feat:` - New feature
-- `fix:` - Bug fix
-- `refactor:` - Code restructuring
-- `docs:` - Documentation only
-- `test:` - Adding/updating tests
-- `chore:` - Maintenance tasks
-
-4. Done
+**Rules:**
 
-**Do NOT:**
-
-- Re-read the entire codebase
-- Search for additional context
-- Second-guess the plan
-- Do your own discovery phase
+- Trust the plan - don't re-analyze or re-plan
+- Start with context map locations, expand only as needed
+- Fix all critical/regular findings, use judgment on nits
+- Stop reading once you understand the change
 
-Be efficient. Trust @architect's context work. Just code.
modules/home/programs/terminal/opencode/config/agents/orchestrator.md:

@@ -1,46 +1,37 @@
 ---
-description: Orchestrates features or bug fixes by delegating to subagents
+description: Orchestrates development by delegating to subagents
 mode: primary
 temperature: 0.2
 maxSteps: 50
 permission:
   "*": deny
-  task: allow
+  task:
+    "*": deny
+    planner: allow
+    developer: allow
+    reviewer: allow
 ---
 
-You are a workflow orchestrator. You ONLY call subagents - you never analyze, plan, code, or review yourself. Your high level flow is @architect -> @developer -> @reviewer
+You orchestrate development by delegating to subagents. Never code yourself.
 
-**Your subagents:**
+**Subagents:**
 
-- **@architect** - Analyzes requirements and creates plans
-- **@developer** - Implements the plan from @architect
-- **@reviewer** - Reviews the implementation from @developer
+- **@planner** - Creates implementation plans in `./plans/`
+- **@developer** - Implements from plan files
+- **@reviewer** - Reviews implementations
 
-**Your workflow:**
+**Workflow:**
 
-1. Call @architect with user requirements.
-2. Present the plan to the user for approval or changes.
-3. If the user requests changes:
-   - Call @architect again with the feedback.
-   - Repeat step 2.
-4. Once the plan is approved, call @developer with the full, unmodified plan.
-5. Call @reviewer with the @developer output.
-6. If the verdict is NEEDS_WORK:
-   - Call @developer with the plan + review feedback.
-7. Repeat steps 5-6 until the implementation is APPROVED or APPROVED_WITH_NITS.
-8. Report completion to the user:
-   - If APPROVED: "Implementation complete and approved."
-   - If APPROVED_WITH_NITS: "Implementation complete. Optional improvements available: [list nits]. Address these? (yes/no)"
-9. If the user wants nits fixed:
-   - Call @developer with the plan + nit list.
-   - Call @reviewer one final time.
-10. Done.
+1. **Plan**: Call @planner with requirements
+2. **Review Plan**: Show user the plan path, ask for approval
+3. **Develop**: Call @developer with plan file path
+4. **Review Code**: Call @reviewer with implementation
+5. **Iterate**: If NEEDS_WORK, call @developer with plan + feedback
+6. **Done**: When APPROVED or APPROVED_WITH_NITS
 
 **Rules:**
 
-- Never do the work yourself - always delegate
-- Pass information between agents clearly, do not leave out context from the previous agent
-- On iteration 2+ of develop→review, always include both plan AND review feedback
-- Keep user informed of which agent is working
-- Nits are optional - don't require fixes
-- Stop when code is approved or only nits remain
+- Always pass plan file path to @developer (not plan content)
+- Include review feedback on iterations
+- Nits are optional - ask user if they want them fixed
+- Keep user informed of current step
modules/home/programs/terminal/opencode/config/agents/planner.md (new file, 100 lines):

@@ -0,0 +1,100 @@
+---
+description: Explores codebase and breaks features into ordered implementation tasks. Writes plans to ./plans/
+mode: subagent
+temperature: 0.3
+permission:
+  "*": deny
+  context7_*: allow
+  edit: allow
+  glob: allow
+  grep: allow
+  list: allow
+  lsp: allow
+  read: allow
+---
+
+# Code Task Planner Agent
+
+You are a code analysis agent that breaks down feature requests into implementable, independent tasks.
+
+## Your Task
+
+1. **Analyze the codebase** using available tools (grep, lsp, read, etc.)
+2. **Identify dependencies** between components
+3. **Create ordered tasks** where each task can be implemented independently
+4. **Generate context maps** showing exact files and line numbers that need changes
+5. **Write the plan** to `./plans/<PLAN_NAME>.md`
+
+## Task Requirements
+
+- **Independent**: Each task should be implementable without future tasks
+- **Hierarchical**: Dependencies must come before dependents
+- **Specific**: Include exact file paths and line numbers
+- **Contextual**: Explain WHY each file matters (1-2 lines max)
+
+## Output Format
+
+Write to `./plans/<PLAN_NAME>.md` with this structure:
+
+```markdown
+# Plan: <PLAN_NAME>
+
+## Feature Overview
+
+<feature summary>
+
+## Implementation Tasks
+
+### Task 1: <Descriptive Title>
+
+**Context Map:**
+
+- `<file_path>:<line_number>` - <why it's relevant or what changes>
+- `<file_path>:<line_number>` - <why it's relevant or what changes>
+
+---
+
+### Task 2: <Descriptive Title>
+
+**Context Map:**
+
+- `<file_path>:<line_number>` - <why it's relevant or what changes>
+
+---
+```
+
+## Analysis Strategy
+
+1. **Start with interfaces/contracts** - these are foundational
+2. **Then implementations** - concrete types that satisfy interfaces
+3. **Then handlers/controllers** - code that uses the implementations
+4. **Finally integrations** - wiring everything together
+
+## Context Map Guidelines
+
+- Use exact line numbers from actual code analysis
+- Be specific: "Add AddChat method" not "modify file"
+- Include both new additions AND modifications to existing code
+- If a file doesn't exist yet, use line 0 and note "new file"
+
+## Example
+
+```markdown
+### Task 1: Add Store Interface Methods
+
+**Context Map:**
+
+- `./internal/store/interface.go:15` - Add Conversation struct definition
+- `./internal/store/interface.go:28` - Add AddConversation method to Store interface
+- `./internal/store/interface.go:32` - Add AddMessage method to Store interface
+```
+
+Remember: The context map is what developers see FIRST, so make it count!
+
+## Completion
+
+After writing the plan file, respond with:
+
+**Plan created:** `<PLAN_NAME>`
+**Path:** `./plans/<PLAN_NAME>.md`
+**Tasks:** <number of tasks>
modules/home/programs/terminal/opencode/config/agents/reviewer.md:

@@ -1,5 +1,5 @@
 ---
-description: Expert code reviewer providing structured feedback on implementations
+description: Reviews implementations and provides structured feedback
 mode: subagent
 temperature: 0.2
 permission:
@@ -19,50 +19,35 @@ permission:
   read: allow
 ---
 
-You are an expert code reviewer. Review implementations and provide structured feedback.
+You review code implementations.
 
-**Your process:**
+**Process:**
 
-- Check for uncommitted changes first: `git status`
-- If there are uncommitted changes, respond:
-  "ERROR: Found uncommitted changes. @developer must run `git add -A && git commit -m "type: description"` first."
-- Otherwise, review the latest commit with `git show`
-- Read full files for additional context only if needed
-- Focus on the actual changes made by @developer
+1. Check `git status` - if uncommitted changes, stop and tell @developer to commit
+2. Review latest commit with `git show`
+3. Read full files only if needed for context
 
-**You MUST start your response with a verdict line:**
+**Response format:**
 
 VERDICT: [APPROVED | NEEDS_WORK | APPROVED_WITH_NITS]
 
-**Then categorize all findings:**
+**Critical:** (security, logic errors, data corruption)
 
-**Critical Findings** (must fix):
+- Finding 1
+- Finding 2
 
-- Security vulnerabilities
-- Logical errors
-- Data corruption risks
-- Breaking changes
+**Regular:** (quality, error handling, performance)
 
-**Regular Findings** (should fix):
+- Finding 1
 
-- Code quality issues
-- Missing error handling
-- Performance problems
-- Maintainability concerns
+**Nits:** (style, minor improvements)
 
-**Nits** (optional):
-
-- Style preferences
-- Minor optimizations
-- Documentation improvements
-- Naming suggestions
+- Finding 1
 
 **Verdict rules:**
 
-- NEEDS_WORK: Any critical or regular findings exist
-- APPROVED_WITH_NITS: Only nits remain
-- APPROVED: No findings at all
+- NEEDS_WORK: Any critical or regular findings
+- APPROVED_WITH_NITS: Only nits
+- APPROVED: No findings
 
-If you list any critical or regular findings, your verdict MUST be NEEDS_WORK.
-
-Be thorough but fair. Don't bikeshed.
+Be thorough, not pedantic.
modules/home/programs/terminal/opencode/default.nix:

@@ -2,10 +2,15 @@
 , pkgs
 , config
 , namespace
+, osConfig
 , ...
 }:
 let
   inherit (lib) mkIf;
+
+  helpers = import ./lib.nix { inherit lib; };
+  llamaSwapConfig = osConfig.${namespace}.services.llama-swap.config or { };
+
   cfg = config.${namespace}.programs.terminal.opencode;
 in
 {
@@ -21,7 +26,7 @@ in
     enableMcpIntegration = true;
     agents = {
       orchestrator = ./config/agents/orchestrator.md;
-      architect = ./config/agents/architect.md;
+      planner = ./config/agents/planner.md;
       developer = ./config/agents/developer.md;
       reviewer = ./config/agents/reviewer.md;
       agent-creator = ./config/agents/agent-creator.md;
@@ -38,48 +43,13 @@ in
     content = builtins.toJSON {
       "$schema" = "https://opencode.ai/config.json";
      theme = "catppuccin";
-      # model = "llama-swap/devstral-small-2-instruct";
      provider = {
        "llama-swap" = {
          npm = "@ai-sdk/openai-compatible";
          options = {
            baseURL = "https://llm-api.va.reichard.io/v1";
          };
-          models = {
-            "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct" = {
-              name = "Qwen3 Coder (480B) Instruct";
-            };
-            "hf:zai-org/GLM-4.7" = {
-              name = "GLM 4.7";
-            };
-            "hf:MiniMaxAI/MiniMax-M2.1" = {
-              name = "MiniMax M2.1";
-            };
-            devstral-small-2-instruct = {
-              name = "Devstral Small 2 (24B)";
-            };
-            qwen3-coder-30b-instruct = {
-              name = "Qwen3 Coder (30B)";
-            };
-            nemotron-3-nano-30b-thinking = {
-              name = "Nemotron 3 Nano (30B) - Thinking";
-            };
-            gpt-oss-20b-thinking = {
-              name = "GPT OSS (20B)";
-            };
-            qwen3-next-80b-instruct = {
-              name = "Qwen3 Next (80B) - Instruct";
-            };
-            qwen3-30b-2507-thinking = {
-              name = "Qwen3 2507 (30B) Thinking";
-            };
-            qwen3-30b-2507-instruct = {
-              name = "Qwen3 2507 (30B) Instruct";
-            };
-            qwen3-4b-2507-instruct = {
-              name = "Qwen3 2507 (4B) - Instruct";
-            };
-          };
+          models = helpers.toOpencodeModels llamaSwapConfig;
        };
      };
      lsp = {
modules/home/programs/terminal/opencode/lib.nix (new file, 53 lines):

@@ -0,0 +1,53 @@
+{ lib }:
+let
+  inherit (lib)
+    mapAttrs
+    filterAttrs
+    any
+    flatten
+    listToAttrs
+    nameValuePair
+    ;
+in
+{
+  # Convert llama-swap models to opencode format
+  toOpencodeModels =
+    llamaSwapConfig:
+    let
+      textGenModels = filterAttrs
+        (
+          name: model: any (t: t == "text-generation") (model.metadata.type or [ ])
+        )
+        (llamaSwapConfig.models or { });
+
+      localModels = mapAttrs
+        (
+          name: model:
+          {
+            inherit (model) name;
+          }
+          // (
+            if model.macros.ctx or null != null then
+              {
+                limit = {
+                  context = lib.toInt model.macros.ctx;
+                  input = lib.toInt model.macros.ctx;
+                  output = lib.toInt model.macros.ctx;
+                };
+              }
+            else
+              { }
+          )
+        )
+        textGenModels;
+
+      peerModels = listToAttrs (
+        flatten (
+          map (peer: map (modelName: nameValuePair modelName { name = modelName; }) peer.models) (
+            builtins.attrValues (llamaSwapConfig.peers or { })
+          )
+        )
+      );
+    in
+    localModels // peerModels;
+}
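
For reference, a minimal sketch of what `toOpencodeModels` consumes and produces; the model and peer names below are hypothetical, not part of the commit:

```nix
# Hypothetical input/output for toOpencodeModels; names are illustrative only.
let
  helpers = import ./lib.nix { inherit lib; };
  sample = {
    models."example-7b" = {
      name = "Example (7B)";
      macros.ctx = "8192";
      metadata.type = [ "text-generation" ];
    };
    peers.example-peer.models = [ "hf:example/Remote-Model" ];
  };
in
helpers.toOpencodeModels sample
# => {
#   "example-7b" = {
#     name = "Example (7B)";
#     limit = { context = 8192; input = 8192; output = 8192; };
#   };
#   "hf:example/Remote-Model" = { name = "hf:example/Remote-Model"; };
# }
```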
swww module (Nix):

@@ -1,4 +1,9 @@
-{ config, lib, pkgs, namespace, ... }:
+{ config
+, lib
+, pkgs
+, namespace
+, ...
+}:
 let
   cfg = config.${namespace}.services.swww;
 in
modules/nixos/services/llama-swap/config.nix (new file, 454 lines):

@@ -0,0 +1,454 @@
+{ pkgs }:
+let
+  llama-cpp = pkgs.reichard.llama-cpp;
+  stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
+    cudaSupport = true;
+  };
+in
+{
+  models = {
+    # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
+    "devstral-small-2-instruct" = {
+      name = "Devstral Small 2 (24B) - Instruct";
+      macros.ctx = "98304";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
+          --chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
+          --temp 0.15 \
+          -c ''${ctx} \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -fit off \
+          -dev CUDA0
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/GLM-4-32B-0414-GGUF/tree/main
+    "glm-4-32b-instruct" = {
+      name = "GLM 4 (32B) - Instruct";
+      macros.ctx = "32768";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/GLM/GLM-4-32B-0414-Q4_K_M.gguf \
+          -c ''${ctx} \
+          --temp 0.6 \
+          --top-k 40 \
+          --top-p 0.95 \
+          --min-p 0.0 \
+          -fit off \
+          -dev CUDA0
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
+    "gpt-oss-20b-thinking" = {
+      name = "GPT OSS (20B) - Thinking";
+      macros.ctx = "131072";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
+          -c ''${ctx} \
+          --temp 1.0 \
+          --top-p 1.0 \
+          --top-k 40 \
+          -dev CUDA0
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main
+    "gpt-oss-csec-20b-thinking" = {
+      name = "GPT OSS CSEC (20B) - Thinking";
+      macros.ctx = "131072";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
+          -c ''${ctx} \
+          --temp 1.0 \
+          --top-p 1.0 \
+          --top-k 40 \
+          -dev CUDA0
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
+    "qwen3-next-80b-instruct" = {
+      name = "Qwen3 Next (80B) - Instruct";
+      macros.ctx = "262144";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
+          -c ''${ctx} \
+          --temp 0.7 \
+          --min-p 0.0 \
+          --top-p 0.8 \
+          --top-k 20 \
+          --repeat-penalty 1.05 \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -fit off
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
+    "qwen3-30b-2507-instruct" = {
+      name = "Qwen3 2507 (30B) - Instruct";
+      macros.ctx = "262144";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
+          -c ''${ctx} \
+          --temp 0.7 \
+          --min-p 0.0 \
+          --top-p 0.8 \
+          --top-k 20 \
+          --presence-penalty 1.0 \
+          --repeat-penalty 1.0 \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -ts 70,30 \
+          -fit off
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
+    "qwen3-coder-30b-instruct" = {
+      name = "Qwen3 Coder (30B) - Instruct";
+      macros.ctx = "131072";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
+          -c ''${ctx} \
+          --temp 0.7 \
+          --min-p 0.0 \
+          --top-p 0.8 \
+          --top-k 20 \
+          --repeat-penalty 1.05 \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -ts 70,30 \
+          -fit off
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
+    "qwen3-30b-2507-thinking" = {
+      name = "Qwen3 2507 (30B) - Thinking";
+      macros.ctx = "262144";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
+          -c ''${ctx} \
+          --temp 0.6 \
+          --min-p 0.0 \
+          --top-p 0.95 \
+          --top-k 20 \
+          --presence-penalty 1.0 \
+          --repeat-penalty 1.0 \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -ts 70,30 \
+          -fit off
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main
+    "nemotron-3-nano-30b-thinking" = {
+      name = "Nemotron 3 Nano (30B) - Thinking";
+      macros.ctx = "1048576";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
+          -c ''${ctx} \
+          --temp 1.1 \
+          --top-p 0.95 \
+          -fit off
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
+    "qwen3-8b-vision" = {
+      name = "Qwen3 Vision (8B) - Thinking";
+      macros.ctx = "65536";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
+          --mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
+          -c ''${ctx} \
+          --temp 0.7 \
+          --min-p 0.0 \
+          --top-p 0.8 \
+          --top-k 20 \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -fit off \
+          -dev CUDA1
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
+    "qwen2.5-coder-7b-instruct" = {
+      name = "Qwen2.5 Coder (7B) - Instruct";
+      macros.ctx = "131072";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
+          --fim-qwen-7b-default \
+          -c ''${ctx} \
+          --port ''${PORT} \
+          -fit off \
+          -dev CUDA1
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
+    "qwen2.5-coder-3b-instruct" = {
+      name = "Qwen2.5 Coder (3B) - Instruct";
+      macros.ctx = "131072";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
+          --fim-qwen-3b-default \
+          --port ''${PORT} \
+          -c ''${ctx} \
+          -fit off \
+          -dev CUDA1
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
+    "qwen3-4b-2507-instruct" = {
+      name = "Qwen3 2507 (4B) - Instruct";
+      macros.ctx = "98304";
+      cmd = ''
+        ${llama-cpp}/bin/llama-server \
+          --port ''${PORT} \
+          -m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
+          -c ''${ctx} \
+          -fit off \
+          -ctk q8_0 \
+          -ctv q8_0 \
+          -dev CUDA1
+      '';
+      metadata = {
+        type = [ "text-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    # ---------------------------------------
+    # ---------- Stable Diffussion ----------
+    # ---------------------------------------
+
+    "z-image-turbo" = {
+      name = "Z-Image-Turbo";
+      checkEndpoint = "/";
+      cmd = ''
+        ${stable-diffusion-cpp}/bin/sd-server \
+          --listen-port ''${PORT} \
+          --diffusion-fa \
+          --diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
+          --vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
+          --llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
+          --cfg-scale 1.0 \
+          --steps 8 \
+          --rng cuda
+      '';
+      metadata = {
+        type = [ "image-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    "qwen-image-edit-2511" = {
+      name = "Qwen Image Edit 2511";
+      checkEndpoint = "/";
+      cmd = ''
+        ${stable-diffusion-cpp}/bin/sd-server \
+          --listen-port ''${PORT} \
+          --diffusion-fa \
+          --qwen-image-zero-cond-t \
+          --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \
+          --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
+          --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
+          --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
+          --cfg-scale 2.5 \
+          --sampling-method euler \
+          --flow-shift 3 \
+          --steps 20 \
+          --rng cuda
+      '';
+      metadata = {
+        type = [
+          "image-edit"
+          "image-generation"
+        ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    "qwen-image-2512" = {
+      name = "Qwen Image 2512";
+      checkEndpoint = "/";
+      cmd = ''
+        ${stable-diffusion-cpp}/bin/sd-server \
+          --listen-port ''${PORT} \
+          --diffusion-fa \
+          --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \
+          --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
+          --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
+          --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
+          --cfg-scale 2.5 \
+          --sampling-method euler \
+          --flow-shift 3 \
+          --steps 20 \
+          --rng cuda
+      '';
+      metadata = {
+        type = [ "image-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+
+    "chroma-radiance" = {
+      name = "Chroma Radiance";
+      checkEndpoint = "/";
+      cmd = ''
+        ${stable-diffusion-cpp}/bin/sd-server \
+          --listen-port ''${PORT} \
+          --diffusion-fa --chroma-disable-dit-mask \
+          --diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \
+          --t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \
+          --cfg-scale 4.0 \
+          --sampling-method euler \
+          --rng cuda
+      '';
+      metadata = {
+        type = [ "image-generation" ];
+      };
+      env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
+    };
+  };
+
+  groups = {
+    shared = {
+      swap = true;
+      exclusive = false;
+      members = [
+        "nemotron-3-nano-30b-thinking"
+        "qwen3-30b-2507-instruct"
+        "qwen3-30b-2507-thinking"
+        "qwen3-coder-30b-instruct"
+        "qwen3-next-80b-instruct"
+      ];
+    };
+
+    cuda0 = {
+      swap = true;
+      exclusive = false;
+      members = [
+        "devstral-small-2-instruct"
+        "glm-4-32b-instruct"
+        "gpt-oss-20b-thinking"
+        "gpt-oss-csec-20b-thinking"
+      ];
+    };
+
+    cuda1 = {
+      swap = true;
+      exclusive = false;
+      members = [
+        "qwen2.5-coder-3b-instruct"
+        "qwen2.5-coder-7b-instruct"
+        "qwen3-4b-2507-instruct"
+        "qwen3-8b-vision"
+      ];
+    };
+  };
+
+  peers = {
+    synthetic = {
+      proxy = "https://api.synthetic.new/openai/";
+      models = [
+        "hf:deepseek-ai/DeepSeek-R1-0528"
+        "hf:deepseek-ai/DeepSeek-V3"
+        "hf:deepseek-ai/DeepSeek-V3-0324"
+        "hf:deepseek-ai/DeepSeek-V3.1"
+        "hf:deepseek-ai/DeepSeek-V3.1-Terminus"
+        "hf:deepseek-ai/DeepSeek-V3.2"
+        "hf:meta-llama/Llama-3.3-70B-Instruct"
+        "hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+        "hf:MiniMaxAI/MiniMax-M2"
+        "hf:MiniMaxAI/MiniMax-M2.1"
+        "hf:moonshotai/Kimi-K2-Instruct-0905"
+        "hf:moonshotai/Kimi-K2-Thinking"
+        "hf:openai/gpt-oss-120b"
+        "hf:Qwen/Qwen3-235B-A22B-Instruct-2507"
+        "hf:Qwen/Qwen3-235B-A22B-Thinking-2507"
+        "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct"
+        "hf:Qwen/Qwen3-VL-235B-A22B-Instruct"
+        "hf:zai-org/GLM-4.5"
+        "hf:zai-org/GLM-4.6"
+        "hf:zai-org/GLM-4.7"
+      ];
+    };
+  };
+}
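
One subtlety in the `cmd` strings above: inside a Nix indented string, `''${...}` is an escape that emits a literal `${...}`. So `''${PORT}` and `''${ctx}` pass through unevaluated for llama-swap to expand at runtime (`${ctx}` presumably resolving via the model's `macros`), while the unescaped `${llama-cpp}` is interpolated by Nix at evaluation time. A small sketch (the store path is a placeholder):

```nix
# Sketch of the escaping behavior used in the cmd fields above.
let
  llama-cpp = "/nix/store/...-llama-cpp"; # placeholder path
in
''
  ${llama-cpp}/bin/llama-server --port ''${PORT} -c ''${ctx}
''
# => "/nix/store/...-llama-cpp/bin/llama-server --port ${PORT} -c ${ctx}\n"
```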
modules/nixos/services/llama-swap/default.nix:

@@ -5,18 +5,20 @@
 , ...
 }:
 let
-  inherit (lib) mkIf mkEnableOption;
+  inherit (lib) mkIf mkEnableOption recursiveUpdate;
   cfg = config.${namespace}.services.llama-swap;
 
   llama-swap = pkgs.reichard.llama-swap;
-  llama-cpp = pkgs.reichard.llama-cpp;
-  stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
-    cudaSupport = true;
-  };
 in
 {
   options.${namespace}.services.llama-swap = {
     enable = mkEnableOption "enable llama-swap service";
+    config = lib.mkOption {
+      type = lib.types.unspecified;
+      default = import ./config.nix { inherit pkgs; };
+      readOnly = true;
+      description = "The llama-swap configuration data";
+    };
   };
 
   config = mkIf cfg.enable {
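
The new `config` option is read-only and simply re-exports the data from `config.nix`, so other modules can consume the model list without re-importing the file. A minimal sketch of a consumer, mirroring what the opencode home module above does through `osConfig`:

```nix
# Sketch: reading the shared llama-swap config from another module.
# osConfig is the NixOS system configuration as exposed to home-manager
# modules; `or { }` keeps evaluation safe on hosts without the service.
{ osConfig, namespace, ... }:
let
  llamaSwapConfig = osConfig.${namespace}.services.llama-swap.config or { };
  modelNames = builtins.attrNames (llamaSwapConfig.models or { });
in
{
  # use modelNames / llamaSwapConfig here
}
```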
@@ -92,413 +94,11 @@ in
       owner = "llama-swap";
       group = "llama-swap";
       mode = "0400";
-      content = builtins.toJSON {
-        models = {
-          # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
-          "devstral-small-2-instruct" = {
-            name = "Devstral Small 2 (24B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
-                --chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
-                --temp 0.15 \
-                -c 98304 \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -fit off \
-                -dev CUDA0
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
-          "gpt-oss-20b-thinking" = {
-            name = "GPT OSS (20B) - Thinking";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
-                -c 131072 \
-                --temp 1.0 \
-                --top-p 1.0 \
-                --top-k 40 \
-                -dev CUDA0
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/mradermacher/GPT-OSS-Cybersecurity-20B-Merged-i1-GGUF/tree/main
-          "gpt-oss-csec-20b-thinking" = {
-            name = "GPT OSS CSEC (20B) - Thinking";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
-                -c 131072 \
-                --temp 1.0 \
-                --top-p 1.0 \
-                --top-k 40 \
-                -dev CUDA0
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
-          "qwen3-next-80b-instruct" = {
-            name = "Qwen3 Next (80B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
-                -c 262144 \
-                --temp 0.7 \
-                --min-p 0.0 \
-                --top-p 0.8 \
-                --top-k 20 \
-                --repeat-penalty 1.05 \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -fit off
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
-          "qwen3-30b-2507-instruct" = {
-            name = "Qwen3 2507 (30B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
-                -c 262144 \
-                --temp 0.7 \
-                --min-p 0.0 \
-                --top-p 0.8 \
-                --top-k 20 \
-                --repeat-penalty 1.05 \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -ts 70,30 \
-                -fit off
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
-          "qwen3-coder-30b-instruct" = {
-            name = "Qwen3 Coder (30B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
-                -c 131072 \
-                --temp 0.7 \
-                --min-p 0.0 \
-                --top-p 0.8 \
-                --top-k 20 \
-                --repeat-penalty 1.05 \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -ts 70,30 \
-                -fit off
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
-          "qwen3-30b-2507-thinking" = {
-            name = "Qwen3 2507 (30B) - Thinking";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
-                -c 262144 \
-                --temp 0.7 \
-                --min-p 0.0 \
-                --top-p 0.8 \
-                --top-k 20 \
-                --repeat-penalty 1.05 \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -ts 70,30 \
-                -fit off
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Nemotron-3-Nano-30B-A3B-GGUF/tree/main
-          "nemotron-3-nano-30b-thinking" = {
-            name = "Nemotron 3 Nano (30B) - Thinking";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
-                -c 1048576 \
-                --temp 1.1 \
-                --top-p 0.95 \
-                -fit off
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
-          "qwen3-8b-vision" = {
-            name = "Qwen3 Vision (8B) - Thinking";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
-                --mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
-                -c 65536 \
-                --temp 0.7 \
-                --min-p 0.0 \
-                --top-p 0.8 \
-                --top-k 20 \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -fit off \
-                -dev CUDA1
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
-          "qwen2.5-coder-7b-instruct" = {
-            name = "Qwen2.5 Coder (7B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
-                --fim-qwen-7b-default \
-                -c 131072 \
-                --port ''${PORT} \
-                -fit off \
-                -dev CUDA1
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
-          "qwen2.5-coder-3b-instruct" = {
-            name = "Qwen2.5 Coder (3B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
-                --fim-qwen-3b-default \
-                --port ''${PORT} \
-                -fit off \
-                -dev CUDA1
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
-          "qwen3-4b-2507-instruct" = {
-            name = "Qwen3 2507 (4B) - Instruct";
-            cmd = ''
-              ${llama-cpp}/bin/llama-server \
-                --port ''${PORT} \
-                -m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
-                -c 98304 \
-                -fit off \
-                -ctk q8_0 \
-                -ctv q8_0 \
-                -dev CUDA1
-            '';
-            metadata = {
-              type = [ "text-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          "z-image-turbo" = {
-            name = "Z-Image-Turbo";
-            checkEndpoint = "/";
-            cmd = ''
-              ${stable-diffusion-cpp}/bin/sd-server \
-                --listen-port ''${PORT} \
-                --diffusion-fa \
-                --diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
-                --vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
-                --llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
-                --cfg-scale 1.0 \
-                --steps 8 \
-                --rng cuda
-            '';
-            metadata = {
-              type = [ "image-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          # https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/tree/main
-          "qwen-image-edit-2511" = {
-            name = "Qwen Image Edit 2511";
-            checkEndpoint = "/";
-            cmd = ''
-              ${stable-diffusion-cpp}/bin/sd-server \
-                --listen-port ''${PORT} \
-                --diffusion-fa \
-                --qwen-image-zero-cond-t \
-                --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-edit-2511-Q5_K_M.gguf \
-                --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
-                --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
-                --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
-                --cfg-scale 2.5 \
-                --sampling-method euler \
-                --flow-shift 3 \
-                --steps 20 \
-                --rng cuda
-            '';
-            metadata = {
-              type = [
-                "image-edit"
-                "image-generation"
-              ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          "qwen-image-2512" = {
-            name = "Qwen Image 2512";
-            checkEndpoint = "/";
-            cmd = ''
-              ${stable-diffusion-cpp}/bin/sd-server \
-                --listen-port ''${PORT} \
-                --diffusion-fa \
-                --diffusion-model /mnt/ssd/StableDiffusion/QwenImage/qwen-image-2512-Q5_K_M.gguf \
-                --vae /mnt/ssd/StableDiffusion/QwenImage/qwen_image_vae.safetensors \
-                --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
-                --lora-model-dir /mnt/ssd/StableDiffusion/QwenImage/Loras \
-                --cfg-scale 2.5 \
-                --sampling-method euler \
-                --flow-shift 3 \
-                --steps 20 \
-                --rng cuda
-            '';
-            metadata = {
-              type = [ "image-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-
-          "chroma-radiance" = {
-            name = "Chroma Radiance";
-            checkEndpoint = "/";
-            cmd = ''
-              ${stable-diffusion-cpp}/bin/sd-server \
-                --listen-port ''${PORT} \
-                --diffusion-fa --chroma-disable-dit-mask \
-                --diffusion-model /mnt/ssd/StableDiffusion/Chroma/chroma_radiance_x0_q8.gguf \
-                --t5xxl /mnt/ssd/StableDiffusion/Chroma/t5xxl_fp16.safetensors \
-                --cfg-scale 4.0 \
-                --sampling-method euler \
-                --rng cuda
-            '';
-            metadata = {
-              type = [ "image-generation" ];
-            };
-            env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ];
-          };
-        };
-
-        groups = {
-          shared = {
-            swap = true;
-            exclusive = false;
-            members = [
-              "nemotron-3-nano-30b-thinking"
-              "qwen3-30b-2507-instruct"
-              "qwen3-30b-2507-thinking"
-              "qwen3-coder-30b-instruct"
-              "qwen3-next-80b-instruct"
-            ];
-          };
-
-          cuda0 = {
-            swap = true;
-            exclusive = false;
-            members = [
-              "devstral-small-2-instruct"
-              "gpt-oss-20b-thinking"
-              "gpt-oss-csec-20b-thinking"
-            ];
-          };
-
-          cuda1 = {
-            swap = true;
-            exclusive = false;
-            members = [
-              "qwen2.5-coder-3b-instruct"
-              "qwen2.5-coder-7b-instruct"
-              "qwen3-4b-2507-instruct"
-              "qwen3-8b-vision"
-            ];
-          };
-        };
-
-        peers = {
-          synthetic = {
-            proxy = "https://api.synthetic.new/openai/";
-            apiKey = "${config.sops.placeholder.synthetic_apikey}";
-            models = [
-              "hf:deepseek-ai/DeepSeek-R1-0528"
-              "hf:deepseek-ai/DeepSeek-V3"
-              "hf:deepseek-ai/DeepSeek-V3-0324"
-              "hf:deepseek-ai/DeepSeek-V3.1"
-              "hf:deepseek-ai/DeepSeek-V3.1-Terminus"
-              "hf:deepseek-ai/DeepSeek-V3.2"
-              "hf:meta-llama/Llama-3.3-70B-Instruct"
-              "hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
-              "hf:MiniMaxAI/MiniMax-M2"
-              "hf:MiniMaxAI/MiniMax-M2.1"
-              "hf:moonshotai/Kimi-K2-Instruct-0905"
-              "hf:moonshotai/Kimi-K2-Thinking"
-              "hf:openai/gpt-oss-120b"
-              "hf:Qwen/Qwen3-235B-A22B-Instruct-2507"
-              "hf:Qwen/Qwen3-235B-A22B-Thinking-2507"
-              "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct"
-              "hf:Qwen/Qwen3-VL-235B-A22B-Instruct"
-              "hf:zai-org/GLM-4.5"
-              "hf:zai-org/GLM-4.6"
-              "hf:zai-org/GLM-4.7"
-            ];
-          };
-        };
-      };
+      content = builtins.toJSON (
+        recursiveUpdate cfg.config {
+          peers.synthetic.apiKey = config.sops.placeholder.synthetic_apikey;
+        }
+      );
     };
   };
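
The secret never lands in `config.nix`; it is grafted in at render time with `lib.recursiveUpdate`, which merges attribute sets recursively and lets the right-hand side win on conflicts. A small sketch of the merge (the placeholder string stands in for the sops template value):

```nix
# Sketch of the recursiveUpdate above: apiKey is added to peers.synthetic
# without disturbing its sibling attributes.
lib.recursiveUpdate
  { peers.synthetic = { proxy = "https://api.synthetic.new/openai/"; models = [ ]; }; }
  { peers.synthetic.apiKey = "<sops placeholder>"; }
# => { peers.synthetic = {
#        proxy = "https://api.synthetic.new/openai/";
#        models = [ ];
#        apiKey = "<sops placeholder>";
#      }; }
```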