refactor: migrate from opencode to codecompanion with updated model configurations
This commit is contained in:
@@ -1,10 +1,6 @@
|
|||||||
local llm_endpoint = "https://llm-api.va.reichard.io"
|
local llm_endpoint = "https://llm-api.va.reichard.io"
|
||||||
-- local llm_assistant_model = "gpt-oss-20b-thinking"
|
local llm_assistant_model = "devstral-small-2-instruct"
|
||||||
-- local llm_infill_model = "qwen2.5-coder-3b-instruct"
|
local llm_infill_model = "qwen2.5-coder-3b-instruct"
|
||||||
|
|
||||||
-- Available models: qwen3-30b-2507-instruct, qwen2.5-coder-3b-instruct
|
|
||||||
local llm_assistant_model = "qwen3-30b-2507-instruct"
|
|
||||||
local llm_infill_model = llm_assistant_model
|
|
||||||
|
|
||||||
-- Default Llama - Toggle Llama & Copilot
|
-- Default Llama - Toggle Llama & Copilot
|
||||||
-- vim.g.copilot_filetypes = { ["*"] = false }
|
-- vim.g.copilot_filetypes = { ["*"] = false }
|
||||||
@@ -25,23 +21,6 @@ local function toggle_llm_fim_provider()
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
-- OpenCode Configuration
|
|
||||||
vim.g.opencode_opts = {
|
|
||||||
provider = {
|
|
||||||
enabled = "snacks",
|
|
||||||
snacks = {
|
|
||||||
win = {
|
|
||||||
-- position = "float",
|
|
||||||
enter = true,
|
|
||||||
width = 0.5,
|
|
||||||
-- height = 0.75,
|
|
||||||
},
|
|
||||||
start_insert = true,
|
|
||||||
auto_insert = true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
-- Copilot Configuration
|
-- Copilot Configuration
|
||||||
vim.g.copilot_no_tab_map = true
|
vim.g.copilot_no_tab_map = true
|
||||||
|
|
||||||
@@ -54,13 +33,54 @@ vim.g.llama_config = {
|
|||||||
enable_at_startup = false,
|
enable_at_startup = false,
|
||||||
}
|
}
|
||||||
|
|
||||||
-- Create KeyMaps
|
-- Configure Code Companion
|
||||||
|
require("plugins.codecompanion.fidget-spinner"):init()
|
||||||
|
local codecompanion = require("codecompanion")
|
||||||
|
-- Configure CodeCompanion: chat window display, the HTTP and ACP adapters,
-- which adapter each strategy uses, and chat memory.
codecompanion.setup({
|
||||||
|
display = {
|
||||||
|
chat = {
|
||||||
|
show_token_count = true,
|
||||||
|
window = {
|
||||||
|
layout = "float",
|
||||||
|
width = 0.6,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
-- Adapters: "copilot" plus a custom "llamaswap" HTTP adapter, and "opencode" over ACP.
adapters = {
|
||||||
|
http = {
|
||||||
|
opts = { show_defaults = false, },
|
||||||
|
copilot = "copilot",
|
||||||
|
-- Extends the "openai_compatible" adapter, pointing it at llm_endpoint and
-- defaulting its model to llm_assistant_model (both declared earlier in this file).
llamaswap = function()
|
||||||
|
return require("codecompanion.adapters").extend("openai_compatible", {
|
||||||
|
formatted_name = "LlamaSwap",
|
||||||
|
name = "llamaswap",
|
||||||
|
schema = { model = { default = llm_assistant_model } },
|
||||||
|
env = { url = llm_endpoint },
|
||||||
|
})
|
||||||
|
end,
|
||||||
|
},
|
||||||
|
acp = {
|
||||||
|
opts = { show_defaults = false },
|
||||||
|
opencode = "opencode",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
-- Chat goes through the "opencode" adapter; inline and cmd use "llamaswap".
strategies = {
|
||||||
|
chat = { adapter = "opencode" },
|
||||||
|
inline = { adapter = "llamaswap" },
|
||||||
|
cmd = { adapter = "llamaswap" },
|
||||||
|
},
|
||||||
|
-- NOTE(review): "dispay" looks like a misspelling of "display"; as written this
-- key is presumably ignored. Confirm the intended option name and location
-- against the CodeCompanion configuration docs before changing it.
chat = { dispay = "telescope" },
|
||||||
|
memory = { opts = { chat = { enabled = true } } },
|
||||||
|
})
|
||||||
|
|
||||||
|
-- Create KeyMaps for Code Companion
|
||||||
|
vim.keymap.set("n", "<leader>aa", codecompanion.actions, { desc = "Actions" })
|
||||||
vim.keymap.set("n", "<leader>af", toggle_llm_fim_provider, { desc = "Toggle FIM (Llama / Copilot)" })
|
vim.keymap.set("n", "<leader>af", toggle_llm_fim_provider, { desc = "Toggle FIM (Llama / Copilot)" })
|
||||||
vim.keymap.set({ "n", "x" }, "<leader>ai", function() require("opencode").ask("@this: ", { submit = true }) end,
|
vim.keymap.set("n", "<leader>ao", function() require("snacks.terminal").toggle("opencode") end,
|
||||||
{ desc = "Ask OpenCode" })
|
{ desc = "Toggle OpenCode" })
|
||||||
vim.keymap.set({ "n", "x" }, "<leader>aa", function() require("opencode").select() end,
|
vim.keymap.set("v", "<leader>ai", ":CodeCompanion<cr>", { desc = "Inline Prompt" })
|
||||||
{ desc = "Execute OpenCode Action" })
|
vim.keymap.set({ "n", "v" }, "<leader>an", codecompanion.chat, { desc = "New Chat" })
|
||||||
vim.keymap.set({ "n", "t" }, "<leader>at", function() require("opencode").toggle() end, { desc = "Toggle OpenCode" })
|
vim.keymap.set({ "n", "t" }, "<leader>at", codecompanion.toggle, { desc = "Toggle Chat" })
|
||||||
vim.keymap.set('i', '<C-J>', 'copilot#Accept("\\<CR>")', {
|
vim.keymap.set('i', '<C-J>', 'copilot#Accept("\\<CR>")', {
|
||||||
expr = true,
|
expr = true,
|
||||||
replace_keycodes = false
|
replace_keycodes = false
|
||||||
|
|||||||
@@ -52,6 +52,6 @@ require("lualine").setup({
|
|||||||
options = { theme = "catppuccin" },
|
options = { theme = "catppuccin" },
|
||||||
sections = {
|
sections = {
|
||||||
lualine_c = { { pr_status } },
|
lualine_c = { { pr_status } },
|
||||||
lualine_z = { require("opencode").statusline }
|
-- lualine_z = { require("opencode").statusline }
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,71 @@
|
|||||||
|
local progress = require("fidget.progress")
|
||||||
|
|
||||||
|
local M = {}
|
||||||
|
|
||||||
|
--- Register the CodeCompanion request hooks that drive the fidget spinner.
-- A progress handle is created when a request starts and is stored under the
-- request id; when the request finishes, the handle is looked up again, given
-- a final status message and finished.
function M:init()
  local augroup = vim.api.nvim_create_augroup("CodeCompanionFidgetHooks", {})

  -- Request started: open a progress handle and remember it by request id.
  local function on_started(request)
    local handle = M:create_progress_handle(request)
    M:store_progress_handle(request.data.id, handle)
  end

  -- Request finished: finish the matching handle, if one was recorded.
  local function on_finished(request)
    local handle = M:pop_progress_handle(request.data.id)
    if not handle then
      return
    end
    M:report_exit_status(handle, request)
    handle:finish()
  end

  -- Registered in order: started hook first, finished hook second.
  local hooks = {
    { pattern = "CodeCompanionRequestStarted", callback = on_started },
    { pattern = "CodeCompanionRequestFinished", callback = on_finished },
  }
  for _, hook in ipairs(hooks) do
    vim.api.nvim_create_autocmd({ "User" }, {
      pattern = hook.pattern,
      group = augroup,
      callback = hook.callback,
    })
  end
end
|
||||||
|
|
||||||
|
-- Progress handles for requests that have started but not yet finished,
-- keyed by request id.
M.handles = {}

--- Remember the progress handle that belongs to a request.
-- @param id key to file the handle under (init passes `request.data.id`)
-- @param handle progress handle to keep until the request finishes
function M:store_progress_handle(id, handle)
  local registry = M.handles
  registry[id] = handle
end
|
||||||
|
|
||||||
|
--- Remove and return the progress handle stored for a request.
-- @param id key the handle was stored under
-- @return the stored handle, or nil when nothing was stored for `id`
function M:pop_progress_handle(id)
  local registry = M.handles
  local found = registry[id]
  registry[id] = nil
  return found
end
|
||||||
|
|
||||||
|
--- Create a fidget progress handle for a CodeCompanion request.
-- @param request autocmd event table; `request.data.adapter` supplies the
--   adapter's `formatted_name` (and, through `llm_role_title`, its `model`)
-- @return the value returned by `progress.handle.create`
function M:create_progress_handle(request)
  local adapter = request.data.adapter
  -- Concatenating nil would raise inside the autocmd callback, so fall back to
  -- the adapter's plain `name`, then to a fixed label, when no display name is set.
  local label = adapter.formatted_name or adapter.name or "unknown"
  return progress.handle.create({
    title = " Requesting assistance (" .. label .. ")",
    message = "In progress...",
    lsp_client = {
      name = M:llm_role_title(adapter),
    },
  })
end
|
||||||
|
|
||||||
|
--- Build the label shown for the adapter: its formatted name, followed by the
-- model in parentheses when a non-empty model is set.
-- @param adapter table with `formatted_name` and an optional `model`
-- @return string the pieces joined with a single space
function M:llm_role_title(adapter)
  local pieces = { adapter.formatted_name }
  local model = adapter.model
  if model and model ~= "" then
    pieces[#pieces + 1] = "(" .. model .. ")"
  end
  return table.concat(pieces, " ")
end
|
||||||
|
|
||||||
|
--- Set the handle's final message from the request's status.
-- "success" and "error" have dedicated messages; any other status is reported
-- as cancelled.
-- @param handle progress handle whose `message` field is overwritten
-- @param request autocmd event table; `request.data.status` is read
function M:report_exit_status(handle, request)
  local messages = {
    success = "Completed",
    error = " Error",
  }
  local status = request.data.status
  -- Guard the lookup: only the two known status strings map to a message.
  local text = (status == "success" or status == "error") and messages[status]
  handle.message = text or " Cancelled"
end
|
||||||
|
|
||||||
|
return M
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
{
|
{ pkgs
|
||||||
pkgs,
|
, lib
|
||||||
lib,
|
, config
|
||||||
config,
|
, namespace
|
||||||
namespace,
|
, ...
|
||||||
...
|
|
||||||
}:
|
}:
|
||||||
let
|
let
|
||||||
inherit (lib) mkIf;
|
inherit (lib) mkIf;
|
||||||
@@ -28,9 +27,9 @@ in
|
|||||||
# ------------------
|
# ------------------
|
||||||
cmp-buffer # Buffer Word Completion
|
cmp-buffer # Buffer Word Completion
|
||||||
cmp-cmdline # Command Line Completion
|
cmp-cmdline # Command Line Completion
|
||||||
cmp_luasnip # Snippets Completion
|
|
||||||
cmp-nvim-lsp # Main LSP
|
cmp-nvim-lsp # Main LSP
|
||||||
cmp-path # Path Completion
|
cmp-path # Path Completion
|
||||||
|
cmp_luasnip # Snippets Completion
|
||||||
friendly-snippets # Snippets
|
friendly-snippets # Snippets
|
||||||
lsp_lines-nvim # Inline Diagnostics
|
lsp_lines-nvim # Inline Diagnostics
|
||||||
luasnip # Snippets
|
luasnip # Snippets
|
||||||
@@ -43,6 +42,7 @@ in
|
|||||||
comment-nvim # Code Comments
|
comment-nvim # Code Comments
|
||||||
copilot-vim # GitHub Copilot
|
copilot-vim # GitHub Copilot
|
||||||
diffview-nvim # Diff View
|
diffview-nvim # Diff View
|
||||||
|
fidget-nvim # Notification Helper
|
||||||
gitsigns-nvim # Git Blame
|
gitsigns-nvim # Git Blame
|
||||||
leap-nvim # Quick Movement
|
leap-nvim # Quick Movement
|
||||||
markdown-preview-nvim # Markdown Preview
|
markdown-preview-nvim # Markdown Preview
|
||||||
@@ -51,7 +51,6 @@ in
|
|||||||
nvim-autopairs # Automatically Close Pairs (),[],{}
|
nvim-autopairs # Automatically Close Pairs (),[],{}
|
||||||
octo-nvim # Git Octo
|
octo-nvim # Git Octo
|
||||||
render-markdown-nvim # Markdown Renderer
|
render-markdown-nvim # Markdown Renderer
|
||||||
snacks-nvim # OpenCode
|
|
||||||
snacks-nvim # Snacks
|
snacks-nvim # Snacks
|
||||||
telescope-nvim # Fuzzy Finder
|
telescope-nvim # Fuzzy Finder
|
||||||
vim-nix # Nix Helpers
|
vim-nix # Nix Helpers
|
||||||
@@ -80,18 +79,19 @@ in
|
|||||||
nvim-dap-ui
|
nvim-dap-ui
|
||||||
|
|
||||||
# --------------------
|
# --------------------
|
||||||
# ----- OPENCODE -----
|
# -- CODE COMPANION --
|
||||||
# --------------------
|
# --------------------
|
||||||
(pkgs.vimUtils.buildVimPlugin {
|
(pkgs.vimUtils.buildVimPlugin {
|
||||||
pname = "opencode.nvim";
|
pname = "codecompanion.nvim";
|
||||||
version = "2025-12-17";
|
version = "2025-12-20";
|
||||||
src = pkgs.fetchFromGitHub {
|
src = pkgs.fetchFromGitHub {
|
||||||
owner = "NickvanDyke";
|
owner = "olimorris";
|
||||||
repo = "opencode.nvim";
|
repo = "codecompanion.nvim";
|
||||||
rev = "39a246b597d6050ca319142b5af5a8b81c74e7d9";
|
rev = "a226ca071ebc1d8b5ae1f70800fa9cf4a06a2101";
|
||||||
hash = "sha256-h/Zttho/grrpmcklld15NNGf+3epqLg8RmmRW8eApSo=";
|
sha256 = "sha256-F1nI7q98SPpDjlwPvGy/qFuHvlT1FrbQPcjWrBwLaHU=";
|
||||||
};
|
};
|
||||||
meta.homepage = "https://github.com/NickvanDyke/opencode.nvim/";
|
doCheck = false;
|
||||||
|
meta.homepage = "https://github.com/olimorris/codecompanion.nvim/";
|
||||||
meta.hydraPlatforms = [ ];
|
meta.hydraPlatforms = [ ];
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ in
|
|||||||
enableMcpIntegration = true;
|
enableMcpIntegration = true;
|
||||||
settings = {
|
settings = {
|
||||||
theme = "catppuccin";
|
theme = "catppuccin";
|
||||||
|
model = "llama-swap/devstral-small-2-instruct";
|
||||||
permission = {
|
permission = {
|
||||||
edit = "allow";
|
edit = "allow";
|
||||||
bash = "ask";
|
bash = "ask";
|
||||||
|
|||||||
@@ -94,74 +94,212 @@ in
|
|||||||
# https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
|
# https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
|
||||||
"devstral-small-2-instruct" = {
|
"devstral-small-2-instruct" = {
|
||||||
name = "Devstral Small 2 (24B) - Instruct";
|
name = "Devstral Small 2 (24B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf -c 98304 -ctk q8_0 -ctv q8_0 -fit off -dev CUDA0";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
|
||||||
|
--chat-template-file /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
|
||||||
|
-c 98304 \
|
||||||
|
-ctk q8_0 \
|
||||||
|
-ctv q8_0 \
|
||||||
|
-fit off \
|
||||||
|
-dev CUDA0
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
|
||||||
"qwen3-next-80b-instruct" = {
|
"qwen3-next-80b-instruct" = {
|
||||||
name = "Qwen3 Next (80B) - Instruct";
|
name = "Qwen3 Next (80B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf -c 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -fit off -ncmoe 15 -ts 77,23";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf \
|
||||||
|
-c 131072 \
|
||||||
|
--temp 0.7 \
|
||||||
|
--min-p 0.0 \
|
||||||
|
--top-p 0.8 \
|
||||||
|
--top-k 20 \
|
||||||
|
--repeat-penalty 1.05 \
|
||||||
|
-ctk q8_0 \
|
||||||
|
-ctv q8_0 \
|
||||||
|
-fit off \
|
||||||
|
-ncmoe 15 \
|
||||||
|
-ts 77,23
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
|
||||||
"qwen3-30b-2507-instruct" = {
|
"qwen3-30b-2507-instruct" = {
|
||||||
name = "Qwen3 2507 (30B) - Instruct";
|
name = "Qwen3 2507 (30B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf -c 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -ts 70,30";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
|
||||||
|
-c 262144 \
|
||||||
|
--temp 0.7 \
|
||||||
|
--min-p 0.0 \
|
||||||
|
--top-p 0.8 \
|
||||||
|
--top-k 20 \
|
||||||
|
--repeat-penalty 1.05 \
|
||||||
|
-ctk q8_0 \
|
||||||
|
-ctv q8_0 \
|
||||||
|
-ts 70,30
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
|
||||||
"qwen3-coder-30b-instruct" = {
|
"qwen3-coder-30b-instruct" = {
|
||||||
name = "Qwen3 Coder (30B) - Instruct";
|
name = "Qwen3 Coder (30B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf -c 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -ts 70,30";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
|
||||||
|
-c 262144 \
|
||||||
|
--temp 0.7 \
|
||||||
|
--min-p 0.0 \
|
||||||
|
--top-p 0.8 \
|
||||||
|
--top-k 20 \
|
||||||
|
--repeat-penalty 1.05 \
|
||||||
|
-ctk q8_0 \
|
||||||
|
-ctv q8_0 \
|
||||||
|
-ts 70,30
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
|
||||||
"qwen3-30b-2507-thinking" = {
|
"qwen3-30b-2507-thinking" = {
|
||||||
name = "Qwen3 2507 (30B) - Thinking";
|
name = "Qwen3 2507 (30B) - Thinking";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf -c 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -ts 70,30";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
|
||||||
|
-c 262144 \
|
||||||
|
--temp 0.7 \
|
||||||
|
--min-p 0.0 \
|
||||||
|
--top-p 0.8 \
|
||||||
|
--top-k 20 \
|
||||||
|
--repeat-penalty 1.05 \
|
||||||
|
-ctk q8_0 \
|
||||||
|
-ctv q8_0 \
|
||||||
|
-ts 70,30
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
|
# https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
|
||||||
# --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'
|
# --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'
|
||||||
"gpt-oss-20b-thinking" = {
|
"gpt-oss-20b-thinking" = {
|
||||||
name = "GPT OSS (20B) - Thinking";
|
name = "GPT OSS (20B) - Thinking";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf -c 131072 --temp 1.0 --top-p 1.0 --top-k 40 -dev CUDA0";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
|
||||||
|
-c 131072 \
|
||||||
|
--temp 1.0 \
|
||||||
|
--top-p 1.0 \
|
||||||
|
--top-k 40 \
|
||||||
|
-dev CUDA0
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
|
||||||
"qwen3-8b-vision" = {
|
"qwen3-8b-vision" = {
|
||||||
name = "Qwen3 Vision (8B) - Thinking";
|
name = "Qwen3 Vision (8B) - Thinking";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf -c 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 -ctk q8_0 -ctv q8_0 -dev CUDA0";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
|
||||||
|
--mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
|
||||||
|
-c 65536 \
|
||||||
|
--temp 0.7 \
|
||||||
|
--min-p 0.0 \
|
||||||
|
--top-p 0.8 \
|
||||||
|
--top-k 20 \
|
||||||
|
-ctk q8_0 \
|
||||||
|
-ctv q8_0 \
|
||||||
|
-dev CUDA1
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
|
# https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
|
||||||
"qwen2.5-coder-7b-instruct" = {
|
"qwen2.5-coder-7b-instruct" = {
|
||||||
name = "Qwen2.5 Coder (7B) - Instruct";
|
name = "Qwen2.5 Coder (7B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf --fim-qwen-7b-default -c 131072 --port \${PORT} --dev CUDA0";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
-m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
|
||||||
|
--fim-qwen-7b-default \
|
||||||
|
-c 131072 \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-dev CUDA1
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
# https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
|
||||||
|
"qwen2.5-coder-3b-instruct" = {
|
||||||
|
name = "Qwen2.5 Coder (3B) - Instruct";
|
||||||
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
-m /mnt/ssd/Models/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
|
||||||
|
--fim-qwen-3b-default \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-fit off \
|
||||||
|
-dev CUDA1
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
|
# https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
|
||||||
"smollm3-3b-instruct" = {
|
"smollm3-3b-instruct" = {
|
||||||
name = "SmolLM3(3B) - Instruct";
|
name = "SmolLM3(3B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf -c 98304 --temp 0.6 --top-p 0.95 --reasoning-budget 0 -dev CUDA0";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf \
|
||||||
|
-c 98304 \
|
||||||
|
--temp 0.6 \
|
||||||
|
--top-p 0.95 \
|
||||||
|
--reasoning-budget 0 \
|
||||||
|
-dev CUDA0
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
|
# https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
|
||||||
"ernie4.5-21b-instruct" = {
|
"ernie4.5-21b-instruct" = {
|
||||||
name = "ERNIE4.5 (21B) - Instruct";
|
name = "ERNIE4.5 (21B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf -c 98304 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf \
|
||||||
|
-c 98304 \
|
||||||
|
--temp 0.7 \
|
||||||
|
--min-p 0.0 \
|
||||||
|
--top-p 0.8 \
|
||||||
|
--top-k 20
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
|
# https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
|
||||||
"olmoe-7b-instruct" = {
|
"olmoe-7b-instruct" = {
|
||||||
name = "OLMoE (7B) - Instruct";
|
name = "OLMoE (7B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA1";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf \
|
||||||
|
-dev CUDA1
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
|
# https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
|
||||||
"phi-mini-8b-instruct" = {
|
"phi-mini-8b-instruct" = {
|
||||||
name = "Phi mini (8B) - Instruct";
|
name = "Phi mini (8B) - Instruct";
|
||||||
cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA1";
|
cmd = ''
|
||||||
|
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||||
|
--port ''${PORT} \
|
||||||
|
-m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf \
|
||||||
|
--repeat-penalty 1.05 \
|
||||||
|
--temp 0.0 \
|
||||||
|
--top-p 1.0 \
|
||||||
|
--top-k 1 \
|
||||||
|
-dev CUDA1
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
groups = {
|
groups = {
|
||||||
@@ -169,8 +307,8 @@ in
|
|||||||
swap = false;
|
swap = false;
|
||||||
exclusive = true;
|
exclusive = true;
|
||||||
members = [
|
members = [
|
||||||
"gpt-oss-20b-thinking"
|
"devstral-small-2-instruct" # Primary
|
||||||
"qwen2.5-coder-3b-instruct"
|
"qwen2.5-coder-3b-instruct" # Infill
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user