refactor: migrate from opencode to codecompanion with updated model configurations
@@ -1,10 +1,6 @@
 local llm_endpoint = "https://llm-api.va.reichard.io"
--- local llm_assistant_model = "gpt-oss-20b-thinking"
--- local llm_infill_model = "qwen2.5-coder-3b-instruct"
-
--- Available models: qwen3-30b-2507-instruct, qwen2.5-coder-3b-instruct
-local llm_assistant_model = "qwen3-30b-2507-instruct"
-local llm_infill_model = llm_assistant_model
+local llm_assistant_model = "devstral-small-2-instruct"
+local llm_infill_model = "qwen2.5-coder-3b-instruct"
 
 -- Default Llama - Toggle Llama & Copilot
 -- vim.g.copilot_filetypes = { ["*"] = false }
@@ -25,23 +21,6 @@ local function toggle_llm_fim_provider()
 	end
 end
 
--- OpenCode Configuration
-vim.g.opencode_opts = {
-	provider = {
-		enabled = "snacks",
-		snacks = {
-			win = {
-				-- position = "float",
-				enter = true,
-				width = 0.5,
-				-- height = 0.75,
-			},
-			start_insert = true,
-			auto_insert = true,
-		}
-	}
-}
-
 -- Copilot Configuration
 vim.g.copilot_no_tab_map = true
 
@@ -54,13 +33,54 @@ vim.g.llama_config = {
 	enable_at_startup = false,
 }
 
--- Create KeyMaps
+-- Configure Code Companion
+require("plugins.codecompanion.fidget-spinner"):init()
+local codecompanion = require("codecompanion")
+codecompanion.setup({
+	display = {
+		chat = {
+			show_token_count = true,
+			window = {
+				layout = "float",
+				width = 0.6,
+			}
+		}
+	},
+	adapters = {
+		http = {
+			opts = { show_defaults = false, },
+			copilot = "copilot",
+			llamaswap = function()
+				return require("codecompanion.adapters").extend("openai_compatible", {
+					formatted_name = "LlamaSwap",
+					name = "llamaswap",
+					schema = { model = { default = llm_assistant_model } },
+					env = { url = llm_endpoint },
+				})
+			end,
+		},
+		acp = {
+			opts = { show_defaults = false },
+			opencode = "opencode",
+		}
+	},
+	strategies = {
+		chat = { adapter = "opencode" },
+		inline = { adapter = "llamaswap" },
+		cmd = { adapter = "llamaswap" },
+	},
+	chat = { display = "telescope" },
+	memory = { opts = { chat = { enabled = true } } },
+})
+
+-- Create KeyMaps for Code Companion
+vim.keymap.set("n", "<leader>aa", codecompanion.actions, { desc = "Actions" })
 vim.keymap.set("n", "<leader>af", toggle_llm_fim_provider, { desc = "Toggle FIM (Llama / Copilot)" })
-vim.keymap.set({ "n", "x" }, "<leader>ai", function() require("opencode").ask("@this: ", { submit = true }) end,
-	{ desc = "Ask OpenCode" })
-vim.keymap.set({ "n", "x" }, "<leader>aa", function() require("opencode").select() end,
-	{ desc = "Execute OpenCode Action" })
-vim.keymap.set({ "n", "t" }, "<leader>at", function() require("opencode").toggle() end, { desc = "Toggle OpenCode" })
-vim.keymap.set("n", "<leader>ao", function() require("snacks.terminal").toggle("opencode") end,
-	{ desc = "Toggle OpenCode" })
+vim.keymap.set("v", "<leader>ai", ":CodeCompanion<cr>", { desc = "Inline Prompt" })
+vim.keymap.set({ "n", "v" }, "<leader>an", codecompanion.chat, { desc = "New Chat" })
+vim.keymap.set({ "n", "t" }, "<leader>at", codecompanion.toggle, { desc = "Toggle Chat" })
 vim.keymap.set('i', '<C-J>', 'copilot#Accept("\\<CR>")', {
 	expr = true,
 	replace_keycodes = false
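Note: the `llamaswap` entry above is the commit's pattern for pointing CodeCompanion at the OpenAI-compatible llama-swap proxy. A minimal sketch of how the same `extend()` call could expose the infill model as a second adapter; `llamaswap_infill` is a hypothetical name, not part of this commit:

	-- Hypothetical second adapter reusing the commit's extend() pattern;
	-- llm_infill_model and llm_endpoint are the locals defined at the top of the file.
	local llamaswap_infill = function()
		return require("codecompanion.adapters").extend("openai_compatible", {
			formatted_name = "LlamaSwap (Infill)",
			name = "llamaswap_infill", -- hypothetical adapter name
			schema = { model = { default = llm_infill_model } },
			env = { url = llm_endpoint },
		})
	end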
@@ -52,6 +52,6 @@ require("lualine").setup({
 	options = { theme = "catppuccin" },
 	sections = {
 		lualine_c = { { pr_status } },
-		lualine_z = { require("opencode").statusline }
+		-- lualine_z = { require("opencode").statusline }
 	},
 })
@@ -0,0 +1,71 @@
+local progress = require("fidget.progress")
+
+local M = {}
+
+function M:init()
+	local group = vim.api.nvim_create_augroup("CodeCompanionFidgetHooks", {})
+
+	vim.api.nvim_create_autocmd({ "User" }, {
+		pattern = "CodeCompanionRequestStarted",
+		group = group,
+		callback = function(request)
+			local handle = M:create_progress_handle(request)
+			M:store_progress_handle(request.data.id, handle)
+		end,
+	})
+
+	vim.api.nvim_create_autocmd({ "User" }, {
+		pattern = "CodeCompanionRequestFinished",
+		group = group,
+		callback = function(request)
+			local handle = M:pop_progress_handle(request.data.id)
+			if handle then
+				M:report_exit_status(handle, request)
+				handle:finish()
+			end
+		end,
+	})
+end
+
+M.handles = {}
+
+function M:store_progress_handle(id, handle)
+	M.handles[id] = handle
+end
+
+function M:pop_progress_handle(id)
+	local handle = M.handles[id]
+	M.handles[id] = nil
+	return handle
+end
+
+function M:create_progress_handle(request)
+	return progress.handle.create({
+		title = " Requesting assistance (" .. request.data.adapter.formatted_name .. ")",
+		message = "In progress...",
+		lsp_client = {
+			name = M:llm_role_title(request.data.adapter),
+		},
+	})
+end
+
+function M:llm_role_title(adapter)
+	local parts = {}
+	table.insert(parts, adapter.formatted_name)
+	if adapter.model and adapter.model ~= "" then
+		table.insert(parts, "(" .. adapter.model .. ")")
+	end
+	return table.concat(parts, " ")
+end
+
+function M:report_exit_status(handle, request)
+	if request.data.status == "success" then
+		handle.message = "Completed"
+	elseif request.data.status == "error" then
+		handle.message = " Error"
+	else
+		handle.message = " Cancelled"
+	end
+end
+
+return M
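Note: a quick way to exercise the new spinner module without a live request. A minimal sketch, assuming fidget.nvim is installed and the module above has been loaded via `:init()`; it fires the same `User` autocmds CodeCompanion emits, with a stubbed payload shaped like the fields the module reads:

	-- Stubbed request payload; id, adapter.formatted_name, adapter.model, and
	-- status mirror the request.data fields the spinner module dereferences.
	vim.api.nvim_exec_autocmds("User", {
		pattern = "CodeCompanionRequestStarted",
		data = { id = 1, adapter = { formatted_name = "LlamaSwap", model = "devstral-small-2-instruct" } },
	})
	vim.defer_fn(function()
		vim.api.nvim_exec_autocmds("User", {
			pattern = "CodeCompanionRequestFinished",
			data = { id = 1, status = "success" },
		})
	end, 500)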
@@ -1,9 +1,8 @@
-{
-  pkgs,
-  lib,
-  config,
-  namespace,
-  ...
+{ pkgs
+, lib
+, config
+, namespace
+, ...
 }:
 let
   inherit (lib) mkIf;
@@ -28,9 +27,9 @@ in
       # ------------------
       cmp-buffer # Buffer Word Completion
       cmp-cmdline # Command Line Completion
+      cmp_luasnip # Snippets Completion
       cmp-nvim-lsp # Main LSP
       cmp-path # Path Completion
-      cmp_luasnip # Snippets Completion
       friendly-snippets # Snippets
       lsp_lines-nvim # Inline Diagnostics
       luasnip # Snippets
@@ -43,6 +42,7 @@ in
       comment-nvim # Code Comments
       copilot-vim # GitHub Copilot
       diffview-nvim # Diff View
+      fidget-nvim # Notification Helper
       gitsigns-nvim # Git Blame
       leap-nvim # Quick Movement
       markdown-preview-nvim # Markdown Preview
@@ -51,7 +51,6 @@ in
      nvim-autopairs # Automatically Close Pairs (),[],{}
      octo-nvim # Git Octo
      render-markdown-nvim # Markdown Renderer
-      snacks-nvim # OpenCode
      snacks-nvim # Snacks
      telescope-nvim # Fuzzy Finder
      vim-nix # Nix Helpers
@@ -80,18 +79,19 @@ in
      nvim-dap-ui
 
      # --------------------
-      # ----- OPENCODE -----
+      # -- CODE COMPANION --
      # --------------------
      (pkgs.vimUtils.buildVimPlugin {
-        pname = "opencode.nvim";
-        version = "2025-12-17";
+        pname = "codecompanion.nvim";
+        version = "2025-12-20";
        src = pkgs.fetchFromGitHub {
-          owner = "NickvanDyke";
-          repo = "opencode.nvim";
-          rev = "39a246b597d6050ca319142b5af5a8b81c74e7d9";
-          hash = "sha256-h/Zttho/grrpmcklld15NNGf+3epqLg8RmmRW8eApSo=";
+          owner = "olimorris";
+          repo = "codecompanion.nvim";
+          rev = "a226ca071ebc1d8b5ae1f70800fa9cf4a06a2101";
+          sha256 = "sha256-F1nI7q98SPpDjlwPvGy/qFuHvlT1FrbQPcjWrBwLaHU=";
        };
-        meta.homepage = "https://github.com/NickvanDyke/opencode.nvim/";
+        doCheck = false;
+        meta.homepage = "https://github.com/olimorris/codecompanion.nvim/";
        meta.hydraPlatforms = [ ];
      })
 
@@ -20,6 +20,7 @@ in
      enableMcpIntegration = true;
      settings = {
        theme = "catppuccin";
+        model = "llama-swap/devstral-small-2-instruct";
        permission = {
          edit = "allow";
          bash = "ask";
@@ -94,74 +94,212 @@ in
      # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
      "devstral-small-2-instruct" = {
        name = "Devstral Small 2 (24B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf -c 98304 -ctk q8_0 -ctv q8_0 -fit off -dev CUDA0";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
+            --chat-template-file /mnt/ssd/Models/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
+            -c 98304 \
+            -ctk q8_0 \
+            -ctv q8_0 \
+            -fit off \
+            -dev CUDA0
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
      "qwen3-next-80b-instruct" = {
        name = "Qwen3 Next (80B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf -c 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -fit off -ncmoe 15 -ts 77,23";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf \
+            -c 131072 \
+            --temp 0.7 \
+            --min-p 0.0 \
+            --top-p 0.8 \
+            --top-k 20 \
+            --repeat-penalty 1.05 \
+            -ctk q8_0 \
+            -ctv q8_0 \
+            -fit off \
+            -ncmoe 15 \
+            -ts 77,23
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
      "qwen3-30b-2507-instruct" = {
        name = "Qwen3 2507 (30B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf -c 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -ts 70,30";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
+            -c 262144 \
+            --temp 0.7 \
+            --min-p 0.0 \
+            --top-p 0.8 \
+            --top-k 20 \
+            --repeat-penalty 1.05 \
+            -ctk q8_0 \
+            -ctv q8_0 \
+            -ts 70,30
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
      "qwen3-coder-30b-instruct" = {
        name = "Qwen3 Coder (30B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf -c 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -ts 70,30";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
+            -c 262144 \
+            --temp 0.7 \
+            --min-p 0.0 \
+            --top-p 0.8 \
+            --top-k 20 \
+            --repeat-penalty 1.05 \
+            -ctk q8_0 \
+            -ctv q8_0 \
+            -ts 70,30
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
      "qwen3-30b-2507-thinking" = {
        name = "Qwen3 2507 (30B) - Thinking";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf -c 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 -ctk q8_0 -ctv q8_0 -ts 70,30";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
+            -c 262144 \
+            --temp 0.7 \
+            --min-p 0.0 \
+            --top-p 0.8 \
+            --top-k 20 \
+            --repeat-penalty 1.05 \
+            -ctk q8_0 \
+            -ctv q8_0 \
+            -ts 70,30
+        '';
      };
 
      # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-v2-i1-GGUF/tree/main
      # --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'
      "gpt-oss-20b-thinking" = {
        name = "GPT OSS (20B) - Thinking";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf -c 131072 --temp 1.0 --top-p 1.0 --top-k 40 -dev CUDA0";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/gpt-oss-20b-heretic-v2.i1-MXFP4_MOE.gguf \
+            -c 131072 \
+            --temp 1.0 \
+            --top-p 1.0 \
+            --top-k 40 \
+            -dev CUDA0
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
      "qwen3-8b-vision" = {
        name = "Qwen3 Vision (8B) - Thinking";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf -c 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 -ctk q8_0 -ctv q8_0 -dev CUDA0";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
+            --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
+            -c 65536 \
+            --temp 0.7 \
+            --min-p 0.0 \
+            --top-p 0.8 \
+            --top-k 20 \
+            -ctk q8_0 \
+            -ctv q8_0 \
+            -dev CUDA1
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
      "qwen2.5-coder-7b-instruct" = {
        name = "Qwen2.5 Coder (7B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf --fim-qwen-7b-default -c 131072 --port \${PORT} --dev CUDA0";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
+            --fim-qwen-7b-default \
+            -c 131072 \
+            --port ''${PORT} \
+            -dev CUDA1
+        '';
      };
 
      # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
      "qwen2.5-coder-3b-instruct" = {
        name = "Qwen2.5 Coder (3B) - Instruct";
        cmd = ''
          ${pkgs.reichard.llama-cpp}/bin/llama-server \
            -m /mnt/ssd/Models/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
            --fim-qwen-3b-default \
            --port ''${PORT} \
            -fit off \
            -dev CUDA1
        '';
      };
 
      # https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
      "smollm3-3b-instruct" = {
        name = "SmolLM3(3B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf -c 98304 --temp 0.6 --top-p 0.95 --reasoning-budget 0 -dev CUDA0";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf \
+            -c 98304 \
+            --temp 0.6 \
+            --top-p 0.95 \
+            --reasoning-budget 0 \
+            -dev CUDA0
+        '';
      };
 
      # https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
      "ernie4.5-21b-instruct" = {
        name = "ERNIE4.5 (21B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf -c 98304 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf \
+            -c 98304 \
+            --temp 0.7 \
+            --min-p 0.0 \
+            --top-p 0.8 \
+            --top-k 20
+        '';
      };
 
      # https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
      "olmoe-7b-instruct" = {
        name = "OLMoE (7B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA1";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf \
+            -dev CUDA1
+        '';
      };
 
      # https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
      "phi-mini-8b-instruct" = {
        name = "Phi mini (8B) - Instruct";
-        cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA1";
+        cmd = ''
+          ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            --port ''${PORT} \
+            -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf \
+            --repeat-penalty 1.05 \
+            --temp 0.0 \
+            --top-p 1.0 \
+            --top-k 1 \
+            -dev CUDA1
+        '';
      };
    };
    groups = {
@@ -169,8 +307,8 @@ in
      swap = false;
      exclusive = true;
      members = [
-        "gpt-oss-20b-thinking"
-        "qwen2.5-coder-3b-instruct"
+        "devstral-small-2-instruct" # Primary
+        "qwen2.5-coder-3b-instruct" # Infill
      ];
    };
  };
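Note: since both the Lua config and the opencode settings now resolve models through the llama-swap proxy at `llm_endpoint`, a quick sanity check is to list what the proxy is serving. A minimal sketch, assuming llama-swap exposes the standard OpenAI-compatible `/v1/models` route and `curl` is on PATH:

	-- Lists the model ids llama-swap advertises; run inside Neovim with :lua
	local function list_llm_models()
		vim.system({ "curl", "-s", "https://llm-api.va.reichard.io/v1/models" }, { text = true },
			function(out)
				vim.schedule(function()
					local ok, decoded = pcall(vim.json.decode, out.stdout or "")
					if not ok or type(decoded) ~= "table" then return end
					for _, model in ipairs(decoded.data or {}) do
						print(model.id) -- e.g. "devstral-small-2-instruct"
					end
				end)
			end)
	end
	list_llm_models()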