feat(llm): add Qwen3-VL-8B and Qwen2.5-Coder support, update CUDA config

- Add new `qwen3-8b-vision` model with multimodal support using an mmproj file
- Add new `qwen2.5-coder-7b-instruct` model with FIM enabled via `--fim-qwen-7b-default`
- Update CUDA device usage from `CUDA0` to `CUDA1` for `olmoe-7b-instruct` and `phi-mini-8b-instruct`
- Upgrade llama.cpp to version 7426 with updated hash and CUDA architectures (61;86)
- Add Copilot acceptance shortcut `<C-J>` in insert mode and disable tab mapping
- Tune cache-type settings across multiple models for better performance (see the config sketch below)
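
To make the bullets above concrete, here is a minimal llama-swap-style sketch of what entries like these might look like. This is an illustration under assumptions, not the actual file from this commit: the model paths and GGUF filenames are placeholders, and only the flags named above (`--mmproj`, `--fim-qwen-7b-default`, `--device CUDA1`, and the cache-type options) come from the commit message.

```yaml
# Hypothetical llama-swap-style entries; model paths and filenames
# are placeholders, not the actual files from this commit.
models:
  "qwen3-8b-vision":
    cmd: >
      llama-server --port ${PORT}
      -m /models/qwen3-vl-8b-instruct-q4_k_m.gguf
      --mmproj /models/qwen3-vl-8b-mmproj-f16.gguf
      --cache-type-k q8_0 --cache-type-v q8_0

  "qwen2.5-coder-7b-instruct":
    # --fim-qwen-7b-default is a llama-server preset that loads
    # Qwen2.5-Coder-7B with fill-in-the-middle defaults.
    cmd: >
      llama-server --port ${PORT} --fim-qwen-7b-default

  "phi-mini-8b-instruct":
    # Moved from CUDA0 to CUDA1 in this commit.
    cmd: >
      llama-server --port ${PORT}
      -m /models/phi-mini-8b-instruct.gguf
      --device CUDA1
```

As for the build change, `61;86` are CUDA compute capabilities (Pascal GTX 10-series and Ampere RTX 30-series cards, respectively), typically set when compiling llama.cpp with something like `-DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES="61;86"`.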
commit b93249daf7
parent e947e13a02
date   2025-12-17 09:27:15 -05:00
3 changed files with 31 additions and 31 deletions


@@ -25,6 +25,13 @@ local function toggle_llm_fim_provider()
 end
 vim.keymap.set("n", "<leader>cf", toggle_llm_fim_provider, { desc = "Toggle FIM (Llama / Copilot)" })
+
+-- Copilot Accept Shortcut
+vim.keymap.set('i', '<C-J>', 'copilot#Accept("\\<CR>")', {
+  expr = true,
+  replace_keycodes = false
+})
+vim.g.copilot_no_tab_map = true
 -- Configure LLama LLM FIM
 vim.g.llama_config = {
   endpoint = llm_endpoint .. "/infill",