chore(llm): add vulkan support and update llama.cpp to 7278

@@ -21,14 +21,15 @@ in
     blasSupport = true;
     rocmSupport = false;
     metalSupport = false;
+    vulkanSupport = true;
   }).overrideAttrs
     (oldAttrs: rec {
-      version = "7253";
+      version = "7278";
       src = pkgs.fetchFromGitHub {
         owner = "ggml-org";
         repo = "llama.cpp";
         tag = "b${version}";
-        hash = "sha256-Gx8c00mwh/ySHDbjqCPu7nKymb24gCB/NHMGjo4FS08=";
+        hash = "sha256-Gxi/sUIuVvX5+mcZj9vCvUgODsWPAFzESQz8TjTe/Mk=";
         leaveDotGit = true;
         postFetch = ''
           git -C "$out" rev-parse --short HEAD > $out/COMMIT
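
Not part of the commit itself, but related to the hunk above: whenever the tag is bumped, the fetchFromGitHub hash has to be refreshed to match the new source. A minimal sketch of the usual trust-on-first-use workflow, assuming nixpkgs' lib.fakeHash is available: build once with the fake hash, then copy the "got: sha256-..." value Nix reports into the real attribute.

      src = pkgs.fetchFromGitHub {
        owner = "ggml-org";
        repo = "llama.cpp";
        tag = "b${version}";        # e.g. b7278 for this bump
        hash = pkgs.lib.fakeHash;   # rebuild once, then paste the reported "got:" hash here
        leaveDotGit = true;         # keep .git so postFetch can record the short commit id
        postFetch = ''
          git -C "$out" rev-parse --short HEAD > $out/COMMIT
        '';
      };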
@@ -146,7 +147,7 @@ in
       # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
       "qwen3-coder-30b-instruct" = {
         name = "Qwen3 Coder (30B) - Instruct";
-        cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 55000 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
       };
 
       # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
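
A side note on the qwen3-coder change above: the larger context fits in roughly the same memory because the K/V cache is quantized to q4_0 at the same time. A rough sketch of the arithmetic, assuming an attention shape of 48 layers, 4 KV heads and head dim 128 for Qwen3-30B-A3B (illustrative numbers, not read from the GGUF) and llama.cpp's per-element cache sizes of 2 bytes for f16 and 0.5625 bytes for q4_0; evaluate with nix-instantiate --eval --strict.

      let
        # KV cache bytes: K and V, per layer, per KV head, per head dim, per context slot.
        kvBytes = { layers, kvHeads, headDim, ctx, bytesPerElem }:
          2 * layers * kvHeads * headDim * ctx * bytesPerElem;
      in {
        # f16 cache at the old 16384-token context: about 1.6e9 bytes
        f16At16k = kvBytes { layers = 48; kvHeads = 4; headDim = 128; ctx = 16384; bytesPerElem = 2.0; };
        # q4_0 cache at the new 55000-token context: about 1.5e9 bytes, so a comparable footprint
        q4At55k = kvBytes { layers = 48; kvHeads = 4; headDim = 128; ctx = 55000; bytesPerElem = 0.5625; };
      }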
@@ -166,6 +167,18 @@ in
         name = "Qwen3 Vision (8B) - Thinking";
         cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf --ctx-size 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
       };
+
+      # https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
+      "olmoe-7b-instruct" = {
+        name = "OLMoE (7B) - Instruct";
+        cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA0";
+      };
+
+      # https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
+      "phi-mini-8b-instruct" = {
+        name = "Phi mini (8B) - Instruct";
+        cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA0";
+      };
     };
   };
 };