feat: stable-diffusion & updates
This commit is contained in:
@@ -51,6 +51,7 @@ in
|
||||
git = enabled;
|
||||
k9s = enabled;
|
||||
nvim = enabled;
|
||||
opencode = enabled;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@@ -28,15 +28,6 @@ in
|
||||
doom_loop = "ask";
|
||||
external_directory = "ask";
|
||||
};
|
||||
lsp = {
|
||||
nil = {
|
||||
command = [
|
||||
"${pkgs.nil}/bin/nil"
|
||||
"--stdio"
|
||||
];
|
||||
extensions = [ ".nix" ];
|
||||
};
|
||||
};
|
||||
provider = {
|
||||
"llama-swap" = {
|
||||
npm = "@ai-sdk/openai-compatible";
|
||||
@@ -44,6 +35,9 @@ in
|
||||
baseURL = "https://llm-api.va.reichard.io/v1";
|
||||
};
|
||||
models = {
|
||||
nemotron-3-nano-30b-thinking = {
|
||||
name = "Nemotron 3 Nano (30B) - Thinking";
|
||||
};
|
||||
gpt-oss-20b-thinking = {
|
||||
name = "GPT OSS (20B)";
|
||||
};
|
||||
@@ -62,6 +56,9 @@ in
|
||||
qwen3-30b-2507-instruct = {
|
||||
name = "Qwen3 2507 (30B) Instruct";
|
||||
};
|
||||
qwen3-4b-2507-instruct = {
|
||||
name = "Qwen3 2507 (4B) - Instruct";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@@ -13,13 +13,13 @@ let
|
||||
in
|
||||
buildGoModule (finalAttrs: {
|
||||
pname = "llama-swap";
|
||||
version = "179";
|
||||
version = "180";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "mostlygeek";
|
||||
repo = "llama-swap";
|
||||
tag = "v${finalAttrs.version}";
|
||||
hash = "sha256-7iftF1KMz+2DUifRG/ESHcWXYsOJ3NiEF7oHuJKxmUE=";
|
||||
hash = "sha256-WPDmENGH1uwNrobcIPA2vuNEsb9sP7Wl7T0wtUv1H/s=";
|
||||
# populate values that require us to use git. By doing this in postFetch we
|
||||
# can delete .git afterwards and maintain better reproducibility of the src.
|
||||
leaveDotGit = true;
|
||||
|
||||
@@ -13,12 +13,12 @@
|
||||
}:
|
||||
let
|
||||
pname = "opencode";
|
||||
version = "1.0.170";
|
||||
version = "1.0.223";
|
||||
src = fetchFromGitHub {
|
||||
owner = "sst";
|
||||
repo = "opencode";
|
||||
tag = "v${version}";
|
||||
hash = "sha256-Y0thIZ20p0FSBAH0mJfFn8e+OEUvlZyTuk+/yEt8Sy8=";
|
||||
hash = "sha256-CzbWv48UySgXfNgtWdIdFBcqx8GHT4rSJNDdpn39b0c="; # "sha256-Y0thIZ20p0FSBAH0mJfFn8e+OEUvlZyTuk+/yEt8Sy8=";
|
||||
};
|
||||
|
||||
node_modules = stdenvNoCC.mkDerivation {
|
||||
@@ -75,7 +75,7 @@ let
|
||||
# NOTE: Required else we get errors that our fixed-output derivation references store paths
|
||||
dontFixup = true;
|
||||
|
||||
outputHash = "sha256-Aq774bgU12HkrF2oAtfu9kyQFlxUeDbmwlS9lz4Z4ZI=";
|
||||
outputHash = "sha256-+HEd3I11VqejTi7cikbTL5+DmNGyvUC4Cm4ysfujwes=";
|
||||
outputHashAlgo = "sha256";
|
||||
outputHashMode = "recursive";
|
||||
};
|
||||
|
||||
118
packages/stable-diffusion-cpp/default.nix
Normal file
118
packages/stable-diffusion-cpp/default.nix
Normal file
@@ -0,0 +1,118 @@
|
||||
# Package definition for stable-diffusion.cpp ("Stable Diffusion inference in
# pure C/C++"), built with CMake + Ninja.
#
# Backend selection is driven by the *Support arguments:
#   cudaSupport   - defaults to `config.cudaSupport`, else false
#   rocmSupport   - defaults to `config.rocmSupport`, else false
#   openclSupport - opt-in, defaults to false
#   vulkanSupport - opt-in, defaults to false
#   metalSupport  - defaults to true only on aarch64-darwin
# Each flag adds the matching buildInputs and toggles the matching SD_* CMake
# option below.
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  ninja,
  pkg-config,
  autoAddDriverRunpath,
  config ? { },
  cudaSupport ? (config.cudaSupport or false),
  cudaPackages ? { },
  rocmSupport ? (config.rocmSupport or false),
  rocmPackages ? { },
  # Falls back from locally detected targets, to the package set's default
  # targets, to an empty list when neither attribute exists.
  rocmGpuTargets ? (rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets or [ ]),
  openclSupport ? false,
  clblast,
  vulkanSupport ? false,
  shaderc,
  vulkan-headers,
  vulkan-loader,
  spirv-tools,
  metalSupport ? (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64),
  apple-sdk,
}:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    ;

  # CUDA builds use the stdenv supplied by the CUDA package set; every other
  # configuration uses the regular stdenv.
  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
in
effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "stable-diffusion-cpp";
  version = "master-453-4ff2c8c";

  src = fetchFromGitHub {
    owner = "leejet";
    repo = "stable-diffusion.cpp";
    # Derived from `version` so that a bump only has to touch one string
    # (plus the hash below).
    rev = finalAttrs.version;
    hash = "sha256-8cN6dYOQAKnJpuQdtayp6+o71s64lG+FcTn8GsIM4jI=";
    # NOTE(review): presumably required because SD_USE_SYSTEM_GGML is disabled
    # below and the bundled copy lives in a submodule - confirm upstream.
    fetchSubmodules = true;
  };

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
  ]
  ++ optionals cudaSupport [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ];

  # Only the libraries for the enabled backends are pulled in.
  buildInputs =
    optionals cudaSupport [
      cudaPackages.cuda_cccl
      cudaPackages.cuda_cudart
      cudaPackages.libcublas
    ]
    ++ optionals rocmSupport [
      rocmPackages.clr
      rocmPackages.hipblas
      rocmPackages.rocblas
    ]
    ++ optionals vulkanSupport [
      shaderc
      vulkan-headers
      vulkan-loader
      spirv-tools
    ]
    ++ optionals openclSupport [ clblast ]
    ++ optionals metalSupport [ apple-sdk ];

  cmakeFlags = [
    (cmakeBool "SD_BUILD_EXAMPLES" true)
    (cmakeBool "SD_BUILD_SHARED_LIBS" true)
    (cmakeBool "SD_USE_SYSTEM_GGML" false)
    (cmakeBool "SD_CUDA" cudaSupport)
    (cmakeBool "SD_HIPBLAS" rocmSupport)
    (cmakeBool "SD_VULKAN" vulkanSupport)
    (cmakeBool "SD_OPENCL" openclSupport)
    (cmakeBool "SD_METAL" metalSupport)
    (cmakeBool "SD_FAST_SOFTMAX" false)
  ]
  ++ optionals cudaSupport [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
  ]
  ++ optionals rocmSupport [
    # CMake expects a semicolon-separated list of GPU architectures.
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
  ];

  # Attributes are referenced explicitly through `lib` instead of a blanket
  # `with lib;`, so no name in this set can be silently shadowed.
  meta = {
    description = "Stable Diffusion inference in pure C/C++";
    homepage = "https://github.com/leejet/stable-diffusion.cpp";
    license = lib.licenses.mit;
    # NOTE(review): confirm this tag installs a binary named `sd`.
    mainProgram = "sd";
    maintainers = with lib.maintainers; [
      dit7ya
      adriangl
    ];
    platforms = lib.platforms.unix;
    # The CUDA and OpenCL backends are marked unsupported on Darwin.
    badPlatforms = optionals (cudaSupport || openclSupport) lib.platforms.darwin;
    # Metal can only be built on Darwin hosts.
    broken = metalSupport && !stdenv.hostPlatform.isDarwin;
  };
})
|
||||
@@ -5,6 +5,12 @@
|
||||
}:
|
||||
let
|
||||
inherit (lib.${namespace}) enabled;
|
||||
|
||||
llama-swap = pkgs.reichard.llama-swap;
|
||||
llama-cpp = pkgs.reichard.llama-cpp;
|
||||
stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
|
||||
cudaSupport = true;
|
||||
};
|
||||
in
|
||||
{
|
||||
system.stateVersion = "25.11";
|
||||
@@ -88,14 +94,14 @@ in
|
||||
services.llama-swap = {
|
||||
enable = true;
|
||||
openFirewall = true;
|
||||
package = pkgs.reichard.llama-swap;
|
||||
package = llama-swap;
|
||||
settings = {
|
||||
models = {
|
||||
# https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
|
||||
"devstral-small-2-instruct" = {
|
||||
name = "Devstral Small 2 (24B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
|
||||
--chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
|
||||
@@ -113,7 +119,7 @@ in
|
||||
"gpt-oss-20b-thinking" = {
|
||||
name = "GPT OSS (20B) - Thinking";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-F16.gguf \
|
||||
-c 131072 \
|
||||
@@ -128,7 +134,7 @@ in
|
||||
"gpt-oss-csec-20b-thinking" = {
|
||||
name = "GPT OSS CSEC (20B) - Thinking";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
|
||||
-c 131072 \
|
||||
@@ -143,7 +149,7 @@ in
|
||||
"qwen3-next-80b-instruct" = {
|
||||
name = "Qwen3 Next (80B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
|
||||
-c 262144 \
|
||||
@@ -162,7 +168,7 @@ in
|
||||
"qwen3-30b-2507-instruct" = {
|
||||
name = "Qwen3 2507 (30B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
|
||||
-c 262144 \
|
||||
@@ -181,10 +187,10 @@ in
|
||||
"qwen3-coder-30b-instruct" = {
|
||||
name = "Qwen3 Coder (30B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
|
||||
-c 262144 \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
|
||||
-c 131072 \
|
||||
--temp 0.7 \
|
||||
--min-p 0.0 \
|
||||
--top-p 0.8 \
|
||||
@@ -200,7 +206,7 @@ in
|
||||
"qwen3-30b-2507-thinking" = {
|
||||
name = "Qwen3 2507 (30B) - Thinking";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
|
||||
-c 262144 \
|
||||
@@ -219,7 +225,7 @@ in
|
||||
"nemotron-3-nano-30b-thinking" = {
|
||||
name = "Nemotron 3 Nano (30B) - Thinking";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
|
||||
-c 1048576 \
|
||||
@@ -233,7 +239,7 @@ in
|
||||
"qwen3-8b-vision" = {
|
||||
name = "Qwen3 Vision (8B) - Thinking";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
|
||||
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
|
||||
@@ -253,11 +259,12 @@ in
|
||||
"qwen2.5-coder-7b-instruct" = {
|
||||
name = "Qwen2.5 Coder (7B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
|
||||
--fim-qwen-7b-default \
|
||||
-c 131072 \
|
||||
--port ''${PORT} \
|
||||
-fit off \
|
||||
-dev CUDA1
|
||||
'';
|
||||
};
|
||||
@@ -266,7 +273,7 @@ in
|
||||
"qwen2.5-coder-3b-instruct" = {
|
||||
name = "Qwen2.5 Coder (3B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
|
||||
--fim-qwen-3b-default \
|
||||
--port ''${PORT} \
|
||||
@@ -279,7 +286,7 @@ in
|
||||
"qwen3-4b-2507-instruct" = {
|
||||
name = "Qwen3 2507 (4B) - Instruct";
|
||||
cmd = ''
|
||||
${pkgs.reichard.llama-cpp}/bin/llama-server \
|
||||
${llama-cpp}/bin/llama-server \
|
||||
--port ''${PORT} \
|
||||
-m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
|
||||
-c 98304 \
|
||||
@@ -289,6 +296,40 @@ in
|
||||
-dev CUDA1
|
||||
'';
|
||||
};
|
||||
|
||||
"z-image-turbo" = {
|
||||
name = "Z-Image-Turbo";
|
||||
checkEndpoint = "/";
|
||||
cmd = ''
|
||||
${stable-diffusion-cpp}/bin/sd-server \
|
||||
--listen-port ''${PORT} \
|
||||
--diffusion-fa \
|
||||
--diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
|
||||
--vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
|
||||
--llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
|
||||
--cfg-scale 1.0 \
|
||||
--steps 9 \
|
||||
--rng cuda
|
||||
'';
|
||||
};
|
||||
|
||||
"qwen-image-edit" = {
|
||||
name = "Qwen Image Edit";
|
||||
checkEndpoint = "/";
|
||||
cmd = ''
|
||||
${stable-diffusion-cpp}/bin/sd-server \
|
||||
--listen-port ''${PORT} \
|
||||
--diffusion-fa \
|
||||
--diffusion-model /mnt/ssd/StableDiffusion/QwenImageEdit/Qwen-Rapid-v18_Q5_K.gguf \
|
||||
--vae /mnt/ssd/StableDiffusion/QwenImageEdit/qwen_image_vae.safetensors \
|
||||
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
|
||||
--cfg-scale 2.5 \
|
||||
--sampling-method euler \
|
||||
--flow-shift 3 \
|
||||
--steps 9 \
|
||||
--rng cuda
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
groups = {
|
||||
@@ -335,6 +376,9 @@ in
|
||||
git
|
||||
tmux
|
||||
vim
|
||||
reichard.llama-cpp
|
||||
|
||||
# Local Packages
|
||||
llama-cpp
|
||||
stable-diffusion-cpp
|
||||
];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user