feat: stable-diffusion & updates

2026-01-02 09:50:44 -05:00
parent bb3305adbf
commit f3ceb57e5e
6 changed files with 190 additions and 30 deletions

View File

@@ -51,6 +51,7 @@ in
git = enabled;
k9s = enabled;
nvim = enabled;
opencode = enabled;
};
};
};
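
The `enabled` value used above is defined outside this commit; a minimal sketch of what such a helper typically looks like in snowfall-lib-style configurations (an assumption here, since the library code is not part of this diff):

# Hypothetical helper: `opencode = enabled;` would then expand to
# `opencode = { enable = true; };`
enabled = {
  enable = true;
};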

View File

@@ -28,15 +28,6 @@ in
doom_loop = "ask";
external_directory = "ask";
};
lsp = {
nil = {
command = [
"${pkgs.nil}/bin/nil"
"--stdio"
];
extensions = [ ".nix" ];
};
};
provider = {
"llama-swap" = {
npm = "@ai-sdk/openai-compatible";
@@ -44,6 +35,9 @@ in
baseURL = "https://llm-api.va.reichard.io/v1";
};
models = {
nemotron-3-nano-30b-thinking = {
name = "Nemotron 3 Nano (30B) - Thinking";
};
gpt-oss-20b-thinking = {
name = "GPT OSS (20B)";
};
@@ -62,6 +56,9 @@ in
qwen3-30b-2507-instruct = {
name = "Qwen3 2507 (30B) Instruct";
};
qwen3-4b-2507-instruct = {
name = "Qwen3 2507 (4B) - Instruct";
};
};
};
};
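
With the Nemotron and Qwen3 4B entries added above, the provider's model list should mirror what the proxy actually serves. A quick cross-check (assuming llama-swap exposes the standard OpenAI-compatible listing endpoint at the configured baseURL):

# Each id under `models` above should appear in this listing.
curl https://llm-api.va.reichard.io/v1/models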

View File

@@ -13,13 +13,13 @@ let
in
buildGoModule (finalAttrs: {
pname = "llama-swap";
version = "179";
version = "180";
src = fetchFromGitHub {
owner = "mostlygeek";
repo = "llama-swap";
tag = "v${finalAttrs.version}";
hash = "sha256-7iftF1KMz+2DUifRG/ESHcWXYsOJ3NiEF7oHuJKxmUE=";
hash = "sha256-WPDmENGH1uwNrobcIPA2vuNEsb9sP7Wl7T0wtUv1H/s=";
# populate values that require us to use git. By doing this in postFetch we
# can delete .git afterwards and maintain better reproducibility of the src.
leaveDotGit = true;
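
When bumping `version` like this, the `hash` must be refreshed too, since fetchFromGitHub verifies the fetched source against it. A common sketch of the workflow, using the standard `lib.fakeHash` placeholder (not anything specific to this repo):

src = fetchFromGitHub {
  owner = "mostlygeek";
  repo = "llama-swap";
  tag = "v${finalAttrs.version}";
  # Build once with a placeholder; Nix fails with a hash mismatch that
  # reports the real value ("got: sha256-..."), which then replaces it.
  hash = lib.fakeHash;
  leaveDotGit = true;
};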

View File

@@ -13,12 +13,12 @@
}:
let
pname = "opencode";
version = "1.0.170";
version = "1.0.223";
src = fetchFromGitHub {
owner = "sst";
repo = "opencode";
tag = "v${version}";
hash = "sha256-Y0thIZ20p0FSBAH0mJfFn8e+OEUvlZyTuk+/yEt8Sy8=";
hash = "sha256-CzbWv48UySgXfNgtWdIdFBcqx8GHT4rSJNDdpn39b0c="; # "sha256-Y0thIZ20p0FSBAH0mJfFn8e+OEUvlZyTuk+/yEt8Sy8=";
};
node_modules = stdenvNoCC.mkDerivation {
@@ -75,7 +75,7 @@ let
# NOTE: Required, otherwise we get errors about the fixed-output derivation referencing store paths
dontFixup = true;
outputHash = "sha256-Aq774bgU12HkrF2oAtfu9kyQFlxUeDbmwlS9lz4Z4ZI=";
outputHash = "sha256-+HEd3I11VqejTi7cikbTL5+DmNGyvUC4Cm4ysfujwes=";
outputHashAlgo = "sha256";
outputHashMode = "recursive";
};
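
Note that an opencode bump touches two hashes: the `src` hash and the `node_modules` `outputHash`, because the dependency tree is pinned as a fixed-output derivation. A minimal sketch of that pattern (names and phases here are illustrative, not the file's actual attribute set):

# Fixed-output derivation: the result is addressed by outputHash alone,
# so network access is permitted during the build, and the hash must be
# refreshed whenever the fetched dependencies change.
node_modules = stdenvNoCC.mkDerivation {
  name = "example-node-modules";   # hypothetical name
  src = ./.;
  buildPhase = "bun install --frozen-lockfile";
  installPhase = "cp -r node_modules $out";
  dontFixup = true;                # fixup would rewrite store-path references
  outputHash = lib.fakeHash;       # refresh alongside src.hash on every bump
  outputHashAlgo = "sha256";
  outputHashMode = "recursive";
};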

View File

@@ -0,0 +1,118 @@
{ lib
, stdenv
, fetchFromGitHub
, cmake
, ninja
, pkg-config
, autoAddDriverRunpath
, config ? { }
, cudaSupport ? (config.cudaSupport or false)
, cudaPackages ? { }
, rocmSupport ? (config.rocmSupport or false)
, rocmPackages ? { }
, rocmGpuTargets ? (rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets or [ ])
, openclSupport ? false
, clblast
, vulkanSupport ? false
, shaderc
, vulkan-headers
, vulkan-loader
, spirv-tools
, metalSupport ? (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64)
, apple-sdk
}:
let
inherit (lib)
cmakeBool
cmakeFeature
optionals
;
effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
in
effectiveStdenv.mkDerivation (finalAttrs: {
pname = "stable-diffusion-cpp";
version = "master-453-4ff2c8c";
src = fetchFromGitHub {
owner = "leejet";
repo = "stable-diffusion.cpp";
rev = "master-453-4ff2c8c";
hash = "sha256-8cN6dYOQAKnJpuQdtayp6+o71s64lG+FcTn8GsIM4jI=";
fetchSubmodules = true;
};
nativeBuildInputs = [
cmake
ninja
pkg-config
]
++ optionals cudaSupport [
(cudaPackages.cuda_nvcc)
autoAddDriverRunpath
];
buildInputs =
(optionals cudaSupport (
with cudaPackages;
[
cuda_cccl
cuda_cudart
libcublas
]
))
++ (optionals rocmSupport (
with rocmPackages;
[
clr
hipblas
rocblas
]
))
++ (optionals vulkanSupport [
shaderc
vulkan-headers
vulkan-loader
spirv-tools
])
++ (optionals openclSupport [
clblast
])
++ (optionals metalSupport [
apple-sdk
]);
cmakeFlags = [
(cmakeBool "SD_BUILD_EXAMPLES" true)
(cmakeBool "SD_BUILD_SHARED_LIBS" true)
(cmakeBool "SD_USE_SYSTEM_GGML" false)
(cmakeBool "SD_CUDA" cudaSupport)
(cmakeBool "SD_HIPBLAS" rocmSupport)
(cmakeBool "SD_VULKAN" vulkanSupport)
(cmakeBool "SD_OPENCL" openclSupport)
(cmakeBool "SD_METAL" metalSupport)
(cmakeBool "SD_FAST_SOFTMAX" false)
]
++ optionals cudaSupport [
(cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
]
++ optionals rocmSupport [
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
];
meta = with lib; {
description = "Stable Diffusion inference in pure C/C++";
homepage = "https://github.com/leejet/stable-diffusion.cpp";
license = licenses.mit;
mainProgram = "sd";
maintainers = with lib.maintainers; [
dit7ya
adriangl
];
platforms = platforms.unix;
badPlatforms = optionals (cudaSupport || openclSupport) platforms.darwin;
broken = metalSupport && !stdenv.hostPlatform.isDarwin;
};
})
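
A quick way to sanity-check the new package once it builds (the `sd` flags below follow stable-diffusion.cpp's upstream README; the flake attribute, model path, and prompt are placeholders):

# Assuming the flake exposes the package under this name:
nix build .#stable-diffusion-cpp
# -m/-p/-o follow stable-diffusion.cpp's documented CLI.
./result/bin/sd -m /path/to/model.gguf -p "a lovely cat" -o ./output.png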

View File

@@ -5,6 +5,12 @@
}:
let
inherit (lib.${namespace}) enabled;
llama-swap = pkgs.reichard.llama-swap;
llama-cpp = pkgs.reichard.llama-cpp;
stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
cudaSupport = true;
};
in
{
system.stateVersion = "25.11";
@@ -88,14 +94,14 @@ in
services.llama-swap = {
enable = true;
openFirewall = true;
package = pkgs.reichard.llama-swap;
package = llama-swap;
settings = {
models = {
# https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
"devstral-small-2-instruct" = {
name = "Devstral Small 2 (24B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
--chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
@@ -113,7 +119,7 @@ in
"gpt-oss-20b-thinking" = {
name = "GPT OSS (20B) - Thinking";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-F16.gguf \
-c 131072 \
@@ -128,7 +134,7 @@ in
"gpt-oss-csec-20b-thinking" = {
name = "GPT OSS CSEC (20B) - Thinking";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
-c 131072 \
@@ -143,7 +149,7 @@ in
"qwen3-next-80b-instruct" = {
name = "Qwen3 Next (80B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
-c 262144 \
@@ -162,7 +168,7 @@ in
"qwen3-30b-2507-instruct" = {
name = "Qwen3 2507 (30B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
-c 262144 \
@@ -181,10 +187,10 @@ in
"qwen3-coder-30b-instruct" = {
name = "Qwen3 Coder (30B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
-c 262144 \
-m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
-c 131072 \
--temp 0.7 \
--min-p 0.0 \
--top-p 0.8 \
@@ -200,7 +206,7 @@ in
"qwen3-30b-2507-thinking" = {
name = "Qwen3 2507 (30B) - Thinking";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
-c 262144 \
@@ -219,7 +225,7 @@ in
"nemotron-3-nano-30b-thinking" = {
name = "Nemotron 3 Nano (30B) - Thinking";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
-c 1048576 \
@@ -233,7 +239,7 @@ in
"qwen3-8b-vision" = {
name = "Qwen3 Vision (8B) - Thinking";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \
--mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \
@@ -253,11 +259,12 @@ in
"qwen2.5-coder-7b-instruct" = {
name = "Qwen2.5 Coder (7B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \
--fim-qwen-7b-default \
-c 131072 \
--port ''${PORT} \
-fit off \
-dev CUDA1
'';
};
@@ -266,7 +273,7 @@ in
"qwen2.5-coder-3b-instruct" = {
name = "Qwen2.5 Coder (3B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
-m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \
--fim-qwen-3b-default \
--port ''${PORT} \
@@ -279,7 +286,7 @@ in
"qwen3-4b-2507-instruct" = {
name = "Qwen3 2507 (4B) - Instruct";
cmd = ''
${pkgs.reichard.llama-cpp}/bin/llama-server \
${llama-cpp}/bin/llama-server \
--port ''${PORT} \
-m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
-c 98304 \
@@ -289,6 +296,40 @@ in
-dev CUDA1
'';
};
"z-image-turbo" = {
name = "Z-Image-Turbo";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \
--vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \
--llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
--cfg-scale 1.0 \
--steps 9 \
--rng cuda
'';
};
"qwen-image-edit" = {
name = "Qwen Image Edit";
checkEndpoint = "/";
cmd = ''
${stable-diffusion-cpp}/bin/sd-server \
--listen-port ''${PORT} \
--diffusion-fa \
--diffusion-model /mnt/ssd/StableDiffusion/QwenImageEdit/Qwen-Rapid-v18_Q5_K.gguf \
--vae /mnt/ssd/StableDiffusion/QwenImageEdit/qwen_image_vae.safetensors \
--llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--cfg-scale 2.5 \
--sampling-method euler \
--flow-shift 3 \
--steps 9 \
--rng cuda
'';
};
};
groups = {
@@ -335,6 +376,9 @@ in
git
tmux
vim
reichard.llama-cpp
# Local Packages
llama-cpp
stable-diffusion-cpp
];
}
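
Once deployed, the whole chain can be exercised end to end through the proxy, which starts the matching backend on demand. A hedged smoke test against the baseURL configured earlier (endpoints follow the OpenAI-compatible API that llama-swap proxies):

# List configured models, then run one request against a small model.
curl https://llm-api.va.reichard.io/v1/models
curl https://llm-api.va.reichard.io/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "qwen3-4b-2507-instruct", "messages": [{"role": "user", "content": "hello"}]}'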