From f3ceb57e5e29311e2eb544c45ed264a419bbf1a7 Mon Sep 17 00:00:00 2001
From: Evan Reichard
Date: Fri, 2 Jan 2026 09:50:44 -0500
Subject: [PATCH] feat: stable-diffusion & updates

---
 .../evanreichard@lin-va-thinkpad/default.nix |   1 +
 .../programs/terminal/opencode/default.nix   |  15 +--
 packages/llama-swap/default.nix              |   4 +-
 packages/opencode/default.nix                |   6 +-
 packages/stable-diffusion-cpp/default.nix    | 118 ++++++++++++++++++
 .../x86_64-linux/lin-va-desktop/default.nix  |  76 ++++++++---
 6 files changed, 190 insertions(+), 30 deletions(-)
 create mode 100644 packages/stable-diffusion-cpp/default.nix

diff --git a/homes/x86_64-linux/evanreichard@lin-va-thinkpad/default.nix b/homes/x86_64-linux/evanreichard@lin-va-thinkpad/default.nix
index 1feaf40..e2b93a3 100755
--- a/homes/x86_64-linux/evanreichard@lin-va-thinkpad/default.nix
+++ b/homes/x86_64-linux/evanreichard@lin-va-thinkpad/default.nix
@@ -51,6 +51,7 @@ in
       git = enabled;
       k9s = enabled;
       nvim = enabled;
+      opencode = enabled;
     };
   };
 };
diff --git a/modules/home/programs/terminal/opencode/default.nix b/modules/home/programs/terminal/opencode/default.nix
index 581d492..0b44fcf 100755
--- a/modules/home/programs/terminal/opencode/default.nix
+++ b/modules/home/programs/terminal/opencode/default.nix
@@ -28,15 +28,6 @@ in
         doom_loop = "ask";
         external_directory = "ask";
       };
-      lsp = {
-        nil = {
-          command = [
-            "${pkgs.nil}/bin/nil"
-            "--stdio"
-          ];
-          extensions = [ ".nix" ];
-        };
-      };
       provider = {
         "llama-swap" = {
           npm = "@ai-sdk/openai-compatible";
@@ -44,6 +35,9 @@ in
             baseURL = "https://llm-api.va.reichard.io/v1";
           };
           models = {
+            nemotron-3-nano-30b-thinking = {
+              name = "Nemotron 3 Nano (30B) - Thinking";
+            };
             gpt-oss-20b-thinking = {
               name = "GPT OSS (20B)";
             };
@@ -62,6 +56,9 @@ in
             qwen3-30b-2507-instruct = {
               name = "Qwen3 2507 (30B) Instruct";
             };
+            qwen3-4b-2507-instruct = {
+              name = "Qwen3 2507 (4B) - Instruct";
+            };
           };
         };
       };
diff --git a/packages/llama-swap/default.nix b/packages/llama-swap/default.nix
index 1aed5ae..c2e39f0 100644
--- a/packages/llama-swap/default.nix
+++ b/packages/llama-swap/default.nix
@@ -13,13 +13,13 @@ let
 in
 buildGoModule (finalAttrs: {
   pname = "llama-swap";
-  version = "179";
+  version = "180";
 
   src = fetchFromGitHub {
     owner = "mostlygeek";
     repo = "llama-swap";
     tag = "v${finalAttrs.version}";
-    hash = "sha256-7iftF1KMz+2DUifRG/ESHcWXYsOJ3NiEF7oHuJKxmUE=";
+    hash = "sha256-WPDmENGH1uwNrobcIPA2vuNEsb9sP7Wl7T0wtUv1H/s=";
     # populate values that require us to use git. By doing this in postFetch we
     # can delete .git afterwards and maintain better reproducibility of the src.
     leaveDotGit = true;
diff --git a/packages/opencode/default.nix b/packages/opencode/default.nix
index 8a823f5..7751371 100644
--- a/packages/opencode/default.nix
+++ b/packages/opencode/default.nix
@@ -13,12 +13,12 @@
 }:
 let
   pname = "opencode";
-  version = "1.0.170";
+  version = "1.0.223";
 
   src = fetchFromGitHub {
     owner = "sst";
     repo = "opencode";
     tag = "v${version}";
-    hash = "sha256-Y0thIZ20p0FSBAH0mJfFn8e+OEUvlZyTuk+/yEt8Sy8=";
+    hash = "sha256-CzbWv48UySgXfNgtWdIdFBcqx8GHT4rSJNDdpn39b0c=";
   };
   node_modules = stdenvNoCC.mkDerivation {
@@ -75,7 +75,7 @@ let
   # NOTE: Required else we get errors that our fixed-output derivation references store paths
   dontFixup = true;
 
-  outputHash = "sha256-Aq774bgU12HkrF2oAtfu9kyQFlxUeDbmwlS9lz4Z4ZI=";
+  outputHash = "sha256-+HEd3I11VqejTi7cikbTL5+DmNGyvUC4Cm4ysfujwes=";
   outputHashAlgo = "sha256";
   outputHashMode = "recursive";
 };
diff --git a/packages/stable-diffusion-cpp/default.nix b/packages/stable-diffusion-cpp/default.nix
new file mode 100644
index 0000000..70f8253
--- /dev/null
+++ b/packages/stable-diffusion-cpp/default.nix
@@ -0,0 +1,118 @@
+{ lib
+, stdenv
+, fetchFromGitHub
+, cmake
+, ninja
+, pkg-config
+, autoAddDriverRunpath
+, config ? { }
+, cudaSupport ? (config.cudaSupport or false)
+, cudaPackages ? { }
+, rocmSupport ? (config.rocmSupport or false)
+, rocmPackages ? { }
+, rocmGpuTargets ? (rocmPackages.clr.localGpuTargets or rocmPackages.clr.gpuTargets or [ ])
+, openclSupport ? false
+, clblast
+, vulkanSupport ? false
+, shaderc
+, vulkan-headers
+, vulkan-loader
+, spirv-tools
+, metalSupport ? (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64)
+, apple-sdk
+,
+}:
+
+let
+  inherit (lib)
+    cmakeBool
+    cmakeFeature
+    optionals
+    ;
+
+  effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
+in
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "stable-diffusion-cpp";
+  version = "master-453-4ff2c8c";
+
+  src = fetchFromGitHub {
+    owner = "leejet";
+    repo = "stable-diffusion.cpp";
+    rev = "master-453-4ff2c8c";
+    hash = "sha256-8cN6dYOQAKnJpuQdtayp6+o71s64lG+FcTn8GsIM4jI=";
+    fetchSubmodules = true;
+  };
+
+  nativeBuildInputs = [
+    cmake
+    ninja
+    pkg-config
+  ]
+  ++ optionals cudaSupport [
+    (cudaPackages.cuda_nvcc)
+    autoAddDriverRunpath
+  ];
+
+  buildInputs =
+    (optionals cudaSupport (
+      with cudaPackages;
+      [
+        cuda_cccl
+        cuda_cudart
+        libcublas
+      ]
+    ))
+    ++ (optionals rocmSupport (
+      with rocmPackages;
+      [
+        clr
+        hipblas
+        rocblas
+      ]
+    ))
+    ++ (optionals vulkanSupport [
+      shaderc
+      vulkan-headers
+      vulkan-loader
+      spirv-tools
+    ])
+    ++ (optionals openclSupport [
+      clblast
+    ])
+    ++ (optionals metalSupport [
+      apple-sdk
+    ]);
+
+  cmakeFlags = [
+    (cmakeBool "SD_BUILD_EXAMPLES" true)
+    (cmakeBool "SD_BUILD_SHARED_LIBS" true)
+    (cmakeBool "SD_USE_SYSTEM_GGML" false)
+    (cmakeBool "SD_CUDA" cudaSupport)
+    (cmakeBool "SD_HIPBLAS" rocmSupport)
+    (cmakeBool "SD_VULKAN" vulkanSupport)
+    (cmakeBool "SD_OPENCL" openclSupport)
+    (cmakeBool "SD_METAL" metalSupport)
+    (cmakeBool "SD_FAST_SOFTMAX" false)
+  ]
+  ++ optionals cudaSupport [
+    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" cudaPackages.flags.cmakeCudaArchitecturesString)
+  ]
+  ++ optionals rocmSupport [
+    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmGpuTargets))
+  ];
+
+  meta = with lib; {
+    description = "Stable Diffusion inference in pure C/C++";
+    homepage = "https://github.com/leejet/stable-diffusion.cpp";
+    license = licenses.mit;
+    mainProgram = "sd";
+    maintainers = with lib.maintainers; [
+      dit7ya
+      adriangl
+    ];
+    platforms = platforms.unix;
+    badPlatforms = optionals (cudaSupport || openclSupport) platforms.darwin;
+    broken = metalSupport && !stdenv.hostPlatform.isDarwin;
+  };
+})
diff --git a/systems/x86_64-linux/lin-va-desktop/default.nix b/systems/x86_64-linux/lin-va-desktop/default.nix
index f5bd6f4..0449a9e 100755
--- a/systems/x86_64-linux/lin-va-desktop/default.nix
+++ b/systems/x86_64-linux/lin-va-desktop/default.nix
@@ -5,6 +5,12 @@
 }:
 let
   inherit (lib.${namespace}) enabled;
+
+  llama-swap = pkgs.reichard.llama-swap;
+  llama-cpp = pkgs.reichard.llama-cpp;
+  stable-diffusion-cpp = pkgs.reichard.stable-diffusion-cpp.override {
+    cudaSupport = true;
+  };
 in
 {
   system.stateVersion = "25.11";
@@ -88,14 +94,14 @@ in
   services.llama-swap = {
     enable = true;
     openFirewall = true;
-    package = pkgs.reichard.llama-swap;
+    package = llama-swap;
     settings = {
       models = {
         # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main
         "devstral-small-2-instruct" = {
           name = "Devstral Small 2 (24B) - Instruct";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL.gguf \
               --chat-template-file /mnt/ssd/Models/Devstral/Devstral-Small-2-24B-Instruct-2512-UD-Q4_K_XL_template.jinja \
@@ -113,7 +119,7 @@ in
         "gpt-oss-20b-thinking" = {
           name = "GPT OSS (20B) - Thinking";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/GPT-OSS/gpt-oss-20b-F16.gguf \
               -c 131072 \
@@ -128,7 +134,7 @@ in
         "gpt-oss-csec-20b-thinking" = {
           name = "GPT OSS CSEC (20B) - Thinking";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/GPT-OSS/GPT-OSS-Cybersecurity-20B-Merged.i1-MXFP4_MOE.gguf \
               -c 131072 \
@@ -143,7 +149,7 @@ in
         "qwen3-next-80b-instruct" = {
           name = "Qwen3 Next (80B) - Instruct";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/Qwen3/Qwen3-Next-80B-A3B-Instruct-UD-Q2_K_XL.gguf \
               -c 262144 \
@@ -162,7 +168,7 @@ in
         "qwen3-30b-2507-instruct" = {
           name = "Qwen3 2507 (30B) - Instruct";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf \
               -c 262144 \
@@ -181,10 +187,10 @@ in
         "qwen3-coder-30b-instruct" = {
           name = "Qwen3 Coder (30B) - Instruct";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
-              -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf \
-              -c 262144 \
+              -m /mnt/ssd/Models/Qwen3/Qwen3-Coder-30B-A3B-Instruct-UD-Q6_K_XL.gguf \
+              -c 131072 \
               --temp 0.7 \
               --min-p 0.0 \
               --top-p 0.8 \
@@ -200,7 +206,7 @@ in
         "qwen3-30b-2507-thinking" = {
           name = "Qwen3 2507 (30B) - Thinking";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/Qwen3/Qwen3-30B-A3B-Thinking-2507-UD-Q4_K_XL.gguf \
               -c 262144 \
@@ -219,7 +225,7 @@ in
         "nemotron-3-nano-30b-thinking" = {
           name = "Nemotron 3 Nano (30B) - Thinking";
           cmd = ''
-            ${pkgs.reichard.llama-cpp}/bin/llama-server \
+            ${llama-cpp}/bin/llama-server \
               --port ''${PORT} \
               -m /mnt/ssd/Models/Nemotron/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
               -c 1048576 \
@@ -233,7 +239,7 @@ in
         "qwen3-8b-vision" = {
           name = "Qwen3 Vision (8B) - Thinking";
"Qwen3 Vision (8B) - Thinking"; cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ + ${llama-cpp}/bin/llama-server \ --port ''${PORT} \ -m /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf \ --mmproj /mnt/ssd/Models/Qwen3/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf \ @@ -253,11 +259,12 @@ in "qwen2.5-coder-7b-instruct" = { name = "Qwen2.5 Coder (7B) - Instruct"; cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ + ${llama-cpp}/bin/llama-server \ -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf \ --fim-qwen-7b-default \ -c 131072 \ --port ''${PORT} \ + -fit off \ -dev CUDA1 ''; }; @@ -266,7 +273,7 @@ in "qwen2.5-coder-3b-instruct" = { name = "Qwen2.5 Coder (3B) - Instruct"; cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ + ${llama-cpp}/bin/llama-server \ -m /mnt/ssd/Models/Qwen2.5/Qwen2.5-Coder-3B-Instruct-Q8_0.gguf \ --fim-qwen-3b-default \ --port ''${PORT} \ @@ -279,7 +286,7 @@ in "qwen3-4b-2507-instruct" = { name = "Qwen3 2507 (4B) - Instruct"; cmd = '' - ${pkgs.reichard.llama-cpp}/bin/llama-server \ + ${llama-cpp}/bin/llama-server \ --port ''${PORT} \ -m /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ -c 98304 \ @@ -289,6 +296,40 @@ in -dev CUDA1 ''; }; + + "z-image-turbo" = { + name = "Z-Image-Turbo"; + checkEndpoint = "/"; + cmd = '' + ${stable-diffusion-cpp}/bin/sd-server \ + --listen-port ''${PORT} \ + --diffusion-fa \ + --diffusion-model /mnt/ssd/StableDiffusion/ZImageTurbo/z-image-turbo-Q8_0.gguf \ + --vae /mnt/ssd/StableDiffusion/ZImageTurbo/ae.safetensors \ + --llm /mnt/ssd/Models/Qwen3/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ + --cfg-scale 1.0 \ + --steps 9 \ + --rng cuda + ''; + }; + + "qwen-image-edit" = { + name = "Qwen Image Edit"; + checkEndpoint = "/"; + cmd = '' + ${stable-diffusion-cpp}/bin/sd-server \ + --listen-port ''${PORT} \ + --diffusion-fa \ + --diffusion-model /mnt/ssd/StableDiffusion/QwenImageEdit/Qwen-Rapid-v18_Q5_K.gguf \ + --vae /mnt/ssd/StableDiffusion/QwenImageEdit/qwen_image_vae.safetensors \ + --llm /mnt/ssd/Models/Qwen2.5/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \ + --cfg-scale 2.5 \ + --sampling-method euler \ + --flow-shift 3 \ + --steps 9 \ + --rng cuda + ''; + }; }; groups = { @@ -335,6 +376,9 @@ in git tmux vim - reichard.llama-cpp + + # Local Packages + llama-cpp + stable-diffusion-cpp ]; }