From 990b6a439285c6ad9236578519640424fdac3f09 Mon Sep 17 00:00:00 2001
From: Evan Reichard
Date: Thu, 30 Apr 2026 20:04:58 -0400
Subject: [PATCH] feat: vllm

---
 modules/nixos/services/llama-swap/config.nix  | 41 +++++++++++++++++++
 modules/nixos/services/llama-swap/default.nix |  1 +
 .../lin-va-mbp-personal/default.nix           |  1 +
 3 files changed, 43 insertions(+)

diff --git a/modules/nixos/services/llama-swap/config.nix b/modules/nixos/services/llama-swap/config.nix
index 2552416..b41cdd1 100644
--- a/modules/nixos/services/llama-swap/config.nix
+++ b/modules/nixos/services/llama-swap/config.nix
@@ -357,6 +357,47 @@ in
       };
     };
 
+    "vllm-qwen3.5-27b-thinking" = {
+      name = "vLLM Qwen3.5 (27B) - Thinking";
+      macros.ctx = "196608";
+      proxy = "http://127.0.0.1:\${PORT}";
+      cmd = ''
+        ${pkgs.docker}/bin/docker run --rm --device=nvidia.com/gpu=all \
+          --name ''${MODEL_ID} \
+          -e PYTORCH_ALLOC_CONF=expandable_segments:True \
+          -v /mnt/ssd/vLLM:/root/.cache/huggingface \
+          -p ''${PORT}:8000 \
+          --ipc=host vllm/vllm-openai:latest \
+          --served-model-name ''${MODEL_ID} \
+          --model cyankiwi/Qwen3.5-27B-AWQ-4bit \
+          --max-model-len 24576 \
+          --kv-cache-dtype auto \
+          --max-num-seqs 4 \
+          --max-num-batched-tokens 4096 \
+          --enable-chunked-prefill \
+          --gpu-memory-utilization 0.95 \
+          --language-model-only \
+          --speculative-config '{"method":"mtp","num_speculative_tokens":3}' \
+          --enable-prefix-caching \
+          --enforce-eager \
+          --block-size 32 \
+          --swap-space 4 \
+          --tensor-parallel-size 1 \
+          --reasoning-parser qwen3 \
+          --enable-auto-tool-choice \
+          --default-chat-template-kwargs '{"enable_thinking": true}' \
+          --tool-call-parser qwen3_coder
+      '';
+      cmdStop = "${pkgs.docker}/bin/docker stop \${MODEL_ID}";
+
+      metadata = {
+        type = [
+          "text-generation"
+          "coding"
+        ];
+      };
+    };
+
     # ---------------------------------------
     # ---------- Stable Diffussion ----------
     # ---------------------------------------
diff --git a/modules/nixos/services/llama-swap/default.nix b/modules/nixos/services/llama-swap/default.nix
index 13fbffc..b9e18e0 100644
--- a/modules/nixos/services/llama-swap/default.nix
+++ b/modules/nixos/services/llama-swap/default.nix
@@ -27,6 +27,7 @@ in
   users.users.llama-swap = {
     isSystemUser = true;
     group = "llama-swap";
+    extraGroups = [ "podman" ];
   };
 
   # Create Service
diff --git a/systems/aarch64-linux/lin-va-mbp-personal/default.nix b/systems/aarch64-linux/lin-va-mbp-personal/default.nix
index fe8b2e3..c231fd9 100755
--- a/systems/aarch64-linux/lin-va-mbp-personal/default.nix
+++ b/systems/aarch64-linux/lin-va-mbp-personal/default.nix
@@ -19,6 +19,7 @@ in
   # System Config
   reichard = {
     nix = enabled;
+    user.extraGroups = [ "dialout" ];
     system = {
       boot = {