diff --git a/modules/home/programs/terminal/nvim/config/lua/lsp-config.lua b/modules/home/programs/terminal/nvim/config/lua/lsp-config.lua index d967fec..c8c873c 100755 --- a/modules/home/programs/terminal/nvim/config/lua/lsp-config.lua +++ b/modules/home/programs/terminal/nvim/config/lua/lsp-config.lua @@ -155,6 +155,9 @@ setup_lsp("lua_ls", { filetypes = { "lua" }, }) +-- Unison LSP Configuration +setup_lsp("unison") + -- Lua LSP Configuration setup_lsp("sqls", { cmd = { nix_vars.sqls }, diff --git a/modules/nixos/services/llama-swap/config.nix b/modules/nixos/services/llama-swap/config.nix index 5404c87..5392eb0 100644 --- a/modules/nixos/services/llama-swap/config.nix +++ b/modules/nixos/services/llama-swap/config.nix @@ -7,6 +7,33 @@ let in { models = { + # docker run --device=nvidia.com/gpu=all -v ~/.cache/huggingface:/root/.cache/huggingface -p 0.0.0.0:8081:8000 --ipc=host vllm/vllm-openai:latest --model Qwen/Qwen3-0.6B + # docker run --device=nvidia.com/gpu=all -v /mnt/ssd/vLLM:/root/.cache/huggingface -p 0.0.0.0:8081:8000 --ipc=host vllm/vllm-openai:latest --model cyankiwi/Devstral-Small-2-24B-Instruct-2512-AWQ-4bit --max-model-len 60000 --kv-cache-dtype fp8_e4m3 + + # https://huggingface.co/unsloth/GLM-4.7-Flash-GGUF/tree/main + "glm-4.7-flash" = { + name = "GLM 4.7 Flash (30B) - Thinking"; + macros.ctx = "80000"; + cmd = '' + ${llama-cpp}/bin/llama-server \ + --port ''${PORT} \ + -m /mnt/ssd/Models/GLM/GLM-4.7-Flash-UD-Q4_K_XL.gguf \ + -c ''${ctx} \ + --jinja \ + --threads -1 \ + --temp 0.2 \ + --top-k 50 \ + --top-p 0.95 \ + --min-p 0.01 \ + --dry-multiplier 1.1 \ + -fit off + ''; + metadata = { + type = [ "text-generation" ]; + }; + env = [ "GGML_CUDA_ENABLE_UNIFIED_MEMORY=1" ]; + }; + # https://huggingface.co/unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF/tree/main "devstral-small-2-instruct" = { name = "Devstral Small 2 (24B) - Instruct"; diff --git a/modules/nixos/virtualisation/podman/default.nix b/modules/nixos/virtualisation/podman/default.nix 
index 21bd1c0..d92afe7 100644 --- a/modules/nixos/virtualisation/podman/default.nix +++ b/modules/nixos/virtualisation/podman/default.nix @@ -12,6 +12,7 @@ in { options.${namespace}.virtualisation.podman = { enable = lib.mkEnableOption "podman"; + enableNvidia = lib.mkEnableOption "nvidia container support"; }; config = mkIf cfg.enable { @@ -29,6 +30,8 @@ in }; }; + hardware.nvidia-container-toolkit.enable = mkIf cfg.enableNvidia true; + virtualisation = { podman = { inherit (cfg) enable; diff --git a/packages/llama-cpp/default.nix b/packages/llama-cpp/default.nix index ac4c438..6131352 100644 --- a/packages/llama-cpp/default.nix +++ b/packages/llama-cpp/default.nix @@ -7,12 +7,12 @@ vulkanSupport = true; }).overrideAttrs (oldAttrs: rec { - version = "7562"; + version = "7781"; src = pkgs.fetchFromGitHub { owner = "ggml-org"; repo = "llama.cpp"; tag = "b${version}"; - hash = "sha256-yuTPj41B3YitRPrD6yV25ilrIxVKebPGSqdJMpVhUDg="; + hash = "sha256-1ikFB4pnhSwnVPWfyQbT/7BqeyswvLNCx+aDo/ORjN0="; leaveDotGit = true; postFetch = '' git -C "$out" rev-parse --short HEAD > $out/COMMIT diff --git a/systems/x86_64-linux/lin-va-desktop/default.nix b/systems/x86_64-linux/lin-va-desktop/default.nix index c0d4adf..ab7f6d2 100755 --- a/systems/x86_64-linux/lin-va-desktop/default.nix +++ b/systems/x86_64-linux/lin-va-desktop/default.nix @@ -87,7 +87,10 @@ in }; virtualisation = { - podman = enabled; + podman = { + enable = true; + enableNvidia = true; + }; }; security = {