diff --git a/modules/home/programs/terminal/nvim/config/lua/llm.lua b/modules/home/programs/terminal/nvim/config/lua/llm.lua
index 2a93118..fcbb9e4 100755
--- a/modules/home/programs/terminal/nvim/config/lua/llm.lua
+++ b/modules/home/programs/terminal/nvim/config/lua/llm.lua
@@ -1,6 +1,6 @@
 -- Configure LLama LLM
 vim.g.llama_config = {
-  endpoint = "http://10.0.50.120:8080/infill",
+  endpoint = "http://10.0.50.120:8012/infill",
   api_key = "",
   n_prefix = 256,
   n_suffix = 64,
diff --git a/modules/nixos/hardware/opengl/default.nix b/modules/nixos/hardware/opengl/default.nix
index 04425c4..4c1dfef 100644
--- a/modules/nixos/hardware/opengl/default.nix
+++ b/modules/nixos/hardware/opengl/default.nix
@@ -14,6 +14,8 @@ in
   };

   config = mkIf cfg.enable {
+    services.xserver.videoDrivers = mkIf cfg.enableNvidia [ "nvidia" ];
+
     environment.systemPackages = with pkgs; [
       libva-utils
       vdpauinfo
@@ -23,6 +25,15 @@ in
       intel-gpu-tools
     ];

+    # Enable Nvidia Hardware
+    hardware.nvidia = mkIf cfg.enableNvidia {
+      package = config.boot.kernelPackages.nvidiaPackages.stable;
+      modesetting.enable = true;
+      powerManagement.enable = true;
+      open = false;
+      nvidiaSettings = true;
+    };
+
     # Add Intel Arc / Nvidia Drivers
     hardware.enableRedistributableFirmware = mkIf cfg.enableIntel (mkForce true);
     hardware.graphics = {
diff --git a/modules/nixos/services/llama-cpp/default.nix b/modules/nixos/services/llama-cpp/default.nix
new file mode 100644
index 0000000..38b6e2b
--- /dev/null
+++ b/modules/nixos/services/llama-cpp/default.nix
@@ -0,0 +1,108 @@
+{ config, pkgs, lib, namespace, ... }:
+let
+  inherit (lib) types mkIf mkEnableOption;
+  inherit (lib.${namespace}) mkOpt;
+  cfg = config.${namespace}.services.llama-cpp;
+
+  modelDir = "/models";
+  availableModels = {
+    "qwen2.5-coder-7b-q8_0.gguf" = {
+      url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/qwen2.5-coder-7b-q8_0.gguf?download=true";
+      flag = "--fim-qwen-7b-default";
+    };
+    "qwen2.5-coder-3b-q8_0.gguf" = {
+      url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/qwen2.5-coder-3b-q8_0.gguf?download=true";
+      flag = "--fim-qwen-3b-default";
+    };
+  };
+in
+{
+  options.${namespace}.services.llama-cpp = with types; {
+    enable = mkEnableOption "llama-cpp support";
+    modelName = mkOpt str "qwen2.5-coder-3b-q8_0.gguf" "model to use";
+  };
+
+  config =
+    let
+      modelPath = "${modelDir}/${cfg.modelName}";
+    in
+    mkIf cfg.enable {
+      assertions = [
+        {
+          assertion = availableModels ? ${cfg.modelName};
+          message = "Invalid model '${cfg.modelName}'. Available models: ${lib.concatStringsSep ", " (lib.attrNames availableModels)}";
+        }
+      ];
+
+      systemd.services = {
+        # LLama Download Model
+        download-model = {
+          description = "Download Model";
+          wantedBy = [ "multi-user.target" ];
+          before = [ "llama-cpp.service" ];
+          path = [ pkgs.curl pkgs.coreutils ];
+          serviceConfig = {
+            Type = "oneshot";
+            RemainAfterExit = true;
+            User = "root";
+            Group = "root";
+          };
+          script =
+            let
+              modelURL = availableModels.${cfg.modelName}.url;
+            in
+            ''
+              set -euo pipefail

+              if [ ! -f "${modelPath}" ]; then
+                mkdir -p "${modelDir}"
+                # -L follows redirects; -f (--fail) exits non-zero on HTTP errors
+                # -C - resumes interrupted downloads
+                curl -f -L -C - \
+                  -H "Accept: application/octet-stream" \
+                  --retry 3 \
+                  --retry-delay 5 \
+                  --max-time 1800 \
+                  "${modelURL}" \
+                  -o "${modelPath}.tmp" && \
+                  mv "${modelPath}.tmp" "${modelPath}"
+              fi
+            '';
+        };

+        # Setup LLama API Service
+        llama-cpp = {
+          after = [ "download-model.service" ];
+          requires = [ "download-model.service" ];
+        };
+      };

+      services.llama-cpp = {
+        enable = true;
+        host = "0.0.0.0";
+        port = 8012;
+        openFirewall = true;
+        model = "${modelPath}";

+        package = (pkgs.llama-cpp.override {
+          cudaSupport = true;
+        }).overrideAttrs (oldAttrs: {
+          cmakeFlags = oldAttrs.cmakeFlags ++ [
+            "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
+            "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070

+            # Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
+            "-DLLAMA_FMA=OFF"
+            "-DLLAMA_AVX2=OFF"
+            "-DLLAMA_AVX512=OFF"
+            "-DGGML_FMA=OFF"
+            "-DGGML_AVX2=OFF"
+            "-DGGML_AVX512=OFF"
+          ];
+        });

+        extraFlags = [ availableModels.${cfg.modelName}.flag ];
+      };
+    };
+}
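NOTE: the assertion above fails evaluation early when modelName is not a key of availableModels. A minimal usage sketch (illustrative, not part of this diff; it assumes the `reichard` namespace used by the host config later in this patch):

  # Select the larger FIM model on a host; the module default is the 3B variant.
  {
    reichard.services.llama-cpp = {
      enable = true;
      modelName = "qwen2.5-coder-7b-q8_0.gguf";
    };
  }
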
-f "${modelPath}" ]; then + mkdir -p "${modelDir}" + # Add -f flag to follow redirects and -L for location + # Add --fail flag to exit with error on HTTP errors + # Add -C - to resume interrupted downloads + curl -f -L -C - \ + -H "Accept: application/octet-stream" \ + --retry 3 \ + --retry-delay 5 \ + --max-time 1800 \ + "${modelURL}" \ + -o "${modelPath}.tmp" && \ + mv "${modelPath}.tmp" "${modelPath}" + fi + ''; + }; + + # Setup LLama API Service + llama-cpp = { + after = [ "download-model.service" ]; + requires = [ "download-model.service" ]; + }; + }; + + services.llama-cpp = { + enable = true; + host = "0.0.0.0"; + port = 8012; + openFirewall = true; + model = "${modelPath}"; + + package = (pkgs.llama-cpp.override { + cudaSupport = true; + }).overrideAttrs (oldAttrs: { + cmakeFlags = oldAttrs.cmakeFlags ++ [ + "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1" + "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070 + + # Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz + "-DLLAMA_FMA=OFF" + "-DLLAMA_AVX2=OFF" + "-DLLAMA_AVX512=OFF" + "-DGGML_FMA=OFF" + "-DGGML_AVX2=OFF" + "-DGGML_AVX512=OFF" + ]; + }); + + extraFlags = [ availableModels.${cfg.modelName}.flag ]; + }; + }; +} diff --git a/modules/nixos/services/rtl-tcp/default.nix b/modules/nixos/services/rtl-tcp/default.nix new file mode 100644 index 0000000..319131b --- /dev/null +++ b/modules/nixos/services/rtl-tcp/default.nix @@ -0,0 +1,32 @@ +{ config, pkgs, lib, namespace, ... }: +let + inherit (lib) mkIf mkEnableOption; + inherit (lib.${namespace}) mkBoolOpt; + cfg = config.${namespace}.services.rtl-tcp; +in +{ + options.${namespace}.services.rtl-tcp = { + enable = mkEnableOption "RTL-TCP support"; + openFirewall = mkBoolOpt true "Open firewall"; + }; + + config = mkIf cfg.enable { + hardware.rtl-sdr.enable = true; + networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ 1234 ]; + + # RTL-SDR TCP Server Service + systemd.services.rtl-tcp = { + description = "RTL-SDR TCP Server"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + + serviceConfig = { + ExecStart = "${pkgs.rtl-sdr}/bin/rtl_tcp -a 0.0.0.0 -f 1090000000 -s 2400000"; + Restart = "on-failure"; + RestartSec = "10s"; + User = "root"; + Group = "root"; + }; + }; + }; +} diff --git a/modules/nixos/system/boot/default.nix b/modules/nixos/system/boot/default.nix index dfdf9ae..e64b7c5 100644 --- a/modules/nixos/system/boot/default.nix +++ b/modules/nixos/system/boot/default.nix @@ -33,10 +33,16 @@ in canTouchEfiVariables = false; }; - systemd-boot = { + # systemd-boot = { + # enable = true; + # configurationLimit = 20; + # editor = false; + # }; + + grub = { enable = true; - configurationLimit = 20; - editor = false; + efiSupport = true; + efiInstallAsRemovable = true; }; timeout = mkDefault 1; diff --git a/modules/nixos/system/disk/default.nix b/modules/nixos/system/disk/default.nix index 410538e..68d3dfc 100644 --- a/modules/nixos/system/disk/default.nix +++ b/modules/nixos/system/disk/default.nix @@ -35,14 +35,6 @@ in mountpoint = "/boot"; }; }; - root = { - name = "root"; - size = "100%"; - content = { - type = "lvm_pv"; - vg = "pool"; - }; - }; swap = { size = "32G"; content = { @@ -51,6 +43,14 @@ in resumeDevice = true; }; }; + root = { + name = "root"; + size = "100%"; + content = { + type = "lvm_pv"; + vg = "pool"; + }; + }; }; }; }; diff --git a/systems/x86_64-linux/lin-va-office/default.nix b/systems/x86_64-linux/lin-va-office/default.nix index 4d09682..df98841 100755 --- a/systems/x86_64-linux/lin-va-office/default.nix +++ 
diff --git a/systems/x86_64-linux/lin-va-office/default.nix b/systems/x86_64-linux/lin-va-office/default.nix
index 4d09682..df98841 100755
--- a/systems/x86_64-linux/lin-va-office/default.nix
+++ b/systems/x86_64-linux/lin-va-office/default.nix
@@ -1,175 +1,70 @@
-{ config, pkgs, ... }:
-
+{ namespace, pkgs, config, lib, ... }:
 let
-  cuda-llama = (pkgs.llama-cpp.override {
-    cudaSupport = true;
-  }).overrideAttrs (oldAttrs: {
-    cmakeFlags = oldAttrs.cmakeFlags ++ [
-      "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
-
-      # Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
-      "-DLLAMA_FMA=OFF"
-      "-DLLAMA_AVX2=OFF"
-      "-DLLAMA_AVX512=OFF"
-      "-DGGML_FMA=OFF"
-      "-DGGML_AVX2=OFF"
-      "-DGGML_AVX512=OFF"
-    ];
-  });
-
-  # Define Model Vars
-  modelDir = "/models";
-
-  # 7B
-  # modelName = "qwen2.5-coder-7b-q8_0.gguf";
-  # modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/${modelName}?download=true";
-
-  # 3B
-  modelName = "qwen2.5-coder-3b-q8_0.gguf";
-  modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/${modelName}?download=true";
-
-  modelPath = "${modelDir}/${modelName}";
+  inherit (lib.${namespace}) enabled;
+  cfg = config.${namespace}.user;
 in
-
 {
-  # Allow Nvidia & CUDA
+  system.stateVersion = "25.05";
+  time.timeZone = "America/New_York";
   nixpkgs.config.allowUnfree = true;

-  # Enable Graphics
-  hardware.graphics = {
-    enable = true;
-    enable32Bit = true;
-    extraPackages = [ pkgs.cudatoolkit ];
-  };
+  # System Config
+  reichard = {
+    nix = enabled;

-  # Load Nvidia Driver Module
-  services.xserver.videoDrivers = [ "nvidia" ];
-
-  # Nvidia Package Configuration
-  hardware.nvidia = {
-    package = config.boot.kernelPackages.nvidiaPackages.stable;
-    modesetting.enable = true;
-    powerManagement.enable = true;
-    open = false;
-    nvidiaSettings = true;
-  };
-
-  # Networking Configuration
-  networking.firewall = {
-    enable = true;
-    allowedTCPPorts = [
-      1234 # RTL-TCP
-      8080 # LLama API
-    ];
-  };
-
-  # RTL-SDR
-  hardware.rtl-sdr.enable = true;
-
-  systemd.services = {
-    # LLama Download Model
-    download-model = {
-      description = "Download Model";
-      wantedBy = [ "multi-user.target" ];
-      before = [ "llama-cpp.service" ];
-      path = [ pkgs.curl pkgs.coreutils ];
-      serviceConfig = {
-        Type = "oneshot";
-        RemainAfterExit = true;
-        User = "root";
-        Group = "root";
+    system = {
+      boot = {
+        enable = true;
+        silentBoot = true;
+      };
+      disk = {
+        enable = true;
+        diskPath = "/dev/sda";
+      };
+      networking = {
+        enable = true;
+        useStatic = {
+          interface = "enp5s0";
+          address = "10.0.50.120";
+          defaultGateway = "10.0.50.254";
+          nameservers = [ "10.0.20.20" ];
+        };
+      };
       };
-      script = ''
-        set -euo pipefail
-
-f "${modelPath}" ]; then - mkdir -p "${modelDir}" - # Add -f flag to follow redirects and -L for location - # Add --fail flag to exit with error on HTTP errors - # Add -C - to resume interrupted downloads - curl -f -L -C - \ - -H "Accept: application/octet-stream" \ - --retry 3 \ - --retry-delay 5 \ - --max-time 1800 \ - "${modelUrl}" \ - -o "${modelPath}.tmp" && \ - mv "${modelPath}.tmp" "${modelPath}" - fi - ''; }; - # RTL-SDR TCP Server Service - rtl-tcp = { - description = "RTL-SDR TCP Server"; - after = [ "network.target" ]; - wantedBy = [ "multi-user.target" ]; - - serviceConfig = { - ExecStart = "${pkgs.rtl-sdr}/bin/rtl_tcp -a 0.0.0.0 -f 1090000000 -s 2400000"; - Restart = "on-failure"; - RestartSec = "10s"; - User = "root"; - Group = "root"; + hardware = { + opengl = { + enable = true; + enableNvidia = true; }; }; + + services = { + openssh = enabled; + llama-cpp = enabled; + rtl-tcp = enabled; + }; }; - # Setup LLama API Service - systemd.services.llama-cpp = { - after = [ "download-model.service" ]; - requires = [ "download-model.service" ]; - }; - - # Enable LLama API - services.llama-cpp = { - enable = true; - host = "0.0.0.0"; - package = cuda-llama; - model = modelPath; - port = 8080; - openFirewall = true; - - # 7B - # extraFlags = [ - # "-ngl" - # "99" - # "-fa" - # "-ub" - # "512" - # "-b" - # "512" - # "-dt" - # "0.1" - # "--ctx-size" - # "4096" - # "--cache-reuse" - # "256" - # ]; - - # 3B - extraFlags = [ - "-ngl" - "99" - "-fa" - "-ub" - "1024" - "-b" - "1024" - "--ctx-size" - "0" - "--cache-reuse" - "256" - ]; + users.users.${cfg.name} = { + openssh = { + authorizedKeys.keys = [ + # evanreichard@lin-va-mbp-personal + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILJJoyXQOv9cAjGUHrUcvsW7vY9W0PmuPMQSI9AMZvNY" + # evanreichard@mac-va-mbp-personal + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMWj6rd6uDtHj/gGozgIEgxho/vBKebgN5Kce/N6vQWV" + # evanreichard@lin-va-thinkpad + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAq5JQr/6WJMIHhR434nK95FrDmf2ApW2Ahd2+cBKwDz" + ]; + }; }; # System Packages environment.systemPackages = with pkgs; [ - htop - nvtopPackages.full - rtl-sdr + btop + git tmux vim - wget ]; }