chore: add llama-cpp and migrate office

Evan Reichard 2025-09-05 22:48:19 -04:00
parent 9433abcaf4
commit 26f32d3225
7 changed files with 218 additions and 166 deletions

View File

@@ -1,6 +1,6 @@
-- Configure LLama LLM
vim.g.llama_config = {
endpoint = "http://10.0.50.120:8080/infill",
endpoint = "http://10.0.50.120:8012/infill",
api_key = "",
n_prefix = 256,
n_suffix = 64,

View File

@@ -14,6 +14,8 @@ in
};
config = mkIf cfg.enable {
services.xserver.videoDrivers = mkIf cfg.enableNvidia [ "nvidia" ];
environment.systemPackages = with pkgs; [
libva-utils
vdpauinfo
@@ -23,6 +25,15 @@ in
intel-gpu-tools
];
# Enable Nvidia Hardware
hardware.nvidia = mkIf cfg.enableNvidia {
package = config.boot.kernelPackages.nvidiaPackages.stable;
modesetting.enable = true;
powerManagement.enable = true;
open = false;
nvidiaSettings = true;
};
# Add Intel Arc / Nvidia Drivers
hardware.enableRedistributableFirmware = mkIf cfg.enableIntel (mkForce true);
hardware.graphics = {

View File

@@ -0,0 +1,108 @@
{ config, pkgs, lib, namespace, ... }:
let
inherit (lib) types mkIf mkEnableOption;
inherit (lib.${namespace}) mkOpt;
cfg = config.${namespace}.services.llama-cpp;
modelDir = "/models";
availableModels = {
"qwen2.5-coder-7b-q8_0.gguf" = {
url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/qwen2.5-coder-7b-q8_0.gguf?download=true";
flag = "--fim-qwen-7b-default";
};
"qwen2.5-coder-3b-q8_0.gguf" = {
url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/qwen2.5-coder-3b-q8_0.gguf?download=true";
flag = "--fim-qwen-3b-default";
};
};
in
{
options.${namespace}.services.llama-cpp = with types; {
enable = mkEnableOption "llama-cpp support";
modelName = mkOpt str "qwen2.5-coder-3b-q8_0.gguf" "model to use";
};
config =
let
modelPath = "${modelDir}/${cfg.modelName}";
in
mkIf cfg.enable {
assertions = [
{
assertion = availableModels ? ${cfg.modelName};
message = "Invalid model '${cfg.modelName}'. Available models: ${lib.concatStringsSep ", " (lib.attrNames availableModels)}";
}
];
systemd.services = {
# LLama Download Model
download-model = {
description = "Download Model";
wantedBy = [ "multi-user.target" ];
before = [ "llama-cpp.service" ];
path = [ pkgs.curl pkgs.coreutils ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = "root";
Group = "root";
};
script =
let
modelURL = availableModels.${cfg.modelName}.url;
in
''
set -euo pipefail
if [ ! -f "${modelPath}" ]; then
mkdir -p "${modelDir}"
# -L follows redirects, -f/--fail exits non-zero on HTTP errors
# -C - resumes interrupted downloads
curl -f -L -C - \
-H "Accept: application/octet-stream" \
--retry 3 \
--retry-delay 5 \
--max-time 1800 \
"${modelURL}" \
-o "${modelPath}.tmp" && \
mv "${modelPath}.tmp" "${modelPath}"
fi
'';
};
# Setup LLama API Service
llama-cpp = {
after = [ "download-model.service" ];
requires = [ "download-model.service" ];
};
};
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
port = 8012;
openFirewall = true;
model = "${modelPath}";
package = (pkgs.llama-cpp.override {
cudaSupport = true;
}).overrideAttrs (oldAttrs: {
cmakeFlags = oldAttrs.cmakeFlags ++ [
"-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
"-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070
# Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
"-DLLAMA_FMA=OFF"
"-DLLAMA_AVX2=OFF"
"-DLLAMA_AVX512=OFF"
"-DGGML_FMA=OFF"
"-DGGML_AVX2=OFF"
"-DGGML_AVX512=OFF"
];
});
extraFlags = [ availableModels.${cfg.modelName}.flag ];
};
};
}
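
For reference, a host opts into this module through the namespace options declared above; the following is a minimal sketch (assuming the `reichard` namespace used by the office host later in this commit, and the 7B entry from availableModels):

{
  reichard.services.llama-cpp = {
    enable = true;
    # Overrides the default "qwen2.5-coder-3b-q8_0.gguf"; must be a key of availableModels
    modelName = "qwen2.5-coder-7b-q8_0.gguf";
  };
}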

View File

@@ -0,0 +1,32 @@
{ config, pkgs, lib, namespace, ... }:
let
inherit (lib) mkIf mkEnableOption;
inherit (lib.${namespace}) mkBoolOpt;
cfg = config.${namespace}.services.rtl-tcp;
in
{
options.${namespace}.services.rtl-tcp = {
enable = mkEnableOption "RTL-TCP support";
openFirewall = mkBoolOpt true "Open firewall";
};
config = mkIf cfg.enable {
hardware.rtl-sdr.enable = true;
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ 1234 ];
# RTL-SDR TCP Server Service
systemd.services.rtl-tcp = {
description = "RTL-SDR TCP Server";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.rtl-sdr}/bin/rtl_tcp -a 0.0.0.0 -f 1090000000 -s 2400000";
Restart = "on-failure";
RestartSec = "10s";
User = "root";
Group = "root";
};
};
};
}
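
A similar hedged sketch for the RTL-TCP module, using only the two options it declares (`enable`, `openFirewall`):

{
  reichard.services.rtl-tcp = {
    enable = true;
    # openFirewall defaults to true; set false to keep TCP port 1234 closed
    openFirewall = false;
  };
}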

View File

@@ -33,10 +33,16 @@ in
canTouchEfiVariables = false;
};
systemd-boot = {
# systemd-boot = {
# enable = true;
# configurationLimit = 20;
# editor = false;
# };
grub = {
enable = true;
configurationLimit = 20;
editor = false;
efiSupport = true;
efiInstallAsRemovable = true;
};
timeout = mkDefault 1;

View File

@@ -35,14 +35,6 @@ in
mountpoint = "/boot";
};
};
root = {
name = "root";
size = "100%";
content = {
type = "lvm_pv";
vg = "pool";
};
};
swap = {
size = "32G";
content = {
@@ -51,6 +43,14 @@ in
resumeDevice = true;
};
};
root = {
name = "root";
size = "100%";
content = {
type = "lvm_pv";
vg = "pool";
};
};
};
};
};

View File

@@ -1,175 +1,70 @@
{ config, pkgs, ... }:
{ namespace, pkgs, config, lib, ... }:
let
cuda-llama = (pkgs.llama-cpp.override {
cudaSupport = true;
}).overrideAttrs (oldAttrs: {
cmakeFlags = oldAttrs.cmakeFlags ++ [
"-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
# Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
"-DLLAMA_FMA=OFF"
"-DLLAMA_AVX2=OFF"
"-DLLAMA_AVX512=OFF"
"-DGGML_FMA=OFF"
"-DGGML_AVX2=OFF"
"-DGGML_AVX512=OFF"
];
});
# Define Model Vars
modelDir = "/models";
# 7B
# modelName = "qwen2.5-coder-7b-q8_0.gguf";
# modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/${modelName}?download=true";
# 3B
modelName = "qwen2.5-coder-3b-q8_0.gguf";
modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/${modelName}?download=true";
modelPath = "${modelDir}/${modelName}";
inherit (lib.${namespace}) enabled;
cfg = config.${namespace}.user;
in
{
# Allow Nvidia & CUDA
system.stateVersion = "25.05";
time.timeZone = "America/New_York";
nixpkgs.config.allowUnfree = true;
# Enable Graphics
hardware.graphics = {
enable = true;
enable32Bit = true;
extraPackages = [ pkgs.cudatoolkit ];
};
# System Config
reichard = {
nix = enabled;
# Load Nvidia Driver Module
services.xserver.videoDrivers = [ "nvidia" ];
# Nvidia Package Configuration
hardware.nvidia = {
package = config.boot.kernelPackages.nvidiaPackages.stable;
modesetting.enable = true;
powerManagement.enable = true;
open = false;
nvidiaSettings = true;
};
# Networking Configuration
networking.firewall = {
enable = true;
allowedTCPPorts = [
1234 # RTL-TCP
8080 # LLama API
];
};
# RTL-SDR
hardware.rtl-sdr.enable = true;
systemd.services = {
# LLama Download Model
download-model = {
description = "Download Model";
wantedBy = [ "multi-user.target" ];
before = [ "llama-cpp.service" ];
path = [ pkgs.curl pkgs.coreutils ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = "root";
Group = "root";
system = {
boot = {
enable = true;
silentBoot = true;
};
disk = {
enable = true;
diskPath = "/dev/sda";
};
networking = {
enable = true;
useStatic = {
interface = "enp5s0";
address = "10.0.50.120";
defaultGateway = "10.0.50.254";
nameservers = [ "10.0.20.20" ];
};
};
script = ''
set -euo pipefail
if [ ! -f "${modelPath}" ]; then
mkdir -p "${modelDir}"
# -L follows redirects, -f/--fail exits non-zero on HTTP errors
# -C - resumes interrupted downloads
curl -f -L -C - \
-H "Accept: application/octet-stream" \
--retry 3 \
--retry-delay 5 \
--max-time 1800 \
"${modelUrl}" \
-o "${modelPath}.tmp" && \
mv "${modelPath}.tmp" "${modelPath}"
fi
'';
};
# RTL-SDR TCP Server Service
rtl-tcp = {
description = "RTL-SDR TCP Server";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.rtl-sdr}/bin/rtl_tcp -a 0.0.0.0 -f 1090000000 -s 2400000";
Restart = "on-failure";
RestartSec = "10s";
User = "root";
Group = "root";
hardware = {
opengl = {
enable = true;
enableNvidia = true;
};
};
services = {
openssh = enabled;
llama-cpp = enabled;
rtl-tcp = enabled;
};
};
# Setup LLama API Service
systemd.services.llama-cpp = {
after = [ "download-model.service" ];
requires = [ "download-model.service" ];
};
# Enable LLama API
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
package = cuda-llama;
model = modelPath;
port = 8080;
openFirewall = true;
# 7B
# extraFlags = [
# "-ngl"
# "99"
# "-fa"
# "-ub"
# "512"
# "-b"
# "512"
# "-dt"
# "0.1"
# "--ctx-size"
# "4096"
# "--cache-reuse"
# "256"
# ];
# 3B
extraFlags = [
"-ngl"
"99"
"-fa"
"-ub"
"1024"
"-b"
"1024"
"--ctx-size"
"0"
"--cache-reuse"
"256"
];
users.users.${cfg.name} = {
openssh = {
authorizedKeys.keys = [
# evanreichard@lin-va-mbp-personal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILJJoyXQOv9cAjGUHrUcvsW7vY9W0PmuPMQSI9AMZvNY"
# evanreichard@mac-va-mbp-personal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMWj6rd6uDtHj/gGozgIEgxho/vBKebgN5Kce/N6vQWV"
# evanreichard@lin-va-thinkpad
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAq5JQr/6WJMIHhR434nK95FrDmf2ApW2Ahd2+cBKwDz"
];
};
};
# System Packages
environment.systemPackages = with pkgs; [
htop
nvtopPackages.full
rtl-sdr
btop
git
tmux
vim
wget
];
}