chore: add llama-cpp and migrate office

Evan Reichard 2025-09-05 22:48:19 -04:00
parent 9433abcaf4
commit 26f32d3225
7 changed files with 218 additions and 166 deletions

View File

@@ -1,6 +1,6 @@
-- Configure LLama LLM
vim.g.llama_config = {
endpoint = "http://10.0.50.120:8080/infill",
endpoint = "http://10.0.50.120:8012/infill",
api_key = "",
n_prefix = 256,
n_suffix = 64,

View File

@@ -14,6 +14,8 @@ in
};
config = mkIf cfg.enable {
services.xserver.videoDrivers = mkIf cfg.enableNvidia [ "nvidia" ];
environment.systemPackages = with pkgs; [
libva-utils
vdpauinfo
@@ -23,6 +25,15 @@ in
intel-gpu-tools
];
# Enable Nvidia Hardware
hardware.nvidia = mkIf cfg.enableNvidia {
package = config.boot.kernelPackages.nvidiaPackages.stable;
modesetting.enable = true;
powerManagement.enable = true;
open = false;
nvidiaSettings = true;
};
# Add Intel Arc / Nvidia Drivers
hardware.enableRedistributableFirmware = mkIf cfg.enableIntel (mkForce true);
hardware.graphics = {

View File

@@ -0,0 +1,108 @@
{ config, pkgs, lib, namespace, ... }:
let
inherit (lib) types mkIf mkEnableOption;
inherit (lib.${namespace}) mkOpt;
cfg = config.${namespace}.services.llama-cpp;
modelDir = "/models";
availableModels = {
"qwen2.5-coder-7b-q8_0.gguf" = {
url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/qwen2.5-coder-7b-q8_0.gguf?download=true";
flag = "--fim-qwen-7b-default";
};
"qwen2.5-coder-3b-q8_0.gguf" = {
url = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/qwen2.5-coder-3b-q8_0.gguf?download=true";
flag = "--fim-qwen-3b-default";
};
};
in
{
options.${namespace}.services.llama-cpp = with types; {
enable = mkEnableOption "llama-cpp support";
modelName = mkOpt str "qwen2.5-coder-3b-q8_0.gguf" "model to use";
};
config =
let
modelPath = "${modelDir}/${cfg.modelName}";
in
mkIf cfg.enable {
assertions = [
{
assertion = availableModels ? ${cfg.modelName};
message = "Invalid model '${cfg.modelName}'. Available models: ${lib.concatStringsSep ", " (lib.attrNames availableModels)}";
}
];
systemd.services = {
# LLama Download Model
download-model = {
description = "Download Model";
wantedBy = [ "multi-user.target" ];
before = [ "llama-cpp.service" ];
path = [ pkgs.curl pkgs.coreutils ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = "root";
Group = "root";
};
script =
let
modelURL = availableModels.${cfg.modelName}.url;
in
''
set -euo pipefail
if [ ! -f "${modelPath}" ]; then
mkdir -p "${modelDir}"
# -L follows redirects, -f/--fail exits non-zero on HTTP errors
# -C - resumes interrupted downloads
curl -f -L -C - \
-H "Accept: application/octet-stream" \
--retry 3 \
--retry-delay 5 \
--max-time 1800 \
"${modelURL}" \
-o "${modelPath}.tmp" && \
mv "${modelPath}.tmp" "${modelPath}"
fi
'';
};
# Setup LLama API Service
llama-cpp = {
after = [ "download-model.service" ];
requires = [ "download-model.service" ];
};
};
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
port = 8012;
openFirewall = true;
model = "${modelPath}";
package = (pkgs.llama-cpp.override {
cudaSupport = true;
}).overrideAttrs (oldAttrs: {
cmakeFlags = oldAttrs.cmakeFlags ++ [
"-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
"-DCMAKE_CUDA_ARCHITECTURES=61" # GTX-1070
# Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
"-DLLAMA_FMA=OFF"
"-DLLAMA_AVX2=OFF"
"-DLLAMA_AVX512=OFF"
"-DGGML_FMA=OFF"
"-DGGML_AVX2=OFF"
"-DGGML_AVX512=OFF"
];
});
extraFlags = [ availableModels.${cfg.modelName}.flag ];
};
};
}
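
For reference, a host opts into this module through the namespace options declared above; the following is a minimal sketch (assuming the `reichard` namespace used by the office host later in this commit, and the 7B entry from availableModels):

{
  reichard.services.llama-cpp = {
    enable = true;
    # Overrides the default "qwen2.5-coder-3b-q8_0.gguf"; must be a key of availableModels
    modelName = "qwen2.5-coder-7b-q8_0.gguf";
  };
}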

View File

@@ -0,0 +1,32 @@
{ config, pkgs, lib, namespace, ... }:
let
inherit (lib) mkIf mkEnableOption;
inherit (lib.${namespace}) mkBoolOpt;
cfg = config.${namespace}.services.rtl-tcp;
in
{
options.${namespace}.services.rtl-tcp = {
enable = mkEnableOption "RTL-TCP support";
openFirewall = mkBoolOpt true "Open firewall";
};
config = mkIf cfg.enable {
hardware.rtl-sdr.enable = true;
networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ 1234 ];
# RTL-SDR TCP Server Service
systemd.services.rtl-tcp = {
description = "RTL-SDR TCP Server";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.rtl-sdr}/bin/rtl_tcp -a 0.0.0.0 -f 1090000000 -s 2400000";
Restart = "on-failure";
RestartSec = "10s";
User = "root";
Group = "root";
};
};
};
}
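
A similar hedged sketch for the RTL-TCP module, using only the two options it declares (`enable`, `openFirewall`):

{
  reichard.services.rtl-tcp = {
    enable = true;
    # openFirewall defaults to true; set false to keep TCP port 1234 closed
    openFirewall = false;
  };
}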

View File

@@ -33,10 +33,16 @@ in
canTouchEfiVariables = false;
};
systemd-boot = {
# systemd-boot = {
# enable = true;
# configurationLimit = 20;
# editor = false;
# };
grub = {
enable = true;
configurationLimit = 20;
editor = false;
efiSupport = true;
efiInstallAsRemovable = true;
};
timeout = mkDefault 1;

View File

@@ -35,14 +35,6 @@ in
mountpoint = "/boot";
};
};
root = {
name = "root";
size = "100%";
content = {
type = "lvm_pv";
vg = "pool";
};
};
swap = {
size = "32G";
content = {
@@ -51,6 +43,14 @@ in
resumeDevice = true;
};
};
root = {
name = "root";
size = "100%";
content = {
type = "lvm_pv";
vg = "pool";
};
};
};
};
};

View File

@@ -1,175 +1,70 @@
{ config, pkgs, ... }:
{ namespace, pkgs, config, lib, ... }:
let
cuda-llama = (pkgs.llama-cpp.override {
cudaSupport = true;
}).overrideAttrs (oldAttrs: {
cmakeFlags = oldAttrs.cmakeFlags ++ [
"-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
# Disable CPU Instructions - Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz
"-DLLAMA_FMA=OFF"
"-DLLAMA_AVX2=OFF"
"-DLLAMA_AVX512=OFF"
"-DGGML_FMA=OFF"
"-DGGML_AVX2=OFF"
"-DGGML_AVX512=OFF"
];
});
# Define Model Vars
modelDir = "/models";
# 7B
# modelName = "qwen2.5-coder-7b-q8_0.gguf";
# modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF/resolve/main/${modelName}?download=true";
# 3B
modelName = "qwen2.5-coder-3b-q8_0.gguf";
modelUrl = "https://huggingface.co/ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF/resolve/main/${modelName}?download=true";
modelPath = "${modelDir}/${modelName}";
inherit (lib.${namespace}) enabled;
cfg = config.${namespace}.user;
in
{
# Allow Nvidia & CUDA
system.stateVersion = "25.05";
time.timeZone = "America/New_York";
nixpkgs.config.allowUnfree = true;
# Enable Graphics
hardware.graphics = {
enable = true;
enable32Bit = true;
extraPackages = [ pkgs.cudatoolkit ];
};
# System Config
reichard = {
nix = enabled;
# Load Nvidia Driver Module
services.xserver.videoDrivers = [ "nvidia" ];
# Nvidia Package Configuration
hardware.nvidia = {
package = config.boot.kernelPackages.nvidiaPackages.stable;
modesetting.enable = true;
powerManagement.enable = true;
open = false;
nvidiaSettings = true;
};
# Networking Configuration
networking.firewall = {
enable = true;
allowedTCPPorts = [
1234 # RTL-TCP
8080 # LLama API
];
};
# RTL-SDR
hardware.rtl-sdr.enable = true;
systemd.services = {
# LLama Download Model
download-model = {
description = "Download Model";
wantedBy = [ "multi-user.target" ];
before = [ "llama-cpp.service" ];
path = [ pkgs.curl pkgs.coreutils ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = "root";
Group = "root";
system = {
boot = {
enable = true;
silentBoot = true;
};
disk = {
enable = true;
diskPath = "/dev/sda";
};
networking = {
enable = true;
useStatic = {
interface = "enp5s0";
address = "10.0.50.120";
defaultGateway = "10.0.50.254";
nameservers = [ "10.0.20.20" ];
};
};
script = ''
set -euo pipefail
if [ ! -f "${modelPath}" ]; then
mkdir -p "${modelDir}"
# -L follows redirects, -f/--fail exits non-zero on HTTP errors
# -C - resumes interrupted downloads
curl -f -L -C - \
-H "Accept: application/octet-stream" \
--retry 3 \
--retry-delay 5 \
--max-time 1800 \
"${modelUrl}" \
-o "${modelPath}.tmp" && \
mv "${modelPath}.tmp" "${modelPath}"
fi
'';
};
# RTL-SDR TCP Server Service
rtl-tcp = {
description = "RTL-SDR TCP Server";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.rtl-sdr}/bin/rtl_tcp -a 0.0.0.0 -f 1090000000 -s 2400000";
Restart = "on-failure";
RestartSec = "10s";
User = "root";
Group = "root";
hardware = {
opengl = {
enable = true;
enableNvidia = true;
};
};
services = {
openssh = enabled;
llama-cpp = enabled;
rtl-tcp = enabled;
};
};
# Setup LLama API Service
systemd.services.llama-cpp = {
after = [ "download-model.service" ];
requires = [ "download-model.service" ];
};
# Enable LLama API
services.llama-cpp = {
enable = true;
host = "0.0.0.0";
package = cuda-llama;
model = modelPath;
port = 8080;
openFirewall = true;
# 7B
# extraFlags = [
# "-ngl"
# "99"
# "-fa"
# "-ub"
# "512"
# "-b"
# "512"
# "-dt"
# "0.1"
# "--ctx-size"
# "4096"
# "--cache-reuse"
# "256"
# ];
# 3B
extraFlags = [
"-ngl"
"99"
"-fa"
"-ub"
"1024"
"-b"
"1024"
"--ctx-size"
"0"
"--cache-reuse"
"256"
];
users.users.${cfg.name} = {
openssh = {
authorizedKeys.keys = [
# evanreichard@lin-va-mbp-personal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILJJoyXQOv9cAjGUHrUcvsW7vY9W0PmuPMQSI9AMZvNY"
# evanreichard@mac-va-mbp-personal
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMWj6rd6uDtHj/gGozgIEgxho/vBKebgN5Kce/N6vQWV"
# evanreichard@lin-va-thinkpad
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAq5JQr/6WJMIHhR434nK95FrDmf2ApW2Ahd2+cBKwDz"
];
};
};
# System Packages
environment.systemPackages = with pkgs; [
htop
nvtopPackages.full
rtl-sdr
btop
git
tmux
vim
wget
];
}