chore(llm): clean up models & fix llama-cpp issue

2025-12-10 12:12:50 -05:00
parent 30934c8f7c
commit c1a650a90e
12 changed files with 501 additions and 135 deletions

@@ -0,0 +1,42 @@
{ pkgs }:
(pkgs.llama-cpp.override {
  cudaSupport = true;
  blasSupport = true;
  rocmSupport = false;
  metalSupport = false;
  vulkanSupport = true;
}).overrideAttrs
  (oldAttrs: rec {
    version = "7343";
    src = pkgs.fetchFromGitHub {
      owner = "ggml-org";
      repo = "llama.cpp";
      tag = "b${version}";
      hash = "sha256-hD8cyorU5NezRmKx+iN5gOD+3bAzS3IDVl7Ju5/zVHc=";
      # Keep .git during the fetch so the short commit hash can be recorded
      # in $out/COMMIT; the .git directories are stripped again afterwards.
      leaveDotGit = true;
      postFetch = ''
        git -C "$out" rev-parse --short HEAD > "$out/COMMIT"
        find "$out" -name .git -print0 | xargs -0 rm -rf
      '';
    };
    # Auto CPU optimizations: tune GGML for the host CPU and target this
    # machine's GPU architecture directly.
    cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
      "-DGGML_NATIVE=ON"
      "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
      "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080 Ti (sm_61)
    ];
    # Disable Nix's -march=native stripping, which would otherwise undo
    # GGML_NATIVE via the cc-wrapper.
    preConfigure = ''
      export NIX_ENFORCE_NO_NATIVE=0
      ${oldAttrs.preConfigure or ""}
    '';
    # Apply local patches on top of any carried by nixpkgs.
    patchFlags = [ "-p1" ];
    patches = (oldAttrs.patches or [ ]) ++ [
      ./oneof-not-unrecognized-schema.patch
      ./additionalprops-unrecognized-schema.patch
    ];
  })
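
For reference, a minimal consumption sketch (not part of this commit; the ./llama-cpp import path and attribute name are assumptions): since the file is a function over pkgs, an overlay can splice the custom build in as pkgs.llama-cpp:

# overlay.nix -- hypothetical wiring; the import path is an assumption
final: prev: {
  llama-cpp = import ./llama-cpp { pkgs = prev; };
}

When bumping version, one common workflow is to set hash = pkgs.lib.fakeHash, rebuild, and copy the real hash from the resulting mismatch error.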