chore(llm): clean up models & fix llama-cpp issue

2025-12-10 12:12:50 -05:00
parent 30934c8f7c
commit c1a650a90e
12 changed files with 501 additions and 135 deletions
--- a/packages/llama-cpp/additionalprops-unrecognized-schema.patch
+++ b/packages/llama-cpp/additionalprops-unrecognized-schema.patch
@@ -0,0 +1,31 @@
+# This patch modifies json-schema-to-grammar.cpp so that an empty 'not: {}' inside
+# additionalProperties is dropped; if nothing else remains, the value becomes 'false'.
+#
+# Author: https://github.com/evanreichard
+
+diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
+index c3b4e5d..ea24bc3 100644
+--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
+@@ -858,10 +858,19 @@ public:
+                     properties.emplace_back(prop.key(), prop.value());
+                 }
+             }
+            json additionalProps = schema.contains("additionalProperties") ? schema["additionalProperties"] : json();
+            if (additionalProps.is_object() && additionalProps.contains("not")) {
+                const auto& not_val = additionalProps["not"];
+                if (not_val.is_object() && not_val.empty()) {
+                    additionalProps.erase("not");
+                    if (additionalProps.empty()) {
+                        additionalProps = false;
+                    }
+                }
+            }
+             return _add_rule(rule_name,
+                 _build_object_rule(
+-                    properties, required, name,
+-                    schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
+                    properties, required, name, additionalProps));
+         } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
+             std::unordered_set<std::string> required;
+             std::vector<std::pair<std::string, json>> properties;
--- a/packages/llama-cpp/default.nix
+++ b/packages/llama-cpp/default.nix
@@ -0,0 +1,42 @@
+# llama.cpp from nixpkgs, re-pinned to a specific upstream build tag, with the
+# CUDA, BLAS and Vulkan backends switched on and two local schema patches applied.
+{ pkgs }:
+let
+  # Upstream build number; the fetched tag is this value prefixed with "b".
+  version = "7343";
+
+  # nixpkgs package with the backend toggles applied before attributes are overridden.
+  basePackage = pkgs.llama-cpp.override {
+    cudaSupport = true;
+    blasSupport = true;
+    vulkanSupport = true;
+    rocmSupport = false;
+    metalSupport = false;
+  };
+in
+basePackage.overrideAttrs (oldAttrs: {
+  inherit version;
+
+  # .git is kept only long enough to record the short commit hash in COMMIT,
+  # then every .git directory is removed from the fetched tree.
+  src = pkgs.fetchFromGitHub {
+    owner = "ggml-org";
+    repo = "llama.cpp";
+    tag = "b${version}";
+    hash = "sha256-hD8cyorU5NezRmKx+iN5gOD+3bAzS3IDVl7Ju5/zVHc=";
+    leaveDotGit = true;
+    postFetch = ''
+      git -C "$out" rev-parse --short HEAD > $out/COMMIT
+      find "$out" -name .git -print0 | xargs -0 rm -rf
+    '';
+  };
+
+  # Extra CMake switches appended to whatever the base package already passes:
+  # native CPU tuning, CUDA unified memory, and a fixed CUDA architecture.
+  cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
+    "-DGGML_NATIVE=ON"
+    "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
+    "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
+  ];
+
+  # Turn off Nix's stripping of march=native, then run the inherited preConfigure.
+  preConfigure = ''
+    export NIX_ENFORCE_NO_NATIVE=0
+    ${oldAttrs.preConfigure or ""}
+  '';
+
+  # Local patches are applied after any patches the base package carries.
+  patchFlags = [ "-p1" ];
+  patches = (oldAttrs.patches or [ ]) ++ [
+    ./oneof-not-unrecognized-schema.patch
+    ./additionalprops-unrecognized-schema.patch
+  ];
+})
--- a/packages/llama-cpp/oneof-not-unrecognized-schema.patch
+++ b/packages/llama-cpp/oneof-not-unrecognized-schema.patch
@@ -0,0 +1,28 @@
+# This patch modifies json-schema-to-grammar.cpp to skip empty 'not: {}' alternatives in oneOf/anyOf.
+#
+# Author: https://github.com/simaotwx
+# Reference: https://github.com/ggml-org/llama.cpp/issues/14227#issuecomment-3547740835
+
+diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
+index 478aa1be7..ec0b3b73e 100644
+--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
+@@ -822,7 +822,17 @@ public:
+             return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
+         } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
+             std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
+-            return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
+            std::vector<json> filtered_schemas;
+            for (const auto& alt : alt_schemas) {
+                if (alt.is_object() && alt.contains("not")) {
+                    const auto& not_val = alt["not"];
+                    if (not_val.is_object() && not_val.empty()) {
+                        continue;
+                    }
+                }
+                filtered_schemas.push_back(alt);
+            }
+            return _add_rule(rule_name, _generate_union_rule(name, filtered_schemas));
+         } else if (schema_type.is_array()) {
+             std::vector<json> schema_types;
+             for (const auto & t : schema_type) {
--- a/packages/llama-swap/default.nix
+++ b/packages/llama-swap/default.nix
@@ -0,0 +1,143 @@
+# Builds llama-swap from mostlygeek/llama-swap as a Go module. The web UI is
+# built separately (./ui.nix) and copied into the tree before the Go build so
+# that it can be embedded via go:embed.
+{ lib
+, stdenv
+, buildGoModule
+, fetchFromGitHub
+, versionCheckHook
+, callPackage
+, nixosTests
+,
+}:
+
+let
+  # True when binaries built for the host platform can run on the build
+  # platform; used below to gate the test suite and the simple-responder helper.
+  canExecute = stdenv.buildPlatform.canExecute stdenv.hostPlatform;
+in
+buildGoModule (finalAttrs: {
+  pname = "llama-swap";
+  version = "176";
+
+  src = fetchFromGitHub {
+    owner = "mostlygeek";
+    repo = "llama-swap";
+    tag = "v${finalAttrs.version}";
+    hash = "sha256-nfkuaiEITOmpkiLft3iNW1VUexHwZ36c8gwcQKGANbQ=";
+    # populate values that require us to use git. By doing this in postFetch we
+    # can delete .git afterwards and maintain better reproducibility of the src.
+    leaveDotGit = true;
+    postFetch = ''
+      cd "$out"
+      git rev-parse HEAD > $out/COMMIT
+      # '0000-00-00T00:00:00Z'
+      date -u -d "@$(git log -1 --pretty=%ct)" "+'%Y-%m-%dT%H:%M:%SZ'" > $out/SOURCE_DATE_EPOCH
+      find "$out" -name .git -print0 | xargs -0 rm -rf
+    '';
+  };
+
+  vendorHash = "sha256-/EbFyuCVFxHTTO0UwSV3B/6PYUpudxB2FD8nNx1Bb+M=";
+
+  # The UI derivation receives the final (post-override) package as `llama-swap`.
+  # npmDepsHash is exposed through passthru so ./ui.nix can read it from that package.
+  passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
+  passthru.npmDepsHash = "sha256-RKPcMwJ0qVOgbTxoGryrLn7AW0Bfmv9WasoY+gw4B30=";
+
+  nativeBuildInputs = [
+    versionCheckHook
+  ];
+
+  # required for testing
+  __darwinAllowLocalNetworking = true;
+
+  # Static linker flags; the commit and date flags are appended in preBuild
+  # because they are read from files written by the src postFetch step.
+  ldflags = [
+    "-s"
+    "-w"
+    "-X main.version=${finalAttrs.version}"
+  ];
+
+  preBuild = ''
+    # ldflags based on metadata from git and source
+    ldflags+=" -X main.commit=$(cat COMMIT)"
+    ldflags+=" -X main.date=$(cat SOURCE_DATE_EPOCH)"
+
+    # copy for go:embed in proxy/ui_embed.go
+    cp -r ${finalAttrs.passthru.ui}/ui_dist proxy/
+  '';
+
+  # Helper tools under misc/ that are never built; simple-responder is
+  # additionally excluded when the tests cannot run (see canExecute).
+  excludedPackages = [
+    # regression testing tool
+    "misc/process-cmd-test"
+    # benchmark/regression testing tool
+    "misc/benchmark-chatcompletion"
+  ]
+  ++ lib.optionals (!canExecute) [
+    # some tests expect to execute `simple-something`; if it can't be executed
+    # it's unneeded
+    "misc/simple-responder"
+  ];
+
+  # Produces a single `-skip=` regex in which every listed test name is
+  # anchored with ^...$ and joined by `|`. The list is empty unless building
+  # for x86_64-darwin, in which case the flag reduces to `-skip=^$`.
+  checkFlags =
+    let
+      skippedTests = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
+        # Fail only on x86_64-darwin intermittently
+        # https://github.com/mostlygeek/llama-swap/issues/320
+        "TestProcess_AutomaticallyStartsUpstream"
+        "TestProcess_WaitOnMultipleStarts"
+        "TestProcess_BrokenModelConfig"
+        "TestProcess_UnloadAfterTTL"
+        "TestProcess_LowTTLValue"
+        "TestProcess_HTTPRequestsHaveTimeToFinish"
+        "TestProcess_SwapState"
+        "TestProcess_ShutdownInterruptsHealthCheck"
+        "TestProcess_ExitInterruptsHealthCheck"
+        "TestProcess_ConcurrencyLimit"
+        "TestProcess_StopImmediately"
+        "TestProcess_ForceStopWithKill"
+        "TestProcess_StopCmd"
+        "TestProcess_EnvironmentSetCorrectly"
+      ];
+    in
+    [ "-skip=^${builtins.concatStringsSep "$|^" skippedTests}$" ];
+
+  # some tests expect to execute `simple-something` and proxy/helpers_test.go
+  # checks the file exists
+  doCheck = canExecute;
+  # Link the freshly built helper to build/simple-responder_<GOOS>_<GOARCH>,
+  # the path the tests are expected to look for (see the note above).
+  preCheck = ''
+    mkdir build
+    ln -s "$GOPATH/bin/simple-responder" "./build/simple-responder_''${GOOS}_''${GOARCH}"
+  '';
+  # NOTE(review): presumably removes the test-only helper so that it is not
+  # installed alongside the main binary -- confirm.
+  postCheck = ''
+    rm "$GOPATH/bin/simple-responder"
+  '';
+
+  # Ship the upstream example configuration, read-only, under share/llama-swap.
+  preInstall = ''
+    install -Dm444 -t "$out/share/llama-swap" config.example.yaml
+  '';
+
+  # Install check via versionCheckHook, invoking the program with `-version`.
+  doInstallCheck = true;
+  versionCheckProgramArg = "-version";
+
+  passthru.tests.nixos = nixosTests.llama-swap;
+
+  meta = {
+    homepage = "https://github.com/mostlygeek/llama-swap";
+    changelog = "https://github.com/mostlygeek/llama-swap/releases/tag/${finalAttrs.src.tag}";
+    description = "Model swapping for llama.cpp (or any local OpenAPI compatible server)";
+    longDescription = ''
+      llama-swap is a light weight, transparent proxy server that provides
+      automatic model swapping to llama.cpp's server.
+
+      When a request is made to an OpenAI compatible endpoint, llama-swap will
+      extract the `model` value and load the appropriate server configuration to
+      serve it. If the wrong upstream server is running, it will be replaced
+      with the correct one. This is where the "swap" part comes in. The upstream
+      server is automatically swapped to the correct one to serve the request.
+
+      In the most basic configuration llama-swap handles one model at a time.
+      For more advanced use cases, the `groups` feature allows multiple models
+      to be loaded at the same time. You have complete control over how your
+      system resources are used.
+    '';
+    license = lib.licenses.mit;
+    mainProgram = "llama-swap";
+    maintainers = with lib.maintainers; [
+      jk
+      podium868909
+    ];
+  };
+})
--- a/packages/llama-swap/ui.nix
+++ b/packages/llama-swap/ui.nix
@@ -0,0 +1,25 @@
+# Builds the llama-swap web UI from the `ui` subdirectory of the llama-swap
+# source. Version, source and npm dependency hash are all taken from the
+# llama-swap package passed in, so the two derivations stay in lockstep.
+{ llama-swap
+, buildNpmPackage
+,
+}:
+
+let
+  # Parent metadata without `mainProgram`, with a UI-specific description.
+  uiMeta = (removeAttrs llama-swap.meta [ "mainProgram" ]) // {
+    description = "${llama-swap.meta.description} - UI";
+  };
+in
+buildNpmPackage (finalAttrs: {
+  pname = llama-swap.pname + "-ui";
+  version = llama-swap.version;
+  src = llama-swap.src;
+  npmDepsHash = llama-swap.npmDepsHash;
+
+  # The npm project lives in the `ui` subdirectory of the shared source tree.
+  sourceRoot = finalAttrs.src.name + "/ui";
+
+  # Redirect the Vite output path from the sibling Go package directory to
+  # this derivation's own output.
+  postPatch = ''
+    substituteInPlace vite.config.ts \
+      --replace-fail "../proxy/ui_dist" "${placeholder "out"}/ui_dist"
+  '';
+
+  # bundled "ui_dist" doesn't need node_modules
+  postInstall = ''
+    rm -rf $out/lib
+  '';
+
+  meta = uiMeta;
+})
--- a/packages/qwen-code/default.nix
+++ b/packages/qwen-code/default.nix
@@ -0,0 +1,91 @@
+# Builds the qwen-code CLI from QwenLM/qwen-code at the pinned tag using
+# buildNpmPackage, with custom build and install phases, and exposes the
+# bundled cli.js as the `qwen` executable.
+{ lib
+, buildNpmPackage
+, fetchFromGitHub
+, jq
+, git
+, ripgrep
+, pkg-config
+, glib
+, libsecret
+, ...
+}:
+buildNpmPackage (finalAttrs: {
+  pname = "qwen-code";
+  version = "0.4.0-nightly.20251209.a6a57233";
+
+  src = fetchFromGitHub {
+    owner = "QwenLM";
+    repo = "qwen-code";
+    tag = "v${finalAttrs.version}";
+    hash = "sha256-s9m1IN6jDDbNPr/vI/UcrauYPiyQTDODarLP3EvnG3Y=";
+  };
+
+  npmDepsHash = "sha256-ngAjCCoHLPZ+GgBRmAKbRYaF7l+RK3YGf1kEkwFbyQg=";
+
+  nativeBuildInputs = [
+    jq
+    pkg-config
+    git
+  ];
+
+  buildInputs = [
+    ripgrep
+    glib
+    libsecret
+  ];
+
+  # Rewrites package-lock.json with jq: deletes the node-pty, @lydell/node-pty
+  # (including its per-platform variants) and keytar package entries, then
+  # walks the whole document dropping any dependency-map key that contains
+  # "node-pty" or "keytar".
+  #
+  # NOTE(review): the first walk is an if/elif chain, so an object that has
+  # both `dependencies` and `optionalDependencies` only gets `dependencies`
+  # filtered. Confirm whether that is intended. Changing it alters the patched
+  # lockfile, so npmDepsHash would presumably need to be regenerated.
+  postPatch = ''
+    ${jq}/bin/jq '
+      del(.packages."node_modules/node-pty") |
+      del(.packages."node_modules/@lydell/node-pty") |
+      del(.packages."node_modules/@lydell/node-pty-darwin-arm64") |
+      del(.packages."node_modules/@lydell/node-pty-darwin-x64") |
+      del(.packages."node_modules/@lydell/node-pty-linux-arm64") |
+      del(.packages."node_modules/@lydell/node-pty-linux-x64") |
+      del(.packages."node_modules/@lydell/node-pty-win32-arm64") |
+      del(.packages."node_modules/@lydell/node-pty-win32-x64") |
+      del(.packages."node_modules/keytar") |
+      walk(
+        if type == "object" and has("dependencies") then
+          .dependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
+        elif type == "object" and has("optionalDependencies") then
+          .optionalDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
+        else .
+        end
+      ) |
+      walk(
+        if type == "object" and has("peerDependencies") then
+          .peerDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
+        else .
+        end
+      )
+    ' package-lock.json > package-lock.json.tmp && mv package-lock.json.tmp package-lock.json
+  '';
+
+  # Custom build: runs the project's `generate` and `bundle` npm scripts,
+  # keeping the standard pre/post build hooks.
+  buildPhase = ''
+    runHook preBuild
+    npm run generate
+    npm run bundle
+    runHook postBuild
+  '';
+
+  # Custom install: copies dist/ and the pruned node_modules into
+  # $out/share/qwen-code, deletes every symlink under node_modules (failures
+  # of that step are ignored), patches shebangs, and links cli.js as bin/qwen.
+  installPhase = ''
+    runHook preInstall
+    mkdir -p $out/bin $out/share/qwen-code
+    cp -r dist/* $out/share/qwen-code/
+    npm prune --production
+    cp -r node_modules $out/share/qwen-code/
+    find $out/share/qwen-code/node_modules -type l -delete || true
+    patchShebangs $out/share/qwen-code
+    ln -s $out/share/qwen-code/cli.js $out/bin/qwen
+    runHook postInstall
+  '';
+
+  meta = {
+    description = "Coding agent that lives in digital world";
+    homepage = "https://github.com/QwenLM/qwen-code";
+    mainProgram = "qwen";
+    license = lib.licenses.asl20;
+    platforms = lib.platforms.all;
+  };
+})