chore(llm): clean up models & fix llama-cpp issue

2025-12-10 12:12:50 -05:00
parent 30934c8f7c
commit c1a650a90e
12 changed files with 501 additions and 135 deletions

View File

@@ -0,0 +1,31 @@
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs
# specifically inside additionalProperties.
#
# Author: https://github.com/evanreichard
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index c3b4e5d..ea24bc3 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -858,10 +858,19 @@ public:
properties.emplace_back(prop.key(), prop.value());
}
}
+ json additionalProps = schema.contains("additionalProperties") ? schema["additionalProperties"] : json();
+ if (additionalProps.is_object() && additionalProps.contains("not")) {
+ const auto& not_val = additionalProps["not"];
+ if (not_val.is_object() && not_val.empty()) {
+ additionalProps.erase("not");
+ if (additionalProps.empty()) {
+ additionalProps = false;
+ }
+ }
+ }
return _add_rule(rule_name,
_build_object_rule(
- properties, required, name,
- schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
+ properties, required, name, additionalProps));
} else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
std::unordered_set<std::string> required;
std::vector<std::pair<std::string, json>> properties;
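
A note on the construct this patch targets: in JSON Schema, 'additionalProperties': { 'not': {} } is just a roundabout way of writing 'additionalProperties': false, because a 'not: {}' subschema matches nothing. That is why the hunk above strips the vacuous 'not' and, when nothing else remains, collapses the value to false before the object rule is built. The following standalone sketch is illustrative only: it assumes nlohmann/json provides the json type seen in the hunk, and the helper name normalize_additional_props is made up for the example.

#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Mirrors the patch: strip a vacuous "not": {} from an additionalProperties
// object and, when nothing else remains, collapse it to plain false so the
// grammar converter no longer sees an unrecognized schema.
static json normalize_additional_props(json additionalProps) {
    if (additionalProps.is_object() && additionalProps.contains("not")) {
        const auto & not_val = additionalProps["not"];
        if (not_val.is_object() && not_val.empty()) {
            additionalProps.erase("not");
            if (additionalProps.empty()) {
                additionalProps = false; // { "not": {} } means "no extra properties"
            }
        }
    }
    return additionalProps;
}

int main() {
    json schema = json::parse(R"({
        "type": "object",
        "properties": { "name": { "type": "string" } },
        "additionalProperties": { "not": {} }
    })");

    schema["additionalProperties"] =
        normalize_additional_props(schema["additionalProperties"]);

    // Prints the schema with "additionalProperties": false.
    std::cout << schema.dump(2) << std::endl;
    return 0;
}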

View File

@@ -0,0 +1,42 @@
{ pkgs }:
(pkgs.llama-cpp.override {
cudaSupport = true;
blasSupport = true;
rocmSupport = false;
metalSupport = false;
vulkanSupport = true;
}).overrideAttrs
(oldAttrs: rec {
version = "7343";
src = pkgs.fetchFromGitHub {
owner = "ggml-org";
repo = "llama.cpp";
tag = "b${version}";
hash = "sha256-hD8cyorU5NezRmKx+iN5gOD+3bAzS3IDVl7Ju5/zVHc=";
leaveDotGit = true;
postFetch = ''
git -C "$out" rev-parse --short HEAD > $out/COMMIT
find "$out" -name .git -print0 | xargs -0 rm -rf
'';
};
# Auto CPU Optimizations
cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
"-DGGML_NATIVE=ON"
"-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
"-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
];
# Disable Nix's march=native Stripping
preConfigure = ''
export NIX_ENFORCE_NO_NATIVE=0
${oldAttrs.preConfigure or ""}
'';
# Apply Patches
patchFlags = [ "-p1" ];
patches = (oldAttrs.patches or [ ]) ++ [
./oneof-not-unrecognized-schema.patch
./additionalprops-unrecognized-schema.patch
];
})

View File

@@ -0,0 +1,28 @@
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs.
#
# Author: https://github.com/simaotwx
# Reference: https://github.com/ggml-org/llama.cpp/issues/14227#issuecomment-3547740835
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index 478aa1be7..ec0b3b73e 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -822,7 +822,17 @@ public:
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
- return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
+ std::vector<json> filtered_schemas;
+ for (const auto& alt : alt_schemas) {
+ if (alt.is_object() && alt.contains("not")) {
+ const auto& not_val = alt["not"];
+ if (not_val.is_object() && not_val.empty()) {
+ continue;
+ }
+ }
+ filtered_schemas.push_back(alt);
+ }
+ return _add_rule(rule_name, _generate_union_rule(name, filtered_schemas));
} else if (schema_type.is_array()) {
std::vector<json> schema_types;
for (const auto & t : schema_type) {
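
The same 'not: {}' idiom can also appear as an alternative inside 'oneOf' / 'anyOf' lists, where it matches nothing and can simply be dropped, which is what the filtering loop added above does before the union rule is generated. The sketch below is illustrative only: it assumes nlohmann/json for the json type seen in the hunk, and the helper name filter_not_empty_alternatives is made up for the example.

#include <iostream>
#include <vector>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Mirrors the patch: drop any oneOf/anyOf alternative that carries a
// "not": {} member; such a branch matches nothing and would otherwise hit
// the converter's unrecognized-schema error.
static std::vector<json> filter_not_empty_alternatives(const std::vector<json> & alt_schemas) {
    std::vector<json> filtered;
    for (const auto & alt : alt_schemas) {
        if (alt.is_object() && alt.contains("not")) {
            const auto & not_val = alt["not"];
            if (not_val.is_object() && not_val.empty()) {
                continue; // skip the vacuous { "not": {} } branch
            }
        }
        filtered.push_back(alt);
    }
    return filtered;
}

int main() {
    json schema = json::parse(R"({
        "oneOf": [
            { "type": "string" },
            { "not": {} }
        ]
    })");

    auto alts = schema["oneOf"].get<std::vector<json>>();

    // Only the { "type": "string" } alternative survives the filter.
    for (const auto & alt : filter_not_empty_alternatives(alts)) {
        std::cout << alt.dump() << std::endl;
    }
    return 0;
}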