chore(llm): clean up models & fix llama-cpp issue
This commit is contained in:
31
packages/llama-cpp/additionalprops-unrecognized-schema.patch
Normal file
31
packages/llama-cpp/additionalprops-unrecognized-schema.patch
Normal file
@@ -0,0 +1,31 @@
|
||||
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs
|
||||
# specifically inside additionalProperties.
|
||||
#
|
||||
# Author: https://github.com/evanreichard
|
||||
|
||||
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
|
||||
index c3b4e5d..ea24bc3 100644
|
||||
--- a/common/json-schema-to-grammar.cpp
|
||||
+++ b/common/json-schema-to-grammar.cpp
|
||||
@@ -858,10 +858,19 @@ public:
|
||||
properties.emplace_back(prop.key(), prop.value());
|
||||
}
|
||||
}
|
||||
+            json additionalProps = schema.contains("additionalProperties") ? schema["additionalProperties"] : json();
+            if (additionalProps.is_object() && additionalProps.contains("not")) {
+                const auto& not_val = additionalProps["not"];
+                if (not_val.is_object() && not_val.empty()) {
+                    // `not: {}` can never validate, so the whole additionalProperties
+                    // schema is unsatisfiable no matter which sibling keywords it has
+                    // (e.g. `description`); treat it as `additionalProperties: false`.
+                    additionalProps = false;
+                }
+            }
|
||||
return _add_rule(rule_name,
|
||||
_build_object_rule(
|
||||
- properties, required, name,
|
||||
- schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
|
||||
+ properties, required, name, additionalProps));
|
||||
} else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
|
||||
std::unordered_set<std::string> required;
|
||||
std::vector<std::pair<std::string, json>> properties;
|
||||
42
packages/llama-cpp/default.nix
Normal file
42
packages/llama-cpp/default.nix
Normal file
@@ -0,0 +1,42 @@
|
||||
{ pkgs }:
# nixpkgs' llama-cpp with the CUDA, Vulkan and BLAS backends enabled (ROCm and
# Metal disabled), pinned to a fixed upstream tag and carrying two local
# patches for json-schema-to-grammar.cpp.
(pkgs.llama-cpp.override {
  cudaSupport = true;
  blasSupport = true;
  rocmSupport = false;
  metalSupport = false;
  vulkanSupport = true;
}).overrideAttrs
  (oldAttrs: rec {
    version = "7343";
    src = pkgs.fetchFromGitHub {
      owner = "ggml-org";
      repo = "llama.cpp";
      tag = "b${version}";
      hash = "sha256-hD8cyorU5NezRmKx+iN5gOD+3bAzS3IDVl7Ju5/zVHc=";
      # Keep .git just long enough to write the short commit id to COMMIT,
      # then remove every .git entry from the fetched tree.
      leaveDotGit = true;
      postFetch = ''
        git -C "$out" rev-parse --short HEAD > $out/COMMIT
        find "$out" -name .git -print0 | xargs -0 rm -rf
      '';
    };

    # Auto CPU Optimizations
    #
    # NOTE: GGML_CUDA_ENABLE_UNIFIED_MEMORY is intentionally not passed here.
    # llama.cpp reads it as an environment variable at run time; it is not a
    # CMake option, so "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1" had no effect on
    # the build. Export GGML_CUDA_ENABLE_UNIFIED_MEMORY=1 in the environment of
    # the process that runs llama.cpp instead.
    cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
      "-DGGML_NATIVE=ON"
      "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
    ];

    # Disable Nix's march=native Stripping
    preConfigure = ''
      export NIX_ENFORCE_NO_NATIVE=0
      ${oldAttrs.preConfigure or ""}
    '';

    # Apply Patches
    patchFlags = [ "-p1" ];
    patches = (oldAttrs.patches or [ ]) ++ [
      ./oneof-not-unrecognized-schema.patch
      ./additionalprops-unrecognized-schema.patch
    ];
  })
|
||||
28
packages/llama-cpp/oneof-not-unrecognized-schema.patch
Normal file
28
packages/llama-cpp/oneof-not-unrecognized-schema.patch
Normal file
@@ -0,0 +1,28 @@
|
||||
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs.
|
||||
#
|
||||
# Author: https://github.com/simaotwx
|
||||
# Reference: https://github.com/ggml-org/llama.cpp/issues/14227#issuecomment-3547740835
|
||||
|
||||
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
|
||||
index 478aa1be7..ec0b3b73e 100644
|
||||
--- a/common/json-schema-to-grammar.cpp
|
||||
+++ b/common/json-schema-to-grammar.cpp
|
||||
@@ -822,7 +822,17 @@ public:
|
||||
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
|
||||
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
|
||||
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
|
||||
- return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
|
||||
+            // Alternatives carrying an empty `not` schema (`"not": {}`) are left out
+            // of the union; every other alternative is kept in its original order.
+            std::vector<json> viable_alternatives;
+            for (const auto & candidate : alt_schemas) {
+                const bool has_not = candidate.is_object() && candidate.contains("not");
+                const bool never_matches = has_not && candidate["not"].is_object() && candidate["not"].empty();
+                if (!never_matches) {
+                    viable_alternatives.push_back(candidate);
+                }
+            }
+            return _add_rule(rule_name, _generate_union_rule(name, viable_alternatives));
|
||||
} else if (schema_type.is_array()) {
|
||||
std::vector<json> schema_types;
|
||||
for (const auto & t : schema_type) {
|
||||
143
packages/llama-swap/default.nix
Normal file
143
packages/llama-swap/default.nix
Normal file
@@ -0,0 +1,143 @@
|
||||
{
  lib,
  stdenv,
  buildGoModule,
  fetchFromGitHub,
  versionCheckHook,
  callPackage,
  nixosTests,
}:

let
  # Whether binaries built for the host platform can be executed on the build
  # platform. Gates the check phase and the helper binary the tests run.
  hostRunnable = stdenv.buildPlatform.canExecute stdenv.hostPlatform;

  # Tests skipped on x86_64-darwin only, where they fail intermittently:
  # https://github.com/mostlygeek/llama-swap/issues/320
  skippedTests = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    "TestProcess_AutomaticallyStartsUpstream"
    "TestProcess_WaitOnMultipleStarts"
    "TestProcess_BrokenModelConfig"
    "TestProcess_UnloadAfterTTL"
    "TestProcess_LowTTLValue"
    "TestProcess_HTTPRequestsHaveTimeToFinish"
    "TestProcess_SwapState"
    "TestProcess_ShutdownInterruptsHealthCheck"
    "TestProcess_ExitInterruptsHealthCheck"
    "TestProcess_ConcurrencyLimit"
    "TestProcess_StopImmediately"
    "TestProcess_ForceStopWithKill"
    "TestProcess_StopCmd"
    "TestProcess_EnvironmentSetCorrectly"
  ];
in
buildGoModule (finalAttrs: {
  pname = "llama-swap";
  version = "176";

  src = fetchFromGitHub {
    owner = "mostlygeek";
    repo = "llama-swap";
    tag = "v${finalAttrs.version}";
    hash = "sha256-nfkuaiEITOmpkiLft3iNW1VUexHwZ36c8gwcQKGANbQ=";
    # The commit id and commit date need git, so they are captured into
    # files during postFetch; .git is deleted afterwards so it is not part
    # of the fetched source.
    leaveDotGit = true;
    postFetch = ''
      cd "$out"
      git rev-parse HEAD > $out/COMMIT
      # '0000-00-00T00:00:00Z'
      date -u -d "@$(git log -1 --pretty=%ct)" "+'%Y-%m-%dT%H:%M:%SZ'" > $out/SOURCE_DATE_EPOCH
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  vendorHash = "sha256-/EbFyuCVFxHTTO0UwSV3B/6PYUpudxB2FD8nNx1Bb+M=";

  passthru = {
    # Web UI, built separately by ./ui.nix from this package's src.
    ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
    # Read by ./ui.nix via `inherit (llama-swap) ... npmDepsHash`.
    npmDepsHash = "sha256-RKPcMwJ0qVOgbTxoGryrLn7AW0Bfmv9WasoY+gw4B30=";
    tests.nixos = nixosTests.llama-swap;
  };

  nativeBuildInputs = [ versionCheckHook ];

  # required for testing
  __darwinAllowLocalNetworking = true;

  ldflags = [
    "-s"
    "-w"
    "-X main.version=${finalAttrs.version}"
  ];

  # Appends the commit/date recorded by postFetch to the linker flags and
  # places the prebuilt UI where the proxy package embeds it.
  preBuild = ''
    # ldflags based on metadata from git and source
    ldflags+=" -X main.commit=$(cat COMMIT)"
    ldflags+=" -X main.date=$(cat SOURCE_DATE_EPOCH)"

    # copy for go:embed in proxy/ui_embed.go
    cp -r ${finalAttrs.passthru.ui}/ui_dist proxy/
  '';

  excludedPackages = [
    # regression testing tool
    "misc/process-cmd-test"
    # benchmark/regression testing tool
    "misc/benchmark-chatcompletion"
  ]
  ++ lib.optionals (!hostRunnable) [
    # only needed by tests that execute it; pointless when it cannot be run
    "misc/simple-responder"
  ];

  checkFlags = [ "-skip=^${builtins.concatStringsSep "$|^" skippedTests}$" ];

  # Some tests execute `simple-something`, and proxy/helpers_test.go checks
  # that the file exists, so checks only run when host binaries are runnable.
  doCheck = hostRunnable;
  preCheck = ''
    mkdir build
    ln -s "$GOPATH/bin/simple-responder" "./build/simple-responder_''${GOOS}_''${GOARCH}"
  '';
  postCheck = ''
    rm "$GOPATH/bin/simple-responder"
  '';

  preInstall = ''
    install -Dm444 -t "$out/share/llama-swap" config.example.yaml
  '';

  doInstallCheck = true;
  versionCheckProgramArg = "-version";

  meta = {
    homepage = "https://github.com/mostlygeek/llama-swap";
    changelog = "https://github.com/mostlygeek/llama-swap/releases/tag/${finalAttrs.src.tag}";
    description = "Model swapping for llama.cpp (or any local OpenAPI compatible server)";
    longDescription = ''
      llama-swap is a light weight, transparent proxy server that provides
      automatic model swapping to llama.cpp's server.

      When a request is made to an OpenAI compatible endpoint, llama-swap will
      extract the `model` value and load the appropriate server configuration to
      serve it. If the wrong upstream server is running, it will be replaced
      with the correct one. This is where the "swap" part comes in. The upstream
      server is automatically swapped to the correct one to serve the request.

      In the most basic configuration llama-swap handles one model at a time.
      For more advanced use cases, the `groups` feature allows multiple models
      to be loaded at the same time. You have complete control over how your
      system resources are used.
    '';
    license = lib.licenses.mit;
    mainProgram = "llama-swap";
    maintainers = with lib.maintainers; [
      jk
      podium868909
    ];
  };
})
|
||||
25
packages/llama-swap/ui.nix
Normal file
25
packages/llama-swap/ui.nix
Normal file
@@ -0,0 +1,25 @@
|
||||
{
  llama-swap,
  buildNpmPackage,
}:

let
  # Reuse the parent package's meta, minus mainProgram.
  inheritedMeta = removeAttrs llama-swap.meta [ "mainProgram" ];
in
buildNpmPackage (finalAttrs: {
  pname = "${llama-swap.pname}-ui";
  version = llama-swap.version;
  src = llama-swap.src;
  npmDepsHash = llama-swap.npmDepsHash;

  # The UI lives in the ui/ subdirectory of the llama-swap source tree.
  sourceRoot = "${finalAttrs.src.name}/ui";

  # Point the path vite.config.ts refers to as ../proxy/ui_dist at this
  # derivation's own output instead.
  postPatch = ''
    substituteInPlace vite.config.ts \
      --replace-fail "../proxy/ui_dist" "${placeholder "out"}/ui_dist"
  '';

  # bundled "ui_dist" doesn't need node_modules
  postInstall = ''
    rm -rf $out/lib
  '';

  meta = inheritedMeta // {
    description = "${llama-swap.meta.description} - UI";
  };
})
|
||||
91
packages/qwen-code/default.nix
Normal file
91
packages/qwen-code/default.nix
Normal file
@@ -0,0 +1,91 @@
|
||||
{
  lib,
  buildNpmPackage,
  fetchFromGitHub,
  jq,
  git,
  ripgrep,
  pkg-config,
  glib,
  libsecret,
  ...
}:

buildNpmPackage (finalAttrs: {
  pname = "qwen-code";
  version = "0.4.0-nightly.20251209.a6a57233";

  src = fetchFromGitHub {
    owner = "QwenLM";
    repo = "qwen-code";
    tag = "v${finalAttrs.version}";
    hash = "sha256-s9m1IN6jDDbNPr/vI/UcrauYPiyQTDODarLP3EvnG3Y=";
  };

  npmDepsHash = "sha256-ngAjCCoHLPZ+GgBRmAKbRYaF7l+RK3YGf1kEkwFbyQg=";

  nativeBuildInputs = [ jq pkg-config git ];

  buildInputs = [ ripgrep glib libsecret ];

  # Rewrites package-lock.json: deletes the node-pty / @lydell/node-pty* /
  # keytar package entries and filters matching keys out of dependency maps.
  #
  # NOTE(review): the first walk uses if/elif, so an object that has both
  # `dependencies` and `optionalDependencies` only gets `dependencies`
  # filtered. Confirm whether that is intended; changing the filter alters the
  # lockfile and presumably requires a new npmDepsHash.
  postPatch = ''
    ${jq}/bin/jq '
      del(.packages."node_modules/node-pty") |
      del(.packages."node_modules/@lydell/node-pty") |
      del(.packages."node_modules/@lydell/node-pty-darwin-arm64") |
      del(.packages."node_modules/@lydell/node-pty-darwin-x64") |
      del(.packages."node_modules/@lydell/node-pty-linux-arm64") |
      del(.packages."node_modules/@lydell/node-pty-linux-x64") |
      del(.packages."node_modules/@lydell/node-pty-win32-arm64") |
      del(.packages."node_modules/@lydell/node-pty-win32-x64") |
      del(.packages."node_modules/keytar") |
      walk(
        if type == "object" and has("dependencies") then
          .dependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        elif type == "object" and has("optionalDependencies") then
          .optionalDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        else .
        end
      ) |
      walk(
        if type == "object" and has("peerDependencies") then
          .peerDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        else .
        end
      )
    ' package-lock.json > package-lock.json.tmp && mv package-lock.json.tmp package-lock.json
  '';

  # Custom build: run the project's `generate` and `bundle` npm scripts.
  buildPhase = ''
    runHook preBuild
    npm run generate
    npm run bundle
    runHook postBuild
  '';

  # Installs the bundle and the pruned node_modules under share/qwen-code,
  # deletes symlinks inside the copied node_modules, and exposes cli.js as
  # bin/qwen.
  installPhase = ''
    runHook preInstall
    mkdir -p $out/bin $out/share/qwen-code
    cp -r dist/* $out/share/qwen-code/
    npm prune --production
    cp -r node_modules $out/share/qwen-code/
    find $out/share/qwen-code/node_modules -type l -delete || true
    patchShebangs $out/share/qwen-code
    ln -s $out/share/qwen-code/cli.js $out/bin/qwen
    runHook postInstall
  '';

  meta = {
    description = "Coding agent that lives in digital world";
    homepage = "https://github.com/QwenLM/qwen-code";
    mainProgram = "qwen";
    license = lib.licenses.asl20;
    platforms = lib.platforms.all;
  };
})
|
||||
Reference in New Issue
Block a user