chore(llm): clean up models & fix llama-cpp issue
@@ -35,7 +35,8 @@
     };
   };
 
-  outputs = inputs:
+  outputs =
+    inputs:
     inputs.snowfall-lib.mkFlake {
       inherit inputs;
       src = ./.;
@@ -1,4 +1,9 @@
-{ pkgs, lib, config, namespace, ... }:
+{ pkgs
+, lib
+, config
+, namespace
+, ...
+}:
 let
   inherit (lib.${namespace}) enabled;
 in
@@ -11,15 +16,6 @@ in
     inherit (config.snowfallorg.user) name;
   };
 
-  services = {
-    # TODO
-    # sops = {
-    #   enable = true;
-    #   defaultSopsFile = lib.snowfall.fs.get-file "secrets/mac-va-mbp-work/evanreichard/default.yaml";
-    #   sshKeyPaths = [ "${config.home.homeDirectory}/.ssh/id_ed25519" ];
-    # };
-  };
-
   programs = {
     graphical = {
       ghostty = enabled;
@@ -47,6 +43,9 @@ in
       texliveSmall # Pandoc PDF Dep
       google-cloud-sdk
       tldr
+      opencode
+      claude-code
+      reichard.qwen-code
     ];
 
     # SQLite Configuration
@@ -3,67 +3,67 @@ require("luasnip.loaders.from_vscode").lazy_load()
 
 -- Check Tab Completion
 local has_words_before = function()
     local line, col = unpack(vim.api.nvim_win_get_cursor(0))
     return col ~= 0 and
         vim.api.nvim_buf_get_lines(0, line - 1, line, true)[1]:sub(col,
             col)
         :match("%s") == nil
 end
 
 cmp.setup({
     snippet = {
-        expand = function(args) require'luasnip'.lsp_expand(args.body) end
+        expand = function(args) require 'luasnip'.lsp_expand(args.body) end
     },
 
     mapping = cmp.mapping.preset.insert({
 
         -- Tab Completion
         ["<Tab>"] = cmp.mapping(function(fallback)
             if cmp.visible() then
                 cmp.select_next_item()
             elseif has_words_before() then
                 cmp.complete()
             else
                 fallback()
             end
-        end, {"i", "s"}),
+        end, { "i", "s" }),
 
         -- Reverse Tab Completion
         ["<S-Tab>"] = cmp.mapping(function(fallback)
             if cmp.visible() then
                 cmp.select_prev_item()
             else
                 fallback()
             end
-        end, {"i", "s"}),
+        end, { "i", "s" }),
 
         -- Misc Mappings
         ['<C-b>'] = cmp.mapping.scroll_docs(-4),
         ['<C-f>'] = cmp.mapping.scroll_docs(4),
         ['<C-Space>'] = cmp.mapping.complete(),
         ['<C-e>'] = cmp.mapping.abort(),
-        ['<CR>'] = cmp.mapping.confirm({select = true})
+        ['<CR>'] = cmp.mapping.confirm({ select = true })
 
     }),
 
     -- Default Sources
     sources = cmp.config.sources({
-        {name = 'nvim_lsp'}, {name = 'luasnip'}, {name = 'path'},
-        {name = 'buffer'}
+        { name = 'nvim_lsp' }, { name = 'luasnip' }, { name = 'path' },
+        { name = 'buffer' }
     })
 
 })
 
 -- Completion - `/` and `?`
-cmp.setup.cmdline({'/', '?'}, {
+cmp.setup.cmdline({ '/', '?' }, {
     mapping = cmp.mapping.preset.cmdline(),
-    sources = {{name = 'buffer'}}
+    sources = { { name = 'buffer' } }
 })
 
 -- Completion = `:`
 cmp.setup.cmdline(':', {
     mapping = cmp.mapping.preset.cmdline(),
-    sources = cmp.config.sources({{name = 'path'}, {name = 'cmdline'}})
+    sources = cmp.config.sources({ { name = 'path' }, { name = 'cmdline' } })
 })
 
 -- Autopairs
@@ -1,9 +1,10 @@
 local llm_endpoint = "https://llm-api.va.reichard.io"
-local llm_model = "qwen3-coder-30b-instruct"
+local llm_assistant_model = "gpt-oss-20b-thinking"
+local llm_infill_model = "qwen2.5-coder-3b-instruct"
 
 -- Default Llama - Toggle Llama & Copilot
-vim.g.copilot_filetypes = { ["*"] = false }
-local current_mode = "llama"
+-- vim.g.copilot_filetypes = { ["*"] = false }
+local current_mode = "copilot"
 local function toggle_llm_fim_provider()
     if current_mode == "llama" then
         vim.g.copilot_filetypes = { ["*"] = true }
@@ -24,8 +25,10 @@ vim.keymap.set("n", "<leader>cf", toggle_llm_fim_provider, { desc = "Toggle FIM
 -- Configure LLama LLM FIM
 vim.g.llama_config = {
     endpoint = llm_endpoint .. "/infill",
-    model = llm_model,
-    n_predict = 1024,
+    model = llm_infill_model,
+    n_predict = 2048,
+    ring_n_chunks = 32,
+    enable_at_startup = false,
 }
 
 -- Configure Code Companion
@@ -39,7 +42,7 @@ require("codecompanion").setup({
         return require("codecompanion.adapters").extend("openai_compatible", {
             name = "llama-swap",
             formatted_name = "LlamaSwap",
-            schema = { model = { default = llm_model } },
+            schema = { model = { default = llm_assistant_model } },
             env = { url = llm_endpoint },
         })
     end,
packages/llama-cpp/additionalprops-unrecognized-schema.patch (new file, +31 lines)
@@ -0,0 +1,31 @@
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs
# specifically inside additionalProperties.
#
# Author: https://github.com/evanreichard

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index c3b4e5d..ea24bc3 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -858,10 +858,19 @@ public:
                     properties.emplace_back(prop.key(), prop.value());
                 }
             }
+            json additionalProps = schema.contains("additionalProperties") ? schema["additionalProperties"] : json();
+            if (additionalProps.is_object() && additionalProps.contains("not")) {
+                const auto& not_val = additionalProps["not"];
+                if (not_val.is_object() && not_val.empty()) {
+                    additionalProps.erase("not");
+                    if (additionalProps.empty()) {
+                        additionalProps = false;
+                    }
+                }
+            }
             return _add_rule(rule_name,
                 _build_object_rule(
-                    properties, required, name,
-                    schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
+                    properties, required, name, additionalProps));
         } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
             std::unordered_set<std::string> required;
             std::vector<std::pair<std::string, json>> properties;
packages/llama-cpp/default.nix (new file, +42 lines)
@@ -0,0 +1,42 @@
{ pkgs }:
(pkgs.llama-cpp.override {
  cudaSupport = true;
  blasSupport = true;
  rocmSupport = false;
  metalSupport = false;
  vulkanSupport = true;
}).overrideAttrs
  (oldAttrs: rec {
    version = "7343";
    src = pkgs.fetchFromGitHub {
      owner = "ggml-org";
      repo = "llama.cpp";
      tag = "b${version}";
      hash = "sha256-hD8cyorU5NezRmKx+iN5gOD+3bAzS3IDVl7Ju5/zVHc=";
      leaveDotGit = true;
      postFetch = ''
        git -C "$out" rev-parse --short HEAD > $out/COMMIT
        find "$out" -name .git -print0 | xargs -0 rm -rf
      '';
    };

    # Auto CPU Optimizations
    cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
      "-DGGML_NATIVE=ON"
      "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
      "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
    ];

    # Disable Nix's march=native Stripping
    preConfigure = ''
      export NIX_ENFORCE_NO_NATIVE=0
      ${oldAttrs.preConfigure or ""}
    '';

    # Apply Patches
    patchFlags = [ "-p1" ];
    patches = (oldAttrs.patches or [ ]) ++ [
      ./oneof-not-unrecognized-schema.patch
      ./additionalprops-unrecognized-schema.patch
    ];
  })
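With Snowfall-lib, a file under packages/<name>/default.nix is exposed on the flake's package namespace, so the patched build above is what the rest of this repo refers to as pkgs.reichard.llama-cpp. A minimal sketch of that consumption (the model name and .gguf path below are placeholders; the real entries are in the host configuration diff further down):

{ pkgs, ... }:
{
  # Hypothetical example entry: point a llama-swap model command at the patched
  # llama-server binary instead of the stock nixpkgs llama-cpp.
  services.llama-swap.settings.models."example-model".cmd =
    "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/example-model.gguf";
}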
packages/llama-cpp/oneof-not-unrecognized-schema.patch (new file, +28 lines)
@@ -0,0 +1,28 @@
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs.
#
# Author: https://github.com/simaotwx
# Reference: https://github.com/ggml-org/llama.cpp/issues/14227#issuecomment-3547740835

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index 478aa1be7..ec0b3b73e 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -822,7 +822,17 @@ public:
             return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
         } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
             std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
-            return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
+            std::vector<json> filtered_schemas;
+            for (const auto& alt : alt_schemas) {
+                if (alt.is_object() && alt.contains("not")) {
+                    const auto& not_val = alt["not"];
+                    if (not_val.is_object() && not_val.empty()) {
+                        continue;
+                    }
+                }
+                filtered_schemas.push_back(alt);
+            }
+            return _add_rule(rule_name, _generate_union_rule(name, filtered_schemas));
         } else if (schema_type.is_array()) {
             std::vector<json> schema_types;
             for (const auto & t : schema_type) {
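Both patches address the same failure mode: some tool-calling clients emit JSON-Schema fragments containing "not": {} ("matches nothing"), either as a oneOf/anyOf alternative or as the value of additionalProperties, and upstream json-schema-to-grammar.cpp rejects such schemas as unrecognized. A rough illustration of the kind of fragment involved, written as a Nix attrset here only because that is this repo's lingua franca (field names are made up; the real payloads are plain JSON):

{
  type = "object";
  properties.command.anyOf = [
    { type = "string"; }
    { not = { }; } # a "never matches" branch the converter previously choked on
  ];
  additionalProperties = { not = { }; };
}

With the patches applied, the empty `not` alternative is dropped from the union, and additionalProperties = { not = { }; } is collapsed to additionalProperties = false.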
packages/llama-swap/default.nix (new file, +143 lines)
@@ -0,0 +1,143 @@
{ lib
, stdenv
, buildGoModule
, fetchFromGitHub
, versionCheckHook
, callPackage
, nixosTests
,
}:

let
  canExecute = stdenv.buildPlatform.canExecute stdenv.hostPlatform;
in
buildGoModule (finalAttrs: {
  pname = "llama-swap";
  version = "176";

  src = fetchFromGitHub {
    owner = "mostlygeek";
    repo = "llama-swap";
    tag = "v${finalAttrs.version}";
    hash = "sha256-nfkuaiEITOmpkiLft3iNW1VUexHwZ36c8gwcQKGANbQ=";
    # populate values that require us to use git. By doing this in postFetch we
    # can delete .git afterwards and maintain better reproducibility of the src.
    leaveDotGit = true;
    postFetch = ''
      cd "$out"
      git rev-parse HEAD > $out/COMMIT
      # '0000-00-00T00:00:00Z'
      date -u -d "@$(git log -1 --pretty=%ct)" "+'%Y-%m-%dT%H:%M:%SZ'" > $out/SOURCE_DATE_EPOCH
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  vendorHash = "sha256-/EbFyuCVFxHTTO0UwSV3B/6PYUpudxB2FD8nNx1Bb+M=";

  passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
  passthru.npmDepsHash = "sha256-RKPcMwJ0qVOgbTxoGryrLn7AW0Bfmv9WasoY+gw4B30=";

  nativeBuildInputs = [
    versionCheckHook
  ];

  # required for testing
  __darwinAllowLocalNetworking = true;

  ldflags = [
    "-s"
    "-w"
    "-X main.version=${finalAttrs.version}"
  ];

  preBuild = ''
    # ldflags based on metadata from git and source
    ldflags+=" -X main.commit=$(cat COMMIT)"
    ldflags+=" -X main.date=$(cat SOURCE_DATE_EPOCH)"

    # copy for go:embed in proxy/ui_embed.go
    cp -r ${finalAttrs.passthru.ui}/ui_dist proxy/
  '';

  excludedPackages = [
    # regression testing tool
    "misc/process-cmd-test"
    # benchmark/regression testing tool
    "misc/benchmark-chatcompletion"
  ]
  ++ lib.optionals (!canExecute) [
    # some tests expect to execute `simple-something`; if it can't be executed
    # it's unneeded
    "misc/simple-responder"
  ];

  checkFlags =
    let
      skippedTests = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
        # Fail only on x86_64-darwin intermittently
        # https://github.com/mostlygeek/llama-swap/issues/320
        "TestProcess_AutomaticallyStartsUpstream"
        "TestProcess_WaitOnMultipleStarts"
        "TestProcess_BrokenModelConfig"
        "TestProcess_UnloadAfterTTL"
        "TestProcess_LowTTLValue"
        "TestProcess_HTTPRequestsHaveTimeToFinish"
        "TestProcess_SwapState"
        "TestProcess_ShutdownInterruptsHealthCheck"
        "TestProcess_ExitInterruptsHealthCheck"
        "TestProcess_ConcurrencyLimit"
        "TestProcess_StopImmediately"
        "TestProcess_ForceStopWithKill"
        "TestProcess_StopCmd"
        "TestProcess_EnvironmentSetCorrectly"
      ];
    in
    [ "-skip=^${builtins.concatStringsSep "$|^" skippedTests}$" ];

  # some tests expect to execute `simple-something` and proxy/helpers_test.go
  # checks the file exists
  doCheck = canExecute;
  preCheck = ''
    mkdir build
    ln -s "$GOPATH/bin/simple-responder" "./build/simple-responder_''${GOOS}_''${GOARCH}"
  '';
  postCheck = ''
    rm "$GOPATH/bin/simple-responder"
  '';

  preInstall = ''
    install -Dm444 -t "$out/share/llama-swap" config.example.yaml
  '';

  doInstallCheck = true;
  versionCheckProgramArg = "-version";

  passthru.tests.nixos = nixosTests.llama-swap;

  meta = {
    homepage = "https://github.com/mostlygeek/llama-swap";
    changelog = "https://github.com/mostlygeek/llama-swap/releases/tag/${finalAttrs.src.tag}";
    description = "Model swapping for llama.cpp (or any local OpenAPI compatible server)";
    longDescription = ''
      llama-swap is a light weight, transparent proxy server that provides
      automatic model swapping to llama.cpp's server.

      When a request is made to an OpenAI compatible endpoint, llama-swap will
      extract the `model` value and load the appropriate server configuration to
      serve it. If the wrong upstream server is running, it will be replaced
      with the correct one. This is where the "swap" part comes in. The upstream
      server is automatically swapped to the correct one to serve the request.

      In the most basic configuration llama-swap handles one model at a time.
      For more advanced use cases, the `groups` feature allows multiple models
      to be loaded at the same time. You have complete control over how your
      system resources are used.
    '';
    license = lib.licenses.mit;
    mainProgram = "llama-swap";
    maintainers = with lib.maintainers; [
      jk
      podium868909
    ];
  };
})
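The long description above is the gist of how the service is used later in this commit: requests to the OpenAI-compatible endpoint are routed by their model field, the matching llama-server command is started (swapping out whatever was running), and groups keep several models resident side by side. Reduced to its minimal shape under the NixOS module, that looks roughly like this (model name and path are placeholders; the real configuration is in the host diff below):

{ pkgs, ... }:
{
  services.llama-swap = {
    enable = true;
    package = pkgs.reichard.llama-swap;
    settings = {
      models."some-model".cmd =
        "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/some-model.gguf";
      # A group's members stay loaded together instead of being swapped out.
      groups.coding = {
        swap = false;
        exclusive = true;
        members = [ "some-model" ];
      };
    };
  };
}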
packages/llama-swap/ui.nix (new file, +25 lines)
@@ -0,0 +1,25 @@
{ llama-swap
, buildNpmPackage
,
}:

buildNpmPackage (finalAttrs: {
  pname = "${llama-swap.pname}-ui";
  inherit (llama-swap) version src npmDepsHash;

  postPatch = ''
    substituteInPlace vite.config.ts \
      --replace-fail "../proxy/ui_dist" "${placeholder "out"}/ui_dist"
  '';

  sourceRoot = "${finalAttrs.src.name}/ui";

  # bundled "ui_dist" doesn't need node_modules
  postInstall = ''
    rm -rf $out/lib
  '';

  meta = (removeAttrs llama-swap.meta [ "mainProgram" ]) // {
    description = "${llama-swap.meta.description} - UI";
  };
})
packages/qwen-code/default.nix (new file, +91 lines)
@@ -0,0 +1,91 @@
{ lib
, buildNpmPackage
, fetchFromGitHub
, jq
, git
, ripgrep
, pkg-config
, glib
, libsecret
, ...
}:
buildNpmPackage (finalAttrs: {
  pname = "qwen-code";
  version = "0.4.0-nightly.20251209.a6a57233";

  src = fetchFromGitHub {
    owner = "QwenLM";
    repo = "qwen-code";
    tag = "v${finalAttrs.version}";
    hash = "sha256-s9m1IN6jDDbNPr/vI/UcrauYPiyQTDODarLP3EvnG3Y=";
  };

  npmDepsHash = "sha256-ngAjCCoHLPZ+GgBRmAKbRYaF7l+RK3YGf1kEkwFbyQg=";

  nativeBuildInputs = [
    jq
    pkg-config
    git
  ];

  buildInputs = [
    ripgrep
    glib
    libsecret
  ];

  postPatch = ''
    ${jq}/bin/jq '
      del(.packages."node_modules/node-pty") |
      del(.packages."node_modules/@lydell/node-pty") |
      del(.packages."node_modules/@lydell/node-pty-darwin-arm64") |
      del(.packages."node_modules/@lydell/node-pty-darwin-x64") |
      del(.packages."node_modules/@lydell/node-pty-linux-arm64") |
      del(.packages."node_modules/@lydell/node-pty-linux-x64") |
      del(.packages."node_modules/@lydell/node-pty-win32-arm64") |
      del(.packages."node_modules/@lydell/node-pty-win32-x64") |
      del(.packages."node_modules/keytar") |
      walk(
        if type == "object" and has("dependencies") then
          .dependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        elif type == "object" and has("optionalDependencies") then
          .optionalDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        else .
        end
      ) |
      walk(
        if type == "object" and has("peerDependencies") then
          .peerDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        else .
        end
      )
    ' package-lock.json > package-lock.json.tmp && mv package-lock.json.tmp package-lock.json
  '';

  buildPhase = ''
    runHook preBuild
    npm run generate
    npm run bundle
    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall
    mkdir -p $out/bin $out/share/qwen-code
    cp -r dist/* $out/share/qwen-code/
    npm prune --production
    cp -r node_modules $out/share/qwen-code/
    find $out/share/qwen-code/node_modules -type l -delete || true
    patchShebangs $out/share/qwen-code
    ln -s $out/share/qwen-code/cli.js $out/bin/qwen
    runHook postInstall
  '';

  meta = {
    description = "Coding agent that lives in digital world";
    homepage = "https://github.com/QwenLM/qwen-code";
    mainProgram = "qwen";
    license = lib.licenses.asl20;
    platforms = lib.platforms.all;
  };
})
@@ -11,45 +11,22 @@
   system.stateVersion = "25.11";
   time.timeZone = "America/New_York";
   hardware.nvidia-container-toolkit.enable = true;
+  security.pam.loginLimits = [
+    {
+      domain = "*";
+      type = "soft";
+      item = "memlock";
+      value = "unlimited";
+    }
+    {
+      domain = "*";
+      type = "hard";
+      item = "memlock";
+      value = "unlimited";
+    }
+  ];
 
-  nixpkgs.config = {
-    allowUnfree = true;
-    packageOverrides = pkgs: {
-      llama-cpp =
-        (pkgs.llama-cpp.override {
-          cudaSupport = true;
-          blasSupport = true;
-          rocmSupport = false;
-          metalSupport = false;
-          vulkanSupport = true;
-        }).overrideAttrs
-          (oldAttrs: rec {
-            version = "7278";
-            src = pkgs.fetchFromGitHub {
-              owner = "ggml-org";
-              repo = "llama.cpp";
-              tag = "b${version}";
-              hash = "sha256-Gxi/sUIuVvX5+mcZj9vCvUgODsWPAFzESQz8TjTe/Mk=";
-              leaveDotGit = true;
-              postFetch = ''
-                git -C "$out" rev-parse --short HEAD > $out/COMMIT
-                find "$out" -name .git -print0 | xargs -0 rm -rf
-              '';
-            };
-            # Auto CPU Optimizations
-            cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
-              "-DGGML_NATIVE=ON"
-              "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
-              "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
-            ];
-            # Disable Nix's march=native Stripping
-            preConfigure = ''
-              export NIX_ENFORCE_NO_NATIVE=0
-              ${oldAttrs.preConfigure or ""}
-            '';
-          });
-    };
-  };
+  nixpkgs.config.allowUnfree = true;
 
   fileSystems."/mnt/ssd" = {
     device = "/dev/disk/by-id/ata-Samsung_SSD_870_EVO_1TB_S6PTNZ0R620739L-part1";
@@ -106,78 +83,103 @@
     virtualisation = {
       podman = enabled;
     };
 
   };
 
+  systemd.services.llama-swap.serviceConfig.LimitMEMLOCK = "infinity";
   services.llama-swap = {
     enable = true;
     openFirewall = true;
+    package = pkgs.reichard.llama-swap;
     settings = {
       models = {
-        # https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
-        "smollm3-3b-instruct" = {
-          name = "SmolLM3(3B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.6 --top-p 0.95 --reasoning-budget 0 -sm none";
-        };
-
-        # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
-        "qwen3-next-80b-instruct" = {
-          name = "Qwen3 Next (80B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 32768 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 -sm none -ncmoe 39";
-        };
-
         # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-GGUF/tree/main
         "gpt-oss-20b-thinking" = {
           name = "GPT OSS (20B) - Thinking";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-heretic-MXFP4.gguf --ctx-size 128000 --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'";
-        };
-
-        # https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
-        "ernie4.5-21b-instruct" = {
-          name = "ERNIE4.5 (21B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
-        };
-
-        # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
-        "qwen2.5-coder-7b-instruct" = {
-          name = "Qwen2.5 Coder (7B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf --fim-qwen-7b-default --ctx-size 131072 --port \${PORT}";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-heretic-MXFP4.gguf --ctx-size 128000 -ts 75,25 --mlock --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'";
+          aliases = [
+            "claude-sonnet-4-5"
+            "claude-sonnet-4-5-20250929"
+            "claude-haiku-4-5"
+            "claude-haiku-4-5-20251001"
+            "claude-opus-4-5"
+            "claude-opus-4-5-20251101"
+          ];
         };
 
         # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
         "qwen3-coder-30b-instruct" = {
           name = "Qwen3 Coder (30B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 55000 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-UD-IQ2_M.gguf --ctx-size 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 --cache-type-k q4_0 --cache-type-v q4_0 --mlock";
         };
 
         # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
-        "qwen3-30b-instruct" = {
-          name = "Qwen3 (30B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+        "qwen3-30b-2507-instruct" = {
+          name = "Qwen3 2507 (30B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-UD-IQ2_M.gguf --ctx-size 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 --cache-type-k q4_0 --cache-type-v q4_0";
         };
 
         # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
-        "qwen3-30b-thinking" = {
-          name = "Qwen3 (30B) - Thinking";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+        "qwen3-30b-2507-thinking" = {
+          name = "Qwen3 2507 (30B) - Thinking";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+        };
+
+        # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
+        "qwen3-next-80b-instruct" = {
+          name = "Qwen3 Next (80B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 32768 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 -sm none -ncmoe 39";
+        };
+
+        # https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
+        "smollm3-3b-instruct" = {
+          name = "SmolLM3(3B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.6 --top-p 0.95 --reasoning-budget 0 -sm none";
+        };
+
+        # https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
+        "ernie4.5-21b-instruct" = {
+          name = "ERNIE4.5 (21B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        };
+
+        # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
+        "qwen2.5-coder-7b-instruct" = {
+          name = "Qwen2.5 Coder (7B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf --fim-qwen-7b-default --ctx-size 131072 --port \${PORT}";
+        };
+
+        # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
+        "qwen2.5-coder-3b-instruct" = {
+          name = "Qwen2.5 Coder (3B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf --fim-qwen-3b-default --ctx-size 32768 -dev CUDA1 --port \${PORT}";
         };
 
         # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
         "qwen3-8b-vision" = {
           name = "Qwen3 Vision (8B) - Thinking";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf --ctx-size 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf --ctx-size 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
         };
 
         # https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
         "olmoe-7b-instruct" = {
           name = "OLMoE (7B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA0";
         };
 
         # https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
         "phi-mini-8b-instruct" = {
           name = "Phi mini (8B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA0";
+        };
+      };
+      groups = {
+        coding = {
+          swap = false;
+          exclusive = true;
+          members = [
+            "gpt-oss-20b-thinking"
+            "qwen2.5-coder-3b-instruct"
+          ];
         };
       };
     };