chore(llm): clean up models & fix llama-cpp issue
@@ -35,7 +35,8 @@
     };
   };

-  outputs = inputs:
+  outputs =
+    inputs:
     inputs.snowfall-lib.mkFlake {
       inherit inputs;
       src = ./.;
@@ -1,4 +1,9 @@
-{ pkgs, lib, config, namespace, ... }:
+{ pkgs
+, lib
+, config
+, namespace
+, ...
+}:
 let
   inherit (lib.${namespace}) enabled;
 in
@@ -11,15 +16,6 @@ in
     inherit (config.snowfallorg.user) name;
   };

-  services = {
-    # TODO
-    # sops = {
-    #   enable = true;
-    #   defaultSopsFile = lib.snowfall.fs.get-file "secrets/mac-va-mbp-work/evanreichard/default.yaml";
-    #   sshKeyPaths = [ "${config.home.homeDirectory}/.ssh/id_ed25519" ];
-    # };
-  };
-
   programs = {
     graphical = {
       ghostty = enabled;
@@ -47,6 +43,9 @@ in
     texliveSmall # Pandoc PDF Dep
     google-cloud-sdk
     tldr
+    opencode
+    claude-code
+    reichard.qwen-code
   ];

   # SQLite Configuration
@@ -3,67 +3,67 @@ require("luasnip.loaders.from_vscode").lazy_load()

 -- Check Tab Completion
 local has_words_before = function()
     local line, col = unpack(vim.api.nvim_win_get_cursor(0))
     return col ~= 0 and
         vim.api.nvim_buf_get_lines(0, line - 1, line, true)[1]:sub(col,
             col)
         :match("%s") == nil
 end

 cmp.setup({
     snippet = {
-        expand = function(args) require'luasnip'.lsp_expand(args.body) end
+        expand = function(args) require 'luasnip'.lsp_expand(args.body) end
     },

     mapping = cmp.mapping.preset.insert({

         -- Tab Completion
         ["<Tab>"] = cmp.mapping(function(fallback)
             if cmp.visible() then
                 cmp.select_next_item()
             elseif has_words_before() then
                 cmp.complete()
             else
                 fallback()
             end
-        end, {"i", "s"}),
+        end, { "i", "s" }),

         -- Reverse Tab Completion
         ["<S-Tab>"] = cmp.mapping(function(fallback)
             if cmp.visible() then
                 cmp.select_prev_item()
             else
                 fallback()
             end
-        end, {"i", "s"}),
+        end, { "i", "s" }),

         -- Misc Mappings
         ['<C-b>'] = cmp.mapping.scroll_docs(-4),
         ['<C-f>'] = cmp.mapping.scroll_docs(4),
         ['<C-Space>'] = cmp.mapping.complete(),
         ['<C-e>'] = cmp.mapping.abort(),
-        ['<CR>'] = cmp.mapping.confirm({select = true})
+        ['<CR>'] = cmp.mapping.confirm({ select = true })

     }),

     -- Default Sources
     sources = cmp.config.sources({
-        {name = 'nvim_lsp'}, {name = 'luasnip'}, {name = 'path'},
-        {name = 'buffer'}
+        { name = 'nvim_lsp' }, { name = 'luasnip' }, { name = 'path' },
+        { name = 'buffer' }
     })

 })

 -- Completion - `/` and `?`
-cmp.setup.cmdline({'/', '?'}, {
+cmp.setup.cmdline({ '/', '?' }, {
     mapping = cmp.mapping.preset.cmdline(),
-    sources = {{name = 'buffer'}}
+    sources = { { name = 'buffer' } }
 })

 -- Completion = `:`
 cmp.setup.cmdline(':', {
     mapping = cmp.mapping.preset.cmdline(),
-    sources = cmp.config.sources({{name = 'path'}, {name = 'cmdline'}})
+    sources = cmp.config.sources({ { name = 'path' }, { name = 'cmdline' } })
 })

 -- Autopairs
@@ -1,9 +1,10 @@
 local llm_endpoint = "https://llm-api.va.reichard.io"
-local llm_model = "qwen3-coder-30b-instruct"
+local llm_assistant_model = "gpt-oss-20b-thinking"
+local llm_infill_model = "qwen2.5-coder-3b-instruct"

 -- Default Llama - Toggle Llama & Copilot
-vim.g.copilot_filetypes = { ["*"] = false }
-local current_mode = "llama"
+-- vim.g.copilot_filetypes = { ["*"] = false }
+local current_mode = "copilot"
 local function toggle_llm_fim_provider()
     if current_mode == "llama" then
         vim.g.copilot_filetypes = { ["*"] = true }
@@ -24,8 +25,10 @@ vim.keymap.set("n", "<leader>cf", toggle_llm_fim_provider, { desc = "Toggle FIM
 -- Configure LLama LLM FIM
 vim.g.llama_config = {
     endpoint = llm_endpoint .. "/infill",
-    model = llm_model,
-    n_predict = 1024,
+    model = llm_infill_model,
+    n_predict = 2048,
+    ring_n_chunks = 32,
+    enable_at_startup = false,
 }

 -- Configure Code Companion
@@ -39,7 +42,7 @@ require("codecompanion").setup({
       return require("codecompanion.adapters").extend("openai_compatible", {
         name = "llama-swap",
         formatted_name = "LlamaSwap",
-        schema = { model = { default = llm_model } },
+        schema = { model = { default = llm_assistant_model } },
         env = { url = llm_endpoint },
       })
     end,
packages/llama-cpp/additionalprops-unrecognized-schema.patch (new file)
@@ -0,0 +1,31 @@
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs
# specifically inside additionalProperties.
#
# Author: https://github.com/evanreichard
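#
# Example (hypothetical schema, for illustration only): an object schema such as
#   { "type": "object", "additionalProperties": { "not": {} } }
# has the empty 'not' stripped, and the now-empty additionalProperties is treated
# as false when the grammar is built, instead of being rejected as unrecognized.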

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index c3b4e5d..ea24bc3 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -858,10 +858,19 @@ public:
                 properties.emplace_back(prop.key(), prop.value());
             }
         }
+        json additionalProps = schema.contains("additionalProperties") ? schema["additionalProperties"] : json();
+        if (additionalProps.is_object() && additionalProps.contains("not")) {
+            const auto& not_val = additionalProps["not"];
+            if (not_val.is_object() && not_val.empty()) {
+                additionalProps.erase("not");
+                if (additionalProps.empty()) {
+                    additionalProps = false;
+                }
+            }
+        }
         return _add_rule(rule_name,
             _build_object_rule(
-                properties, required, name,
-                schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
+                properties, required, name, additionalProps));
     } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
         std::unordered_set<std::string> required;
         std::vector<std::pair<std::string, json>> properties;
packages/llama-cpp/default.nix (new file)
@@ -0,0 +1,42 @@
{ pkgs }:
(pkgs.llama-cpp.override {
  cudaSupport = true;
  blasSupport = true;
  rocmSupport = false;
  metalSupport = false;
  vulkanSupport = true;
}).overrideAttrs
  (oldAttrs: rec {
    version = "7343";
    src = pkgs.fetchFromGitHub {
      owner = "ggml-org";
      repo = "llama.cpp";
      tag = "b${version}";
      hash = "sha256-hD8cyorU5NezRmKx+iN5gOD+3bAzS3IDVl7Ju5/zVHc=";
      leaveDotGit = true;
      postFetch = ''
        git -C "$out" rev-parse --short HEAD > $out/COMMIT
        find "$out" -name .git -print0 | xargs -0 rm -rf
      '';
    };

    # Auto CPU Optimizations
    cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
      "-DGGML_NATIVE=ON"
      "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
      "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
    ];

    # Disable Nix's march=native Stripping
    preConfigure = ''
      export NIX_ENFORCE_NO_NATIVE=0
      ${oldAttrs.preConfigure or ""}
    '';

    # Apply Patches
    patchFlags = [ "-p1" ];
    patches = (oldAttrs.patches or [ ]) ++ [
      ./oneof-not-unrecognized-schema.patch
      ./additionalprops-unrecognized-schema.patch
    ];
  })
packages/llama-cpp/oneof-not-unrecognized-schema.patch (new file)
@@ -0,0 +1,28 @@
# This patch modifies the json-schema-to-grammar.cpp file to handle 'not: {}' constructs.
#
# Author: https://github.com/simaotwx
# Reference: https://github.com/ggml-org/llama.cpp/issues/14227#issuecomment-3547740835
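#
# Example (hypothetical schema, for illustration only): in
#   { "oneOf": [ { "type": "string" }, { "not": {} } ] }
# the empty 'not' alternative is dropped before the union rule is generated.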

diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index 478aa1be7..ec0b3b73e 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -822,7 +822,17 @@ public:
         return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
     } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
         std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
-        return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
+        std::vector<json> filtered_schemas;
+        for (const auto& alt : alt_schemas) {
+            if (alt.is_object() && alt.contains("not")) {
+                const auto& not_val = alt["not"];
+                if (not_val.is_object() && not_val.empty()) {
+                    continue;
+                }
+            }
+            filtered_schemas.push_back(alt);
+        }
+        return _add_rule(rule_name, _generate_union_rule(name, filtered_schemas));
     } else if (schema_type.is_array()) {
         std::vector<json> schema_types;
         for (const auto & t : schema_type) {
packages/llama-swap/default.nix (new file)
@@ -0,0 +1,143 @@
{ lib
, stdenv
, buildGoModule
, fetchFromGitHub
, versionCheckHook
, callPackage
, nixosTests
,
}:

let
  canExecute = stdenv.buildPlatform.canExecute stdenv.hostPlatform;
in
buildGoModule (finalAttrs: {
  pname = "llama-swap";
  version = "176";

  src = fetchFromGitHub {
    owner = "mostlygeek";
    repo = "llama-swap";
    tag = "v${finalAttrs.version}";
    hash = "sha256-nfkuaiEITOmpkiLft3iNW1VUexHwZ36c8gwcQKGANbQ=";
    # populate values that require us to use git. By doing this in postFetch we
    # can delete .git afterwards and maintain better reproducibility of the src.
    leaveDotGit = true;
    postFetch = ''
      cd "$out"
      git rev-parse HEAD > $out/COMMIT
      # '0000-00-00T00:00:00Z'
      date -u -d "@$(git log -1 --pretty=%ct)" "+'%Y-%m-%dT%H:%M:%SZ'" > $out/SOURCE_DATE_EPOCH
      find "$out" -name .git -print0 | xargs -0 rm -rf
    '';
  };

  vendorHash = "sha256-/EbFyuCVFxHTTO0UwSV3B/6PYUpudxB2FD8nNx1Bb+M=";

  passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
  passthru.npmDepsHash = "sha256-RKPcMwJ0qVOgbTxoGryrLn7AW0Bfmv9WasoY+gw4B30=";

  nativeBuildInputs = [
    versionCheckHook
  ];

  # required for testing
  __darwinAllowLocalNetworking = true;

  ldflags = [
    "-s"
    "-w"
    "-X main.version=${finalAttrs.version}"
  ];

  preBuild = ''
    # ldflags based on metadata from git and source
    ldflags+=" -X main.commit=$(cat COMMIT)"
    ldflags+=" -X main.date=$(cat SOURCE_DATE_EPOCH)"

    # copy for go:embed in proxy/ui_embed.go
    cp -r ${finalAttrs.passthru.ui}/ui_dist proxy/
  '';

  excludedPackages = [
    # regression testing tool
    "misc/process-cmd-test"
    # benchmark/regression testing tool
    "misc/benchmark-chatcompletion"
  ]
  ++ lib.optionals (!canExecute) [
    # some tests expect to execute `simple-something`; if it can't be executed
    # it's unneeded
    "misc/simple-responder"
  ];

  checkFlags =
    let
      skippedTests = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
        # Fail only on x86_64-darwin intermittently
        # https://github.com/mostlygeek/llama-swap/issues/320
        "TestProcess_AutomaticallyStartsUpstream"
        "TestProcess_WaitOnMultipleStarts"
        "TestProcess_BrokenModelConfig"
        "TestProcess_UnloadAfterTTL"
        "TestProcess_LowTTLValue"
        "TestProcess_HTTPRequestsHaveTimeToFinish"
        "TestProcess_SwapState"
        "TestProcess_ShutdownInterruptsHealthCheck"
        "TestProcess_ExitInterruptsHealthCheck"
        "TestProcess_ConcurrencyLimit"
        "TestProcess_StopImmediately"
        "TestProcess_ForceStopWithKill"
        "TestProcess_StopCmd"
        "TestProcess_EnvironmentSetCorrectly"
      ];
    in
    [ "-skip=^${builtins.concatStringsSep "$|^" skippedTests}$" ];

  # some tests expect to execute `simple-something` and proxy/helpers_test.go
  # checks the file exists
  doCheck = canExecute;
  preCheck = ''
    mkdir build
    ln -s "$GOPATH/bin/simple-responder" "./build/simple-responder_''${GOOS}_''${GOARCH}"
  '';
  postCheck = ''
    rm "$GOPATH/bin/simple-responder"
  '';

  preInstall = ''
    install -Dm444 -t "$out/share/llama-swap" config.example.yaml
  '';

  doInstallCheck = true;
  versionCheckProgramArg = "-version";

  passthru.tests.nixos = nixosTests.llama-swap;

  meta = {
    homepage = "https://github.com/mostlygeek/llama-swap";
    changelog = "https://github.com/mostlygeek/llama-swap/releases/tag/${finalAttrs.src.tag}";
    description = "Model swapping for llama.cpp (or any local OpenAPI compatible server)";
    longDescription = ''
      llama-swap is a light weight, transparent proxy server that provides
      automatic model swapping to llama.cpp's server.

      When a request is made to an OpenAI compatible endpoint, llama-swap will
      extract the `model` value and load the appropriate server configuration to
      serve it. If the wrong upstream server is running, it will be replaced
      with the correct one. This is where the "swap" part comes in. The upstream
      server is automatically swapped to the correct one to serve the request.

      In the most basic configuration llama-swap handles one model at a time.
      For more advanced use cases, the `groups` feature allows multiple models
      to be loaded at the same time. You have complete control over how your
      system resources are used.
    '';
    license = lib.licenses.mit;
    mainProgram = "llama-swap";
    maintainers = with lib.maintainers; [
      jk
      podium868909
    ];
  };
})
packages/llama-swap/ui.nix (new file)
@@ -0,0 +1,25 @@
{ llama-swap
, buildNpmPackage
,
}:

buildNpmPackage (finalAttrs: {
  pname = "${llama-swap.pname}-ui";
  inherit (llama-swap) version src npmDepsHash;

  postPatch = ''
    substituteInPlace vite.config.ts \
      --replace-fail "../proxy/ui_dist" "${placeholder "out"}/ui_dist"
  '';

  sourceRoot = "${finalAttrs.src.name}/ui";

  # bundled "ui_dist" doesn't need node_modules
  postInstall = ''
    rm -rf $out/lib
  '';

  meta = (removeAttrs llama-swap.meta [ "mainProgram" ]) // {
    description = "${llama-swap.meta.description} - UI";
  };
})
packages/qwen-code/default.nix (new file)
@@ -0,0 +1,91 @@
{ lib
, buildNpmPackage
, fetchFromGitHub
, jq
, git
, ripgrep
, pkg-config
, glib
, libsecret
, ...
}:
buildNpmPackage (finalAttrs: {
  pname = "qwen-code";
  version = "0.4.0-nightly.20251209.a6a57233";

  src = fetchFromGitHub {
    owner = "QwenLM";
    repo = "qwen-code";
    tag = "v${finalAttrs.version}";
    hash = "sha256-s9m1IN6jDDbNPr/vI/UcrauYPiyQTDODarLP3EvnG3Y=";
  };

  npmDepsHash = "sha256-ngAjCCoHLPZ+GgBRmAKbRYaF7l+RK3YGf1kEkwFbyQg=";

  nativeBuildInputs = [
    jq
    pkg-config
    git
  ];

  buildInputs = [
    ripgrep
    glib
    libsecret
  ];

  postPatch = ''
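    # Strip the node-pty and keytar native modules from package-lock.json (and from
    # every dependencies/optionalDependencies/peerDependencies list) so the npm
    # build does not try to build them.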
    ${jq}/bin/jq '
      del(.packages."node_modules/node-pty") |
      del(.packages."node_modules/@lydell/node-pty") |
      del(.packages."node_modules/@lydell/node-pty-darwin-arm64") |
      del(.packages."node_modules/@lydell/node-pty-darwin-x64") |
      del(.packages."node_modules/@lydell/node-pty-linux-arm64") |
      del(.packages."node_modules/@lydell/node-pty-linux-x64") |
      del(.packages."node_modules/@lydell/node-pty-win32-arm64") |
      del(.packages."node_modules/@lydell/node-pty-win32-x64") |
      del(.packages."node_modules/keytar") |
      walk(
        if type == "object" and has("dependencies") then
          .dependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        elif type == "object" and has("optionalDependencies") then
          .optionalDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        else .
        end
      ) |
      walk(
        if type == "object" and has("peerDependencies") then
          .peerDependencies |= with_entries(select(.key | (contains("node-pty") | not) and (contains("keytar") | not)))
        else .
        end
      )
    ' package-lock.json > package-lock.json.tmp && mv package-lock.json.tmp package-lock.json
  '';

  buildPhase = ''
    runHook preBuild
    npm run generate
    npm run bundle
    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall
    mkdir -p $out/bin $out/share/qwen-code
    cp -r dist/* $out/share/qwen-code/
    npm prune --production
    cp -r node_modules $out/share/qwen-code/
    find $out/share/qwen-code/node_modules -type l -delete || true
    patchShebangs $out/share/qwen-code
    ln -s $out/share/qwen-code/cli.js $out/bin/qwen
    runHook postInstall
  '';

  meta = {
    description = "Coding agent that lives in digital world";
    homepage = "https://github.com/QwenLM/qwen-code";
    mainProgram = "qwen";
    license = lib.licenses.asl20;
    platforms = lib.platforms.all;
  };
})
@@ -11,45 +11,22 @@ in
   system.stateVersion = "25.11";
   time.timeZone = "America/New_York";
   hardware.nvidia-container-toolkit.enable = true;
+  security.pam.loginLimits = [
+    {
+      domain = "*";
+      type = "soft";
+      item = "memlock";
+      value = "unlimited";
+    }
+    {
+      domain = "*";
+      type = "hard";
+      item = "memlock";
+      value = "unlimited";
+    }
+  ];

-  nixpkgs.config = {
-    allowUnfree = true;
-    packageOverrides = pkgs: {
-      llama-cpp =
-        (pkgs.llama-cpp.override {
-          cudaSupport = true;
-          blasSupport = true;
-          rocmSupport = false;
-          metalSupport = false;
-          vulkanSupport = true;
-        }).overrideAttrs
-          (oldAttrs: rec {
-            version = "7278";
-            src = pkgs.fetchFromGitHub {
-              owner = "ggml-org";
-              repo = "llama.cpp";
-              tag = "b${version}";
-              hash = "sha256-Gxi/sUIuVvX5+mcZj9vCvUgODsWPAFzESQz8TjTe/Mk=";
-              leaveDotGit = true;
-              postFetch = ''
-                git -C "$out" rev-parse --short HEAD > $out/COMMIT
-                find "$out" -name .git -print0 | xargs -0 rm -rf
-              '';
-            };
-            # Auto CPU Optimizations
-            cmakeFlags = (oldAttrs.cmakeFlags or [ ]) ++ [
-              "-DGGML_NATIVE=ON"
-              "-DGGML_CUDA_ENABLE_UNIFIED_MEMORY=1"
-              "-DCMAKE_CUDA_ARCHITECTURES=61" # GTX 1070 / GTX 1080ti
-            ];
-            # Disable Nix's march=native Stripping
-            preConfigure = ''
-              export NIX_ENFORCE_NO_NATIVE=0
-              ${oldAttrs.preConfigure or ""}
-            '';
-          });
-    };
-  };
+  nixpkgs.config.allowUnfree = true;

   fileSystems."/mnt/ssd" = {
     device = "/dev/disk/by-id/ata-Samsung_SSD_870_EVO_1TB_S6PTNZ0R620739L-part1";
@@ -106,78 +83,103 @@ in
     virtualisation = {
       podman = enabled;
     };

   };

+  systemd.services.llama-swap.serviceConfig.LimitMEMLOCK = "infinity";
   services.llama-swap = {
     enable = true;
     openFirewall = true;
+    package = pkgs.reichard.llama-swap;
     settings = {
       models = {
-        # https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
-        "smollm3-3b-instruct" = {
-          name = "SmolLM3(3B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.6 --top-p 0.95 --reasoning-budget 0 -sm none";
-        };
-
-        # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
-        "qwen3-next-80b-instruct" = {
-          name = "Qwen3 Next (80B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 32768 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 -sm none -ncmoe 39";
-        };
-
         # https://huggingface.co/mradermacher/gpt-oss-20b-heretic-GGUF/tree/main
         "gpt-oss-20b-thinking" = {
           name = "GPT OSS (20B) - Thinking";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-heretic-MXFP4.gguf --ctx-size 128000 --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'";
-        };
-
-        # https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
-        "ernie4.5-21b-instruct" = {
-          name = "ERNIE4.5 (21B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
-        };
-
-        # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
-        "qwen2.5-coder-7b-instruct" = {
-          name = "Qwen2.5 Coder (7B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf --fim-qwen-7b-default --ctx-size 131072 --port \${PORT}";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/gpt-oss-20b-heretic-MXFP4.gguf --ctx-size 128000 -ts 75,25 --mlock --chat-template-kwargs '{\"reasoning_effort\":\"low\"}'";
+          aliases = [
+            "claude-sonnet-4-5"
+            "claude-sonnet-4-5-20250929"
+            "claude-haiku-4-5"
+            "claude-haiku-4-5-20251001"
+            "claude-opus-4-5"
+            "claude-opus-4-5-20251101"
+          ];
         };

         # https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF/tree/main
         "qwen3-coder-30b-instruct" = {
           name = "Qwen3 Coder (30B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 55000 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Coder-30B-A3B-Instruct-UD-IQ2_M.gguf --ctx-size 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 --cache-type-k q4_0 --cache-type-v q4_0 --mlock";
         };

         # https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF/tree/main
-        "qwen3-30b-instruct" = {
-          name = "Qwen3 (30B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+        "qwen3-30b-2507-instruct" = {
+          name = "Qwen3 2507 (30B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Instruct-2507-UD-IQ2_M.gguf --ctx-size 262144 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --repeat-penalty 1.05 --cache-type-k q4_0 --cache-type-v q4_0";
         };

         # https://huggingface.co/unsloth/Qwen3-30B-A3B-Thinking-2507-GGUF/tree/main
-        "qwen3-30b-thinking" = {
-          name = "Qwen3 (30B) - Thinking";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+        "qwen3-30b-2507-thinking" = {
+          name = "Qwen3 2507 (30B) - Thinking";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-30B-A3B-Thinking-2507-Q4_K_M.gguf --ctx-size 16384 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
         };

+        # https://huggingface.co/unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF/tree/main
+        "qwen3-next-80b-instruct" = {
+          name = "Qwen3 Next (80B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf --ctx-size 32768 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 -sm none -ncmoe 39";
+        };
+
+        # https://huggingface.co/unsloth/SmolLM3-3B-128K-GGUF/tree/main
+        "smollm3-3b-instruct" = {
+          name = "SmolLM3(3B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/SmolLM3-3B-128K-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.6 --top-p 0.95 --reasoning-budget 0 -sm none";
+        };
+
+        # https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/tree/main
+        "ernie4.5-21b-instruct" = {
+          name = "ERNIE4.5 (21B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/ERNIE-4.5-21B-A3B-PT-UD-Q4_K_XL.gguf --ctx-size 98304 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20";
+        };
+
+        # https://huggingface.co/unsloth/Qwen2.5-Coder-7B-Instruct-128K-GGUF/tree/main
+        "qwen2.5-coder-7b-instruct" = {
+          name = "Qwen2.5 Coder (7B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-7B-Instruct-Q8_0.gguf --fim-qwen-7b-default --ctx-size 131072 --port \${PORT}";
+        };
+
+        # https://huggingface.co/unsloth/Qwen2.5-Coder-3B-Instruct-128K-GGUF/tree/main
+        "qwen2.5-coder-3b-instruct" = {
+          name = "Qwen2.5 Coder (3B) - Instruct";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server -m /mnt/ssd/Models/Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf --fim-qwen-3b-default --ctx-size 32768 -dev CUDA1 --port \${PORT}";
+        };
+
         # https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF/tree/main
         "qwen3-8b-vision" = {
           name = "Qwen3 Vision (8B) - Thinking";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf --ctx-size 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL.gguf --mmproj /mnt/ssd/Models/Qwen3-VL-8B-Instruct-UD-Q4_K_XL_mmproj-F16.gguf --ctx-size 131072 --temp 0.7 --min-p 0.0 --top-p 0.8 --top-k 20 --cache-type-k q4_0 --cache-type-v q4_0";
         };

         # https://huggingface.co/mradermacher/OLMoE-1B-7B-0125-Instruct-GGUF/tree/main
         "olmoe-7b-instruct" = {
           name = "OLMoE (7B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/OLMoE-1B-7B-0125-Instruct.Q8_0.gguf -dev CUDA0";
         };

         # https://huggingface.co/gabriellarson/Phi-mini-MoE-instruct-GGUF/tree/main
         "phi-mini-8b-instruct" = {
           name = "Phi mini (8B) - Instruct";
-          cmd = "${pkgs.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA0";
+          cmd = "${pkgs.reichard.llama-cpp}/bin/llama-server --port \${PORT} -m /mnt/ssd/Models/Phi-mini-MoE-instruct-Q8_0.gguf --repeat-penalty 1.05 --temp 0.0 --top-p 1.0 --top-k 1 -dev CUDA0";
         };
       };
+      groups = {
+        coding = {
+          swap = false;
+          exclusive = true;
+          members = [
+            "gpt-oss-20b-thinking"
+            "qwen2.5-coder-3b-instruct"
+          ];
+        };
+      };
     };