chore(llm): clean up models & fix llama-cpp issue

2025-12-10 12:12:50 -05:00
parent 30934c8f7c
commit c1a650a90e
12 changed files with 501 additions and 135 deletions
--- a/packages/llama-swap/default.nix
+++ b/packages/llama-swap/default.nix
@@ -0,0 +1,143 @@
+{ lib
+, stdenv
+, buildGoModule
+, fetchFromGitHub
+, versionCheckHook
+, callPackage
+, nixosTests
+,
+}:
+
+let
+  canExecute = stdenv.buildPlatform.canExecute stdenv.hostPlatform;
+in
+buildGoModule (finalAttrs: {
+  pname = "llama-swap";
+  version = "176";
+
+  src = fetchFromGitHub {
+    owner = "mostlygeek";
+    repo = "llama-swap";
+    tag = "v${finalAttrs.version}";
+    hash = "sha256-nfkuaiEITOmpkiLft3iNW1VUexHwZ36c8gwcQKGANbQ=";
+    # populate values that require us to use git. By doing this in postFetch we
+    # can delete .git afterwards and maintain better reproducibility of the src.
+    leaveDotGit = true;
+    postFetch = ''
+      cd "$out"
+      git rev-parse HEAD > $out/COMMIT
+      # '0000-00-00T00:00:00Z'
+      date -u -d "@$(git log -1 --pretty=%ct)" "+'%Y-%m-%dT%H:%M:%SZ'" > $out/SOURCE_DATE_EPOCH
+      find "$out" -name .git -print0 | xargs -0 rm -rf
+    '';
+  };
+
+  vendorHash = "sha256-/EbFyuCVFxHTTO0UwSV3B/6PYUpudxB2FD8nNx1Bb+M=";
+
+  passthru.ui = callPackage ./ui.nix { llama-swap = finalAttrs.finalPackage; };
+  passthru.npmDepsHash = "sha256-RKPcMwJ0qVOgbTxoGryrLn7AW0Bfmv9WasoY+gw4B30=";
+
+  nativeBuildInputs = [
+    versionCheckHook
+  ];
+
+  # required for testing
+  __darwinAllowLocalNetworking = true;
+
+  ldflags = [
+    "-s"
+    "-w"
+    "-X main.version=${finalAttrs.version}"
+  ];
+
+  preBuild = ''
+    # ldflags based on metadata from git and source
+    ldflags+=" -X main.commit=$(cat COMMIT)"
+    ldflags+=" -X main.date=$(cat SOURCE_DATE_EPOCH)"
+
+    # copy for go:embed in proxy/ui_embed.go
+    cp -r ${finalAttrs.passthru.ui}/ui_dist proxy/
+  '';
+
+  excludedPackages = [
+    # regression testing tool
+    "misc/process-cmd-test"
+    # benchmark/regression testing tool
+    "misc/benchmark-chatcompletion"
+  ]
+  ++ lib.optionals (!canExecute) [
+    # some tests expect to execute `simple-something`; if it can't be executed
+    # it's unneeded
+    "misc/simple-responder"
+  ];
+
+  checkFlags =
+    let
+      skippedTests = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
+        # Fail only on x86_64-darwin intermittently
+        # https://github.com/mostlygeek/llama-swap/issues/320
+        "TestProcess_AutomaticallyStartsUpstream"
+        "TestProcess_WaitOnMultipleStarts"
+        "TestProcess_BrokenModelConfig"
+        "TestProcess_UnloadAfterTTL"
+        "TestProcess_LowTTLValue"
+        "TestProcess_HTTPRequestsHaveTimeToFinish"
+        "TestProcess_SwapState"
+        "TestProcess_ShutdownInterruptsHealthCheck"
+        "TestProcess_ExitInterruptsHealthCheck"
+        "TestProcess_ConcurrencyLimit"
+        "TestProcess_StopImmediately"
+        "TestProcess_ForceStopWithKill"
+        "TestProcess_StopCmd"
+        "TestProcess_EnvironmentSetCorrectly"
+      ];
+    in
+    [ "-skip=^${builtins.concatStringsSep "$|^" skippedTests}$" ];
+
+  # some tests expect to execute `simple-something` and proxy/helpers_test.go
+  # checks the file exists
+  doCheck = canExecute;
+  preCheck = ''
+    mkdir build
+    ln -s "$GOPATH/bin/simple-responder" "./build/simple-responder_''${GOOS}_''${GOARCH}"
+  '';
+  postCheck = ''
+    rm "$GOPATH/bin/simple-responder"
+  '';
+
+  preInstall = ''
+    install -Dm444 -t "$out/share/llama-swap" config.example.yaml
+  '';
+
+  doInstallCheck = true;
+  versionCheckProgramArg = "-version";
+
+  passthru.tests.nixos = nixosTests.llama-swap;
+
+  meta = {
+    homepage = "https://github.com/mostlygeek/llama-swap";
+    changelog = "https://github.com/mostlygeek/llama-swap/releases/tag/${finalAttrs.src.tag}";
+    description = "Model swapping for llama.cpp (or any local OpenAPI compatible server)";
+    longDescription = ''
+      llama-swap is a light weight, transparent proxy server that provides
+      automatic model swapping to llama.cpp's server.
+
+      When a request is made to an OpenAI compatible endpoint, llama-swap will
+      extract the `model` value and load the appropriate server configuration to
+      serve it. If the wrong upstream server is running, it will be replaced
+      with the correct one. This is where the "swap" part comes in. The upstream
+      server is automatically swapped to the correct one to serve the request.
+
+      In the most basic configuration llama-swap handles one model at a time.
+      For more advanced use cases, the `groups` feature allows multiple models
+      to be loaded at the same time. You have complete control over how your
+      system resources are used.
+    '';
+    license = lib.licenses.mit;
+    mainProgram = "llama-swap";
+    maintainers = with lib.maintainers; [
+      jk
+      podium868909
+    ];
+  };
+})