feat: initial dflash-server docker packaging
Multi-stage CUDA build of the native dflash_server from Luce-Org/lucebox-hub (pinned at 42f36f1). Models are not baked into the image; mount /models at runtime. - Dockerfile: nvidia/cuda:12.6.0 devel -> runtime, CUDA_ARCH build-arg (default sm_86), libcuda.so.1 stub symlink + -rpath-link fix - docker-compose.yml: reference service with ./models:/models:ro - Makefile: submodules / doctor / build / run / shell / up-down-logs / push / clean. push targets gitea.va.reichard.io/evan - README + .dockerignore + .gitignore
This commit is contained in:
81
Dockerfile
Normal file
81
Dockerfile
Normal file
@@ -0,0 +1,81 @@
|
||||
# syntax=docker/dockerfile:1.6
|
||||
#
|
||||
# dflash_server: native C++/CUDA OpenAI-compatible HTTP server.
|
||||
#
|
||||
# Source lives in the `lucebox-hub` git submodule. Initialize it (and its
|
||||
# nested submodules) before building:
|
||||
# git submodule update --init --recursive
|
||||
#
|
||||
# Build context is the root of this packaging repository (the directory that
# contains this Dockerfile); only `lucebox-hub/dflash` is copied into the builder.
|
||||
#
|
||||
# Models are NOT baked into the image. Mount them at /models at runtime, e.g.
|
||||
# docker run --gpus all -v /host/models:/models -p 18080:18080 dflash-server \
|
||||
# /models/Qwen3.6-27B-Q4_K_M.gguf \
|
||||
# --draft /models/draft/dflash-draft-3.6-q8_0.gguf \
|
||||
# --host 0.0.0.0 --port 18080
|
||||
#
|
||||
# Targets a single CUDA arch. Override at build time:
|
||||
# docker build --build-arg CUDA_ARCH=89 -t dflash-server .
|
||||
|
||||
# Base-image coordinates. Declared before the first FROM, so they are only
# usable inside FROM lines.
ARG CUDA_VERSION=12.6.0
ARG UBUNTU_VERSION=22.04

# ─── Builder ──────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder

# Build knobs: the single CUDA architecture to compile for and the CMake
# build type. Override either with --build-arg.
ARG CUDA_ARCH=86
ARG CMAKE_BUILD_TYPE=Release

# Build-time only: keeps apt/debconf from prompting during RUN steps. ARG is
# used instead of ENV so the value is not recorded in the stage's environment.
ARG DEBIAN_FRONTEND=noninteractive
|
||||
# Toolchain for the native build. The apt package index is removed in the same
# layer so it does not persist in the stage.
RUN <<EOF
set -e
apt-get update
apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    git \
    ninja-build \
    pkg-config
rm -rf /var/lib/apt/lists/*
EOF
|
||||
|
||||
# CUDA driver stub: the devel image provides libcuda.so (without the .1 suffix)
# under lib64/stubs for link-time resolution, while ggml-cuda records
# libcuda.so.1 as DT_NEEDED. Create the .1 name next to the stub; the directory
# is passed to the linker via -rpath-link when the final executable is linked.
RUN ln -sf libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1

# Only the dflash sources from the lucebox-hub submodule enter the builder.
WORKDIR /src
COPY lucebox-hub/dflash /src
|
||||
|
||||
# Configure with Ninja and compile only the dflash_server target.
# The build-arg expansions are quoted so a value containing whitespace or glob
# characters reaches cmake as a single argument instead of being word-split.
# -rpath-link points the linker at the CUDA stubs directory so libcuda.so.1
# can be resolved while linking the executable.
RUN cmake -S /src -B /src/build -G Ninja \
        -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
        -DCMAKE_EXE_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs" \
    && cmake --build /src/build --target dflash_server -j"$(nproc)"
|
||||
|
||||
# Stage the build outputs under /out so the runtime stage can copy them:
#   /out/bin/dflash_server  - the first executable named dflash_server found
#                             within four directory levels of /src/build
#   /out/lib/               - every regular *.so / *.so.* file under /src/build
# `-print -quit` stops at the first match without a pipeline, so a failing
# find is no longer hidden behind the exit status of `head` (no pipefail in sh).
# The build fails with a message on stderr when no binary is found.
RUN set -eux; \
    mkdir -p /out/bin /out/lib; \
    server_bin="$(find /src/build -maxdepth 4 -type f -name dflash_server -executable -print -quit)"; \
    if [ -z "$server_bin" ]; then \
        echo "dflash_server not found under /src/build" >&2; \
        exit 1; \
    fi; \
    cp "$server_bin" /out/bin/dflash_server; \
    find /src/build -type f \( -name '*.so' -o -name '*.so.*' \) -exec cp -v {} /out/lib/ \;
|
||||
|
||||
# ─── Runtime ──────────────────────────────────────────────────────────────────
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# ARG rather than ENV: the non-interactive apt setting is needed only while
# building and must not end up in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime-only packages; libgomp1 is the GNU OpenMP runtime library
# (presumably required by the compiled binary or its libraries - confirm with ldd).
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Pull in what the builder staged under /out: shared libraries first, then the
# server binary. ldconfig refreshes the dynamic linker cache afterwards so the
# libraries placed in /usr/local/lib can be located.
COPY --from=builder /out/lib/ /usr/local/lib/
COPY --from=builder /out/bin/ /usr/local/bin/
RUN ldconfig
|
||||
|
||||
# Runtime environment:
#   LD_LIBRARY_PATH             - /usr/local/lib is searched first, followed by
#                                 whatever value was already set at this point
#   NVIDIA_DRIVER_CAPABILITIES  - compute,utility
#   NVIDIA_VISIBLE_DEVICES      - all
# The NVIDIA_* variables are settings consumed by the NVIDIA container runtime.
ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
# Container contract: the port documented for the HTTP server, the mount point
# where model files are expected (they are not baked into the image), and the
# server binary as the entrypoint. Arguments given to `docker run` after the
# image name are passed straight to dflash_server.
# NOTE(review): no USER is set, so the server runs as root - consider a
# dedicated non-root user once /models mount permissions are confirmed.
EXPOSE 18080/tcp
VOLUME ["/models"]
ENTRYPOINT ["/usr/local/bin/dflash_server"]
|
||||
Reference in New Issue
Block a user