# syntax=docker/dockerfile:1.6
#
# dflash_server: native C++/CUDA OpenAI-compatible HTTP server.
#
# Source lives in the `lucebox-hub` git submodule. Initialize it (and its
# nested submodules) before building:
#   git submodule update --init --recursive
#
# Build context is the new-repo root; the Dockerfile copies just
# `lucebox-hub/dflash` into the builder.
#
# Models are NOT baked into the image. Mount them at /models at runtime, e.g.
#   docker run --gpus all -v /host/models:/models -p 18080:18080 dflash-server \
#     /models/Qwen3.6-27B-Q4_K_M.gguf \
#     --draft /models/draft/dflash-draft-3.6-q8_0.gguf \
#     --host 0.0.0.0 --port 18080
#
# Targets a single CUDA arch. Override at build time:
#   docker build --build-arg CUDA_ARCH=89 -t dflash-server .
#
# The server runs as the unprivileged user `dflash` (UID/GID 10001). Mounted
# model files must be readable by that UID; pass `--user` to `docker run` to
# run as a different user.

# Declared before the first FROM so both stages resolve the same base tags.
ARG CUDA_VERSION=12.6.0
ARG UBUNTU_VERSION=22.04

# ─── Builder ──────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder

ARG CUDA_ARCH=86
ARG CMAKE_BUILD_TYPE=Release
# Build-time only: ARG values are visible to RUN steps but, unlike ENV, are not
# persisted into the image environment.
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        ninja-build \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /src
COPY lucebox-hub/dflash /src

# CUDA driver stub - The devel image ships libcuda.so (no .1 suffix) under
# lib64/stubs for link-time resolution. ggml-cuda DT_NEEDEDs libcuda.so.1,
# so symlink and add the dir to -rpath-link for the final exe link.
RUN ln -sf libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1

RUN cmake -S /src -B /src/build -G Ninja \
        -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
        -DCMAKE_EXE_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs" \
    && cmake --build /src/build --target dflash_server -j"$(nproc)"

# Stage the server binary and every shared library produced by the build under
# /out so the runtime stage can copy them without the rest of the build tree.
# `-print -quit` stops at the first match, so no `| head` pipe (whose upstream
# failures /bin/sh would not report) is needed.
RUN set -eux; \
    mkdir -p /out/bin /out/lib; \
    bin="$(find /src/build -maxdepth 4 -type f -name dflash_server -executable -print -quit)"; \
    test -n "$bin" || { echo "dflash_server not found under /src/build" >&2; exit 1; }; \
    cp "$bin" /out/bin/dflash_server; \
    find /src/build \( -name '*.so' -o -name '*.so.*' \) -type f -exec cp -v {} /out/lib/ \;

# ─── Runtime ──────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# Build-time only, so running containers do not inherit a noninteractive
# frontend setting.
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /out/bin/ /usr/local/bin/
COPY --from=builder /out/lib/ /usr/local/lib/
# Refresh the dynamic linker cache so the copied libraries are found.
RUN ldconfig

# Dedicated unprivileged account with a stable numeric UID/GID and its own
# home directory. `--no-log-init` keeps useradd from writing lastlog/faillog
# entries into the layer.
RUN groupadd --system --gid 10001 dflash \
    && useradd --no-log-init --system --uid 10001 --gid dflash \
        --create-home --home-dir /home/dflash --shell /usr/sbin/nologin dflash

ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}

EXPOSE 18080
VOLUME ["/models"]

# Drop root only after every step that needs it (apt, ldconfig, useradd).
USER dflash

# No default CMD: the model path and server flags are supplied at `docker run`.
ENTRYPOINT ["/usr/local/bin/dflash_server"]