# syntax=docker/dockerfile:1.6
#
# dflash-server-docker/Dockerfile
#
# dflash_server: native C++/CUDA OpenAI-compatible HTTP server.
#
# Source lives in the `lucebox-hub` git submodule. Initialize it (and its
# nested submodules) before building:
#   git submodule update --init --recursive
#
# Build context is the new-repo root; the Dockerfile copies just
# `lucebox-hub/dflash` into the builder.
#
# Models are NOT baked into the image. Mount them at /models at runtime, e.g.
#   docker run --gpus all -v /host/models:/models -p 18080:18080 dflash-server \
#       /models/Qwen3.6-27B-Q4_K_M.gguf \
#       --draft /models/draft/dflash-draft-3.6-q8_0.gguf \
#       --host 0.0.0.0 --port 18080
#
# Targets a single CUDA arch. Override at build time:
#   docker build --build-arg CUDA_ARCH=89 -t dflash-server .

# Base-image selectors. They are declared ahead of the first FROM so that the
# FROM lines of both stages can interpolate them; override with --build-arg.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.6.0

# ─── Builder ──────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder

# Build knobs for this stage: CUDA_ARCH is passed to CMake as
# CMAKE_CUDA_ARCHITECTURES and CMAKE_BUILD_TYPE as the option of the same name.
ARG CUDA_ARCH=86
ARG CMAKE_BUILD_TYPE=Release

# Build-time only: an ARG is visible to the RUN steps of this stage (keeping
# apt non-interactive) without being recorded in the image environment the
# way ENV would be.
ARG DEBIAN_FRONTEND=noninteractive

# Compiler toolchain and build tools; the apt lists are removed in the same
# layer so they never occupy space in it. Packages are kept sorted for diffs.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        ninja-build \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# CUDA driver stub: the devel image only ships the link-time stub as
# lib64/stubs/libcuda.so (no .1 suffix), while ggml-cuda records a DT_NEEDED
# on libcuda.so.1. Create the missing name as a symlink next to the stub; the
# final executable link points -rpath-link at this directory.
RUN ln -sf libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1

# Bring only the dflash subtree of the lucebox-hub submodule into the builder.
WORKDIR /src
COPY lucebox-hub/dflash /src

# Configure with Ninja and build only the dflash_server target.
# The build-arg expansions are double-quoted so that a value containing shell
# metacharacters or spaces (for example a ';'-separated architecture list)
# reaches CMake as a single argument instead of being split or terminating
# the command. -rpath-link lets the linker resolve libcuda.so.1 against the
# driver stub directory without adding a runtime search path.
RUN cmake -S /src -B /src/build -G Ninja \
        -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
        -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCH}" \
        -DCMAKE_EXE_LINKER_FLAGS="-Wl,-rpath-link,/usr/local/cuda/lib64/stubs" \
    && cmake --build /src/build --target dflash_server -j"$(nproc)"

# Stage the server binary and every shared library found in the build tree
# under /out, so the runtime stage can copy them without the rest of the tree.
#
# * `-print -quit` makes find stop at the first matching executable. Unlike
#   piping into `head -n1`, there is no pipeline, so a failing find is caught
#   by `set -e` instead of being hidden behind head's exit status.
# * `-exec cp ... {} +` makes find exit non-zero when a copy fails; with the
#   `\;` form a failed cp would go unnoticed. With no matching libraries cp is
#   simply never invoked and /out/lib stays empty.
RUN set -eux; \
    mkdir -p /out/bin /out/lib; \
    server_bin="$(find /src/build -maxdepth 4 -type f -name dflash_server -executable -print -quit)"; \
    if [ -z "$server_bin" ]; then \
        echo "dflash_server not found under /src/build" >&2; \
        exit 1; \
    fi; \
    cp "$server_bin" /out/bin/dflash_server; \
    find /src/build -type f \( -name '*.so' -o -name '*.so.*' \) -exec cp -v -t /out/lib/ {} +

# ─── Runtime ──────────────────────────────────────────────────────────────────
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# ARG rather than ENV: apt stays non-interactive while this stage builds, but
# DEBIAN_FRONTEND is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime-only packages; the apt lists are dropped in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Pull in what the builder staged under /out, then rebuild the dynamic linker
# cache so the freshly copied shared libraries are registered.
COPY --from=builder /out/lib/ /usr/local/lib/
COPY --from=builder /out/bin/ /usr/local/bin/
RUN ldconfig

# GPU exposure settings (presumably consumed by the NVIDIA container runtime —
# verify against its documentation).
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Search /usr/local/lib ahead of whatever LD_LIBRARY_PATH the base image set.
ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}

# Models are supplied at run time through /models; 18080 is the port used in
# the usage example at the top of this file.
VOLUME ["/models"]
EXPOSE 18080

# NOTE(review): no USER is set in this stage, so the server runs as the base
# image's default user — confirm whether a dedicated non-root user is feasible.
# Exec form: arguments given to `docker run` are appended to the server command.
ENTRYPOINT ["/usr/local/bin/dflash_server"]