Files
dflash-server-docker/Makefile
Evan Reichard ab19369966 feat: initial dflash-server docker packaging
Multi-stage CUDA build of the native dflash_server from
Luce-Org/lucebox-hub (pinned at 42f36f1). Models are not baked
into the image; mount /models at runtime.

- Dockerfile: nvidia/cuda:12.6.0 devel -> runtime, CUDA_ARCH build-arg
  (default sm_86), libcuda.so.1 stub symlink + -rpath-link fix
- docker-compose.yml: reference service with ./models:/models:ro
- Makefile: submodules / doctor / build / run / shell / up-down-logs /
  push / clean. push targets gitea.va.reichard.io/evan
- README + .dockerignore + .gitignore
2026-05-21 09:24:57 -04:00

138 lines
5.3 KiB
Makefile

# dflash_server docker workflow.
#
# Common targets:
# make build # ensure submodules + build the image (slow: full CUDA compile)
# make run # run with the reference flag set, mounts ./models
# make up / down # docker compose lifecycle
# make shell # interactive shell in the built image (no entrypoint)
# make push # tag and push image to $(REGISTRY)
# make clean # remove the image
# Configuration. Every `?=` variable can be overridden from the environment
# or on the command line, e.g. `make build TAG=dev CUDA_ARCH=89`.

# Container CLI and the compose command derived from it.
DOCKER ?= docker
COMPOSE ?= $(DOCKER) compose

# Local image reference produced by `build` / `rebuild`.
IMAGE ?= dflash-server
TAG ?= latest

# Destination used by `push`. REMOTE_IMAGE is recursively expanded, so
# overriding REGISTRY, IMAGE or TAG is reflected in it automatically.
REGISTRY ?= gitea.va.reichard.io/evan
REMOTE_IMAGE ?= $(REGISTRY)/$(IMAGE):$(TAG)

# Forwarded to the Dockerfile as --build-arg values of the same name.
CUDA_ARCH ?= 86
CUDA_VERSION ?= 12.6.0

# Host port published by `run`; the container side is fixed at 18080.
HOST_PORT ?= 18080

# Host directory bind-mounted read-only at /models by `run` and `shell`.
MODELS_DIR ?= $(CURDIR)/models

# Model paths as passed to the server, i.e. inside the container's /models.
TARGET_MODEL ?= /models/Qwen3.6-27B-Q4_K_M.gguf
DRAFT_MODEL ?= /models/draft/dflash-draft-3.6-q8_0.gguf

# Top level of the enclosing git working tree. Expands to the empty string
# when git fails (stderr is discarded), which `submodules` checks for.
# $(CURDIR) is quoted so a checkout path containing spaces is passed to git
# as a single argument.
REPO_ROOT := $(shell git -C "$(CURDIR)" rev-parse --show-toplevel 2>/dev/null)

# Files (relative to $(CURDIR)) whose presence is used by `submodules` and
# `doctor` as evidence that the submodules, including nested ones, are
# checked out.
SUBMODULE_SENTINELS := \
    lucebox-hub/dflash/CMakeLists.txt \
    lucebox-hub/dflash/deps/llama.cpp/CMakeLists.txt \
    lucebox-hub/dflash/deps/Block-Sparse-Attention/csrc/cutlass/include/cutlass/numeric_types.h

# A bare `make` prints the target list instead of starting a build.
.DEFAULT_GOAL := help
# Self-documenting help: list every rule header that carries a trailing
# `## description`, scanning all makefiles parsed so far. awk reads the raw
# file text, so make variables inside a description are printed unexpanded.
# The pattern uses a plain `.*##`; awk regexes are POSIX EREs, which have no
# lazy `*?` quantifier, and `.*##` selects exactly the same lines.
.PHONY: help
help:
	@awk 'BEGIN {FS = ":.*##"; printf "Targets:\n"} /^[a-zA-Z_-]+:.*##/ { printf " \033[36m%-14s\033[0m %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
# ─── Host setup ───────────────────────────────────────────────────────────────
# Make sure the submodule content needed by the image build is present.
#
# The sentinel files are checked first: when all of them exist there is
# nothing to do, so the target succeeds even outside a git working tree
# (for example an exported source tarball that already contains the
# submodule content). git is only required when a sentinel is missing; in
# that case an empty REPO_ROOT is a hard error, otherwise all submodules are
# initialized recursively from the repository top level. The recipe's exit
# status is that of `git submodule update`, so a failed init fails the
# target.
.PHONY: submodules
submodules: ## Ensure git submodules (incl. nested) are initialized
	@missing=0; \
	for f in $(SUBMODULE_SENTINELS); do \
		if [ ! -f "$(CURDIR)/$$f" ]; then missing=1; break; fi; \
	done; \
	if [ $$missing -eq 0 ]; then \
		echo ">> Submodules already present"; \
		exit 0; \
	fi; \
	if [ -z "$(REPO_ROOT)" ]; then \
		echo "ERROR: not inside a git working tree; cannot init submodules" >&2; \
		exit 1; \
	fi; \
	echo ">> Initializing git submodules (recursive) under $(REPO_ROOT)"; \
	git -C "$(REPO_ROOT)" submodule update --init --recursive
# Host sanity check. A missing docker binary or an unreachable daemon is
# fatal; a missing submodule sentinel only produces a warning on stderr and
# does not fail the target.
.PHONY: doctor
doctor: ## Check host prerequisites (docker, submodules)
	@if ! command -v $(DOCKER) >/dev/null; then \
		echo "ERROR: '$(DOCKER)' not found in PATH" >&2; \
		exit 1; \
	fi
	@if ! $(DOCKER) info >/dev/null 2>&1; then \
		echo "ERROR: '$(DOCKER) info' failed; daemon not reachable" >&2; \
		exit 1; \
	fi
	@echo ">> docker OK ($$($(DOCKER) --version))"
	@for sentinel in $(SUBMODULE_SENTINELS); do \
		[ -f "$(CURDIR)/$$sentinel" ] || \
			echo "WARN: submodule file missing: $$sentinel (run 'make submodules')" >&2; \
	done
# ─── Build ────────────────────────────────────────────────────────────────────
# Build $(IMAGE):$(TAG) from ./Dockerfile with the current directory as the
# build context. `submodules` runs first so the sources it checks for are
# present before docker is invoked.
.PHONY: build
build: submodules ## Build the docker image (full CUDA compile; takes a long time)
	$(DOCKER) build \
		--file Dockerfile \
		--tag $(IMAGE):$(TAG) \
		--build-arg CUDA_VERSION=$(CUDA_VERSION) \
		--build-arg CUDA_ARCH=$(CUDA_ARCH) \
		.
# Same image, tag and build args as `build`, but with --no-cache so no
# previously cached layer is reused.
.PHONY: rebuild
rebuild: submodules ## Rebuild without cache
	$(DOCKER) build \
		--no-cache \
		--file Dockerfile \
		--tag $(IMAGE):$(TAG) \
		--build-arg CUDA_VERSION=$(CUDA_VERSION) \
		--build-arg CUDA_ARCH=$(CUDA_ARCH) \
		.
# ─── Run ──────────────────────────────────────────────────────────────────────
# Run the server in the foreground (--rm -it) with all GPUs exposed.
#
# $(MODELS_DIR) must exist on the host; it is bind-mounted read-only at
# /models, so TARGET_MODEL and DRAFT_MODEL are paths inside the container.
# The server is told to listen on 18080; HOST_PORT only changes the host
# side of the published port.
#
# The mount source is quoted so a MODELS_DIR containing spaces stays one
# argument, consistent with the quoted existence check.
# NOTE(review): docker interprets a bare name such as MODELS_DIR=models as a
# named volume rather than a directory; prefer an absolute path.
.PHONY: run
run: ## Run server with reference flag set (uses ./models)
	@if [ ! -d "$(MODELS_DIR)" ]; then \
		echo "ERROR: $(MODELS_DIR) does not exist. Place GGUFs there or override MODELS_DIR=." >&2; \
		exit 1; \
	fi
	$(DOCKER) run --rm -it --gpus all \
		-v "$(MODELS_DIR)":/models:ro \
		-p $(HOST_PORT):18080 \
		$(IMAGE):$(TAG) \
		$(TARGET_MODEL) \
		--draft $(DRAFT_MODEL) \
		--host 0.0.0.0 --port 18080 \
		--max-ctx 32768 --max-tokens 512 \
		--fa-window 2048 \
		--ddtree --ddtree-budget 22 \
		--model-name luce-dflash
# Open an interactive bash in the built image instead of its entrypoint,
# with $(MODELS_DIR) bind-mounted read-only at /models. No GPU flag and no
# port mapping are passed here.
# The mount source is quoted so a MODELS_DIR containing spaces stays one
# argument.
.PHONY: shell
shell: ## Interactive shell inside the image (overrides entrypoint)
	$(DOCKER) run --rm -it --entrypoint /bin/bash \
		-v "$(MODELS_DIR)":/models:ro \
		$(IMAGE):$(TAG)
# ─── Compose ──────────────────────────────────────────────────────────────────
# Thin wrappers around $(COMPOSE): start detached, stop, and follow logs.
.PHONY: up down logs
up: ## docker compose up -d
	$(COMPOSE) up --detach
down: ## docker compose down
	$(COMPOSE) down
logs: ## tail compose logs
	$(COMPOSE) logs --follow
# ─── Publish ──────────────────────────────────────────────────────────────────
# Alias the local image as $(REMOTE_IMAGE) and upload it. The push only runs
# when tagging succeeded. The image is not built here; it must already
# exist locally.
.PHONY: push
push: ## Tag and push image to $(REGISTRY)
	$(DOCKER) tag $(IMAGE):$(TAG) $(REMOTE_IMAGE) && \
		$(DOCKER) push $(REMOTE_IMAGE)
# ─── Clean ────────────────────────────────────────────────────────────────────
# Remove both the local tag and the registry-qualified tag. Each command is
# prefixed with `-` so that a tag that does not exist (never built or never
# pushed) does not fail the target.
.PHONY: clean
clean: ## Remove the built image
	-$(DOCKER) image rm $(IMAGE):$(TAG)
	-$(DOCKER) image rm $(REMOTE_IMAGE)