feat: initial dflash-server docker packaging
Multi-stage CUDA build of the native dflash_server from Luce-Org/lucebox-hub (pinned at 42f36f1). Models are not baked into the image; mount /models at runtime.

- Dockerfile: nvidia/cuda:12.6.0 devel -> runtime, CUDA_ARCH build-arg (default sm_86), libcuda.so.1 stub symlink + -rpath-link fix
- docker-compose.yml: reference service with ./models:/models:ro
- Makefile: submodules / doctor / build / run / shell / up-down-logs / push / clean. push targets gitea.va.reichard.io/evan
- README + .dockerignore + .gitignore
This commit is contained in:
137
Makefile
Normal file
137
Makefile
Normal file
@@ -0,0 +1,137 @@
|
||||
# dflash_server docker workflow.
#
# Common targets:
#   make build      # ensure submodules + build the image (slow: full CUDA compile)
#   make run        # run the server with the reference flag set; mounts ./models read-only
#   make up / down  # docker compose lifecycle
#   make shell      # interactive shell in the built image (entrypoint overridden with /bin/bash)
#   make push       # tag and push image to $(REGISTRY)
#   make clean      # remove the local and registry-tagged images
|
||||
|
||||
# User-tunable knobs. Every one is assigned with `?=`, so a value from the
# environment or the command line (e.g. `make build TAG=dev`) takes precedence.

# Container tooling; COMPOSE follows whatever DOCKER is set to.
DOCKER  ?= docker
COMPOSE ?= $(DOCKER) compose

# Local image reference used by build/run/shell/clean: $(IMAGE):$(TAG).
IMAGE ?= dflash-server
TAG   ?= latest

# Destination for `make push`; REMOTE_IMAGE is derived from the three values above.
REGISTRY     ?= gitea.va.reichard.io/evan
REMOTE_IMAGE ?= $(REGISTRY)/$(IMAGE):$(TAG)

# Forwarded to `docker build` as --build-arg values of the same names.
CUDA_ARCH    ?= 86
CUDA_VERSION ?= 12.6.0

# Host port that `make run` publishes to container port 18080.
HOST_PORT ?= 18080

# Host directory bind-mounted read-only at /models by `make run` and `make shell`.
MODELS_DIR ?= $(CURDIR)/models

# Model paths handed to the server by `make run`. They live under /models,
# i.e. they are resolved inside the container, not on the host.
TARGET_MODEL ?= /models/Qwen3.6-27B-Q4_K_M.gguf
DRAFT_MODEL  ?= /models/draft/dflash-draft-3.6-q8_0.gguf
|
||||
|
||||
# Top-level directory of the git working tree that contains $(CURDIR).
# Evaluated once at parse time; git's stderr is discarded, so the value is
# simply empty when git is missing or $(CURDIR) is not inside a repository
# (the `submodules` rule checks for that).
# $(CURDIR) is quoted so a checkout path containing spaces is passed to
# `git -C` as a single argument.
REPO_ROOT := $(shell git -C "$(CURDIR)" rev-parse --show-toplevel 2>/dev/null)
|
||||
# Paths (relative to $(CURDIR)) that the `submodules` and `doctor` rules probe
# with `[ -f ... ]` to decide whether the submodule checkouts are in place.
# All of them live below lucebox-hub/dflash/, so the common prefix is applied
# once with addprefix.
SUBMODULE_SENTINELS := $(addprefix lucebox-hub/dflash/, \
    CMakeLists.txt \
    deps/llama.cpp/CMakeLists.txt \
    deps/Block-Sparse-Attention/csrc/cutlass/include/cutlass/numeric_types.h)
|
||||
|
||||
# A bare `make` prints the target list rather than starting a build.
.DEFAULT_GOAL := help

.PHONY: help
# Self-documenting help. awk scans every makefile read so far
# ($(MAKEFILE_LIST)) for rule lines that start with a target name made of
# letters, `_` or `-` and carry a trailing `## description`, then prints the
# name (cyan, padded to 14 columns) followed by the description.
# The line filter uses `.*##` rather than `.*?##`: stacking two repetition
# operators is undefined in POSIX extended regular expressions, and the plain
# form selects exactly the same lines.
help:
	@awk 'BEGIN {FS = ":.*##"; printf "Targets:\n"} /^[a-zA-Z_-]+:.*##/ { printf " \033[36m%-14s\033[0m %s\n", $$1, $$2 }' $(MAKEFILE_LIST)
|
||||
|
||||
# ─── Host setup ───────────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: submodules
# Initialise the git submodules only when needed.
#   1. Abort if REPO_ROOT is empty, i.e. no enclosing git working tree was found.
#   2. Look for each sentinel file under $(CURDIR); the first one that is
#      missing triggers a recursive `git submodule update --init` from the
#      repository root. If all are present, nothing is fetched.
# The recipe's exit status is that of the last command run, so a failing
# `git submodule update` fails the target.
submodules: ## Ensure git submodules (incl. nested) are initialized
	@[ -n "$(REPO_ROOT)" ] || { \
		echo "ERROR: not inside a git working tree; cannot init submodules" >&2; \
		exit 1; \
	}
	@need_init=no; \
	for sentinel in $(SUBMODULE_SENTINELS); do \
		[ -f "$(CURDIR)/$$sentinel" ] || { need_init=yes; break; }; \
	done; \
	case $$need_init in \
		yes) \
			echo ">> Initializing git submodules (recursive) under $(REPO_ROOT)"; \
			git -C "$(REPO_ROOT)" submodule update --init --recursive ;; \
		*) \
			echo ">> Submodules already present" ;; \
	esac
|
||||
|
||||
.PHONY: doctor
# Host sanity check.
#   - Fails if the $(DOCKER) executable is not on PATH.
#   - Fails if `$(DOCKER) info` does not succeed (its output is discarded).
#   - Prints the docker version string.
#   - Emits a warning on stderr for every sentinel file missing under
#     $(CURDIR); missing sentinels never fail the target.
doctor: ## Check host prerequisites (docker, submodules)
	@if ! command -v $(DOCKER) >/dev/null; then \
		echo "ERROR: '$(DOCKER)' not found in PATH" >&2; \
		exit 1; \
	fi
	@if ! $(DOCKER) info >/dev/null 2>&1; then \
		echo "ERROR: '$(DOCKER) info' failed; daemon not reachable" >&2; \
		exit 1; \
	fi
	@docker_version=$$($(DOCKER) --version); \
	echo ">> docker OK ($$docker_version)"
	@for sentinel in $(SUBMODULE_SENTINELS); do \
		[ -f "$(CURDIR)/$$sentinel" ] || \
			echo "WARN: submodule file missing: $$sentinel (run 'make submodules')" >&2; \
	done
|
||||
|
||||
# ─── Build ────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Arguments shared by `build` and `rebuild`, kept in one place so the two
# targets cannot drift apart. Recursively expanded (`=`) so command-line
# overrides of CUDA_ARCH, CUDA_VERSION, IMAGE and TAG are honoured.
# Each expansion is double-quoted so that a value containing shell
# metacharacters (for example a semicolon-separated list such as
# CUDA_ARCH="86;89") reaches docker as a single argument instead of being
# split by the shell.
docker_build_args = \
    --build-arg "CUDA_ARCH=$(CUDA_ARCH)" \
    --build-arg "CUDA_VERSION=$(CUDA_VERSION)" \
    -t "$(IMAGE):$(TAG)" \
    -f Dockerfile \
    .

.PHONY: build
# Builds $(IMAGE):$(TAG) from ./Dockerfile with the current directory as the
# build context, after making sure the submodules are checked out.
build: submodules ## Build the docker image (full CUDA compile; takes a long time)
	$(DOCKER) build $(docker_build_args)

.PHONY: rebuild
# Same as `build`, but passes --no-cache to docker build.
rebuild: submodules ## Rebuild without cache
	$(DOCKER) build --no-cache $(docker_build_args)
|
||||
|
||||
# ─── Run ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: run
# Runs the server image in the foreground (--rm -it) with all GPUs exposed.
#   - $(MODELS_DIR) must already exist on the host; it is bind-mounted
#     read-only at /models.
#   - Host port $(HOST_PORT) is published to container port 18080, the port
#     the server is told to listen on below.
#   - $(TARGET_MODEL) and $(DRAFT_MODEL) are paths inside the container.
# The variable expansions are double-quoted, matching the existence check, so
# a models directory whose path contains spaces is passed to docker as one
# argument.
run: ## Run server with reference flag set (uses ./models)
	@if [ ! -d "$(MODELS_DIR)" ]; then \
		echo "ERROR: $(MODELS_DIR) does not exist. Place GGUFs there or override MODELS_DIR=." >&2; \
		exit 1; \
	fi
	$(DOCKER) run --rm -it --gpus all \
		-v "$(MODELS_DIR):/models:ro" \
		-p "$(HOST_PORT):18080" \
		"$(IMAGE):$(TAG)" \
		"$(TARGET_MODEL)" \
		--draft "$(DRAFT_MODEL)" \
		--host 0.0.0.0 --port 18080 \
		--max-ctx 32768 --max-tokens 512 \
		--fa-window 2048 \
		--ddtree --ddtree-budget 22 \
		--model-name luce-dflash
|
||||
|
||||
.PHONY: shell
# Starts /bin/bash in place of the image's entrypoint, with $(MODELS_DIR)
# bind-mounted read-only at /models. The mount spec and image reference are
# double-quoted so a models path containing spaces is passed to docker as one
# argument.
# NOTE(review): unlike `run`, this passes no --gpus flag and does not check
# that $(MODELS_DIR) exists first -- confirm that is intentional.
shell: ## Interactive shell inside the image (overrides entrypoint)
	$(DOCKER) run --rm -it --entrypoint /bin/bash \
		-v "$(MODELS_DIR):/models:ro" \
		"$(IMAGE):$(TAG)"
|
||||
|
||||
# ─── Compose ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Thin wrappers around $(COMPOSE). No -f option is passed, so compose locates
# its configuration file on its own.
.PHONY: up down logs

# Start the services detached.
up: ## docker compose up -d
	$(COMPOSE) up -d

# Stop the services.
down: ## docker compose down
	$(COMPOSE) down

# Follow the service logs until interrupted.
logs: ## tail compose logs
	$(COMPOSE) logs -f
|
||||
|
||||
# ─── Publish ──────────────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: push
# Publishes the local image: first tags $(IMAGE):$(TAG) as $(REMOTE_IMAGE),
# then pushes that tag. There is no prerequisite on `build`, so the local
# image must already exist or the tag step fails.
push: ## Tag and push image to $(REGISTRY)
	$(DOCKER) tag $(IMAGE):$(TAG) $(REMOTE_IMAGE)
	$(DOCKER) push $(REMOTE_IMAGE)
|
||||
|
||||
# ─── Clean ────────────────────────────────────────────────────────────────────
|
||||
|
||||
.PHONY: clean
# Removes both the local tag and the registry tag of the image.
# Each line carries a leading `-` so make ignores a non-zero exit status from
# `rmi` (for example when that tag does not exist) and `make clean` still
# succeeds.
clean: ## Remove the built image
	-$(DOCKER) rmi $(IMAGE):$(TAG)
	-$(DOCKER) rmi $(REMOTE_IMAGE)
|
||||
Reference in New Issue
Block a user