# dflash_server docker workflow.
#
# Common targets:
#   make build         # ensure submodules + build the image (slow: full CUDA compile)
#   make run           # run with the reference flag set, mounts ./models
#   make up / down     # docker compose lifecycle
#   make shell         # interactive shell in the built image (no entrypoint)
#   make push          # tag and push image to $(REGISTRY)
#   make clean         # remove the image
#
# Every `?=` variable below can be overridden from the environment or the
# command line, e.g. `make build CUDA_ARCH=89 TAG=dev`.

# Container tooling.
DOCKER         ?= docker
COMPOSE        ?= $(DOCKER) compose

# Local image name/tag and the fully-qualified name used by `push`.
IMAGE          ?= dflash-server
TAG            ?= latest
REGISTRY       ?= gitea.va.reichard.io/evan
REMOTE_IMAGE   ?= $(REGISTRY)/$(IMAGE):$(TAG)

# Forwarded to `docker build` as --build-arg values.
CUDA_ARCH      ?= 86
CUDA_VERSION   ?= 12.6.0

# Host side of the port mapping; the container side is fixed at 18080.
HOST_PORT      ?= 18080

# Host directory mounted read-only at /models inside the container.
MODELS_DIR     ?= $(CURDIR)/models

# Model paths as passed to the container (under the /models mount).
TARGET_MODEL   ?= /models/Qwen3.6-27B-Q4_K_M.gguf
DRAFT_MODEL    ?= /models/draft/dflash-draft-3.6-q8_0.gguf

# Top level of the enclosing git working tree; empty when not inside one.
# CURDIR is quoted so a checkout path containing spaces still resolves.
REPO_ROOT := $(shell git -C "$(CURDIR)" rev-parse --show-toplevel 2>/dev/null)

# One file per submodule (including nested ones), relative to this directory.
# `submodules` and `doctor` test for these files to decide whether the
# submodules have been checked out.
SUBMODULE_SENTINELS := \
  lucebox-hub/dflash/CMakeLists.txt \
  lucebox-hub/dflash/deps/llama.cpp/CMakeLists.txt \
  lucebox-hub/dflash/deps/Block-Sparse-Attention/csrc/cutlass/include/cutlass/numeric_types.h

# Build arguments shared by `build` and `rebuild`, kept in one place so the two
# targets cannot drift apart. Recursive (`=`) so late overrides are honoured.
docker_build_args = \
  --build-arg CUDA_ARCH=$(CUDA_ARCH) \
  --build-arg CUDA_VERSION=$(CUDA_VERSION)

.DEFAULT_GOAL := help

# Self-documenting help: prints every `target: ## description` line found in
# the parsed makefiles.
.PHONY: help
help:
	@awk 'BEGIN {FS = ":.*##"; printf "Targets:\n"} /^[a-zA-Z_-]+:.*##/ { printf " \033[36m%-14s\033[0m %s\n", $$1, $$2 }' $(MAKEFILE_LIST)

# ─── Host setup ───────────────────────────────────────────────────────────────

# Fails when not inside a git working tree. Otherwise runs a recursive
# `git submodule update --init` from the repository root, but only when at
# least one sentinel file is missing.
.PHONY: submodules
submodules: ## Ensure git submodules (incl. nested) are initialized
	@if [ -z "$(REPO_ROOT)" ]; then \
		echo "ERROR: not inside a git working tree; cannot init submodules" >&2; \
		exit 1; \
	fi
	@missing=0; for f in $(SUBMODULE_SENTINELS); do \
		if [ ! -f "$(CURDIR)/$$f" ]; then missing=1; break; fi; \
	done; \
	if [ $$missing -eq 1 ]; then \
		echo ">> Initializing git submodules (recursive) under $(REPO_ROOT)"; \
		git -C "$(REPO_ROOT)" submodule update --init --recursive; \
	else \
		echo ">> Submodules already present"; \
	fi

# Hard-fails when the docker CLI is missing or `docker info` fails; missing
# submodule files only produce warnings.
.PHONY: doctor
doctor: ## Check host prerequisites (docker, submodules)
	@command -v $(DOCKER) >/dev/null || { echo "ERROR: '$(DOCKER)' not found in PATH" >&2; exit 1; }
	@$(DOCKER) info >/dev/null 2>&1 || { echo "ERROR: '$(DOCKER) info' failed; daemon not reachable" >&2; exit 1; }
	@echo ">> docker OK ($$($(DOCKER) --version))"
	@for f in $(SUBMODULE_SENTINELS); do \
		if [ ! -f "$(CURDIR)/$$f" ]; then \
			echo "WARN: submodule file missing: $$f (run 'make submodules')" >&2; \
		fi; \
	done

# ─── Build ────────────────────────────────────────────────────────────────────

.PHONY: build
build: submodules ## Build the docker image (full CUDA compile; takes a long time)
	$(DOCKER) build \
		$(docker_build_args) \
		-t $(IMAGE):$(TAG) \
		-f Dockerfile \
		.

.PHONY: rebuild
rebuild: submodules ## Rebuild without cache
	$(DOCKER) build --no-cache \
		$(docker_build_args) \
		-t $(IMAGE):$(TAG) \
		-f Dockerfile \
		.

# ─── Run ──────────────────────────────────────────────────────────────────────

# Refuses to start when MODELS_DIR is missing. The mount source is quoted so a
# MODELS_DIR containing spaces is passed to docker as a single argument.
.PHONY: run
run: ## Run server with reference flag set (uses ./models)
	@if [ ! -d "$(MODELS_DIR)" ]; then \
		echo "ERROR: $(MODELS_DIR) does not exist. Place GGUFs there or override MODELS_DIR=." >&2; \
		exit 1; \
	fi
	$(DOCKER) run --rm -it --gpus all \
		-v "$(MODELS_DIR)":/models:ro \
		-p $(HOST_PORT):18080 \
		$(IMAGE):$(TAG) \
		$(TARGET_MODEL) \
		--draft $(DRAFT_MODEL) \
		--host 0.0.0.0 --port 18080 \
		--max-ctx 32768 --max-tokens 512 \
		--fa-window 2048 \
		--ddtree --ddtree-budget 22 \
		--model-name luce-dflash

.PHONY: shell
shell: ## Interactive shell inside the image (overrides entrypoint)
	$(DOCKER) run --rm -it --entrypoint /bin/bash \
		-v "$(MODELS_DIR)":/models:ro \
		$(IMAGE):$(TAG)

# ─── Compose ──────────────────────────────────────────────────────────────────

.PHONY: up
up: ## docker compose up -d
	$(COMPOSE) up -d

.PHONY: down
down: ## docker compose down
	$(COMPOSE) down

.PHONY: logs
logs: ## tail compose logs
	$(COMPOSE) logs -f

# ─── Publish ──────────────────────────────────────────────────────────────────

.PHONY: push
push: ## Tag and push image to $(REGISTRY)
	$(DOCKER) tag $(IMAGE):$(TAG) $(REMOTE_IMAGE)
	$(DOCKER) push $(REMOTE_IMAGE)

# ─── Clean ────────────────────────────────────────────────────────────────────

# The leading `-` ignores failures so `clean` still succeeds when either tag
# is already absent.
.PHONY: clean
clean: ## Remove the built image
	-$(DOCKER) rmi $(IMAGE):$(TAG)
	-$(DOCKER) rmi $(REMOTE_IMAGE)