# Compose definition for a single GPU-backed service, `dflash-server`.
services:
  dflash-server:
    # Build the image from the Dockerfile in this directory.
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Quoted so the build arg stays a string.
        # NOTE(review): presumably the CUDA compute capability the image is
        # compiled for -- confirm against the Dockerfile.
        CUDA_ARCH: "86"
    image: dflash-server:latest
    container_name: dflash-server
    restart: unless-stopped

    # host:container -- the container side matches the --port argument below.
    ports:
      - "18080:18080"

    # Model files are mounted read-only; every model path in `command`
    # lives under /models.
    volumes:
      - ./models:/models:ro

    # Reserve every NVIDIA device with the `gpu` capability for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu

    # Arguments for the server process. The first item is a model path, not an
    # executable, so this presumably relies on an ENTRYPOINT in the image --
    # confirm against the Dockerfile. Numeric values are quoted so they remain
    # strings. Flag meanings are defined by the server binary, which is not
    # visible from this file.
    command:
      - /models/Qwen3.6-27B-Q4_K_M.gguf
      - --draft
      - /models/draft/dflash-draft-3.6-q8_0.gguf
      # Bind address and port inside the container (published via `ports`).
      - --host
      - 0.0.0.0
      - --port
      - "18080"
      - --max-ctx
      - "32768"
      - --max-tokens
      - "512"
      - --fa-window
      - "2048"
      - --ddtree
      - --ddtree-budget
      - "22"
      - --model-name
      - luce-dflash