From e60b1ea8d5111038cf8f86385d0c771685c2aa2f Mon Sep 17 00:00:00 2001 From: Evan Reichard Date: Fri, 1 May 2026 18:27:09 -0400 Subject: [PATCH] feat(chat): add optional photo upload support Add vision/multimodal support to chat, allowing users to send images alongside or instead of text prompts. Images are transmitted and persisted as base64 data URLs. Backend: - Add Images []string to Message struct for persistence - Add Images []string to GenerateTextRequest with relaxed validation - Build multimodal user messages using OpenAI SDK content parts - Pass images through from handlers to client - Deep-copy Images slice in message cloning Frontend: - Add images?: string[] to Message and GenerateTextRequest types - Add image selection state and file input handler - Add camera icon button, hidden file input, and image preview strip - Render images in user message bubbles - Pass images through to GenerateTextRequest Tests: - Add TestSendMessageWithImage for vision model testing --- backend/internal/api/generation.go | 2 + backend/internal/api/handlers.go | 8 +-- backend/internal/api/types.go | 9 ++-- backend/internal/client/client.go | 29 ++++++++++- backend/internal/client/client_test.go | 32 ++++++++++++ backend/internal/store/types.go | 1 + frontend/public/pages/chats.html | 72 +++++++++++++++++++++++++- frontend/src/components/chatManager.ts | 8 ++- frontend/src/types/index.ts | 2 + 9 files changed, 150 insertions(+), 13 deletions(-) diff --git a/backend/internal/api/generation.go b/backend/internal/api/generation.go index 1efb7ef..1804946 100644 --- a/backend/internal/api/generation.go +++ b/backend/internal/api/generation.go @@ -2,6 +2,7 @@ package api import ( "errors" + "slices" "sync" "github.com/google/uuid" @@ -153,6 +154,7 @@ func cloneStoreMessage(msg *store.Message) *store.Message { // Clone Message cloned := *msg + cloned.Images = slices.Clone(msg.Images) if msg.Stats != nil { stats := *msg.Stats cloned.Stats = &stats diff --git a/backend/internal/api/handlers.go b/backend/internal/api/handlers.go index 7f9ae7c..cea01e4 100644 --- a/backend/internal/api/handlers.go +++ b/backend/internal/api/handlers.go @@ -325,7 +325,7 @@ func (a *API) PostChat(w http.ResponseWriter, r *http.Request) { } // Start Message - chunk, err := a.startMessageGeneration(chat.ID, genReq.Model, genReq.Prompt) + chunk, err := a.startMessageGeneration(chat.ID, genReq.Model, genReq.Prompt, genReq.Images) if err != nil { log.WithError(err).WithField("chat_id", chat.ID).Error("failed to start message generation") http.Error(w, "Failed to start message generation", http.StatusInternalServerError) @@ -493,7 +493,7 @@ func (a *API) PostChatMessage(w http.ResponseWriter, r *http.Request) { } // Start Message - chunk, err := a.startMessageGeneration(chatID, genReq.Model, genReq.Prompt) + chunk, err := a.startMessageGeneration(chatID, genReq.Model, genReq.Prompt, genReq.Images) if err != nil { log.WithError(err).WithField("chat_id", chatID).Error("failed to start message generation") if errors.Is(err, errGenerationActive) { @@ -533,7 +533,7 @@ func (a *API) getClient() (*client.Client, error) { return a.client, nil } -func (a *API) startMessageGeneration(chatID uuid.UUID, chatModel, userMessage string) (*MessageChunk, error) { +func (a *API) startMessageGeneration(chatID uuid.UUID, chatModel, userMessage string, images []string) (*MessageChunk, error) { apiClient, err := a.getClient() if err != nil { return nil, fmt.Errorf("failed to get client: %w", err) @@ -548,7 +548,7 @@ func (a *API) startMessageGeneration(chatID uuid.UUID, chatModel, userMessage st // persisted, preventing concurrent completions from creating duplicate rows. if err := a.generationManager.start(chatID, func(_ *generation) error { // Create User Message - userMsg = &store.Message{ChatID: chatID, Role: "user", Content: userMessage} + userMsg = &store.Message{ChatID: chatID, Role: "user", Content: userMessage, Images: images} if err := a.store.SaveChatMessage(userMsg); err != nil { return fmt.Errorf("failed to add user message to chat: %w", err) } diff --git a/backend/internal/api/types.go b/backend/internal/api/types.go index 0011224..e481e30 100644 --- a/backend/internal/api/types.go +++ b/backend/internal/api/types.go @@ -69,16 +69,17 @@ type ImageRecord struct { } type GenerateTextRequest struct { - Model string `json:"model"` - Prompt string `json:"prompt"` + Model string `json:"model"` + Prompt string `json:"prompt"` + Images []string `json:"images,omitempty"` } func (r *GenerateTextRequest) Validate() error { if r.Model == "" { return errors.New("model is required") } - if r.Prompt == "" { - return errors.New("prompt is required") + if r.Prompt == "" && len(r.Images) == 0 { + return errors.New("prompt or images are required") } return nil } diff --git a/backend/internal/client/client.go b/backend/internal/client/client.go index 6dc2554..e10726e 100644 --- a/backend/internal/client/client.go +++ b/backend/internal/client/client.go @@ -75,7 +75,7 @@ func (c *Client) SendMessage(ctx context.Context, chatMessages []*store.Message, // Map Messages messages := slices.Map(chatMessages, func(m *store.Message) openai.ChatCompletionMessageParamUnion { if m.Role == "user" { - return openai.UserMessage(m.Content) + return buildUserMessage(m) } return openai.AssistantMessage(m.Content) }) @@ -292,3 +292,30 @@ func NewClient(baseURL *url.URL) *Client { oaiClient := openai.NewClient(option.WithBaseURL(baseURL.String())) return &Client{oaiClient: &oaiClient} } + +func buildUserMessage(m *store.Message) openai.ChatCompletionMessageParamUnion { + // Simple Text Message + if len(m.Images) == 0 { + return openai.UserMessage(m.Content) + } + + // Build Multimodal Content Parts + parts := make([]openai.ChatCompletionContentPartUnionParam, 0, len(m.Images)+1) + + // Add Image Parts + for _, imgURL := range m.Images { + parts = append(parts, openai.ImageContentPart( + openai.ChatCompletionContentPartImageImageURLParam{ + URL: imgURL, + }, + )) + } + + // Add Text Part + if m.Content != "" { + parts = append(parts, openai.TextContentPart(m.Content)) + } + + // Build User Message with Content Parts + return openai.UserMessage(parts) +} diff --git a/backend/internal/client/client_test.go b/backend/internal/client/client_test.go index ba0d22b..b354f0f 100644 --- a/backend/internal/client/client_test.go +++ b/backend/internal/client/client_test.go @@ -92,3 +92,35 @@ func TestSummarizeChat(t *testing.T) { t.Logf("Output: %s", output) } } + +func TestSendMessageWithImage(t *testing.T) { + t.Skip("requires live LLM API - run manually with: go test -run TestSendMessageWithImage ./internal/client/") + + // Initialize Client + baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1") + if err != nil { + t.Fatalf("Failed to parse base URL: %v", err) + } + client := NewClient(baseURL) + + // Create Context + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + // Generate Text Stream + _, err = client.SendMessage(ctx, []*store.Message{{ + Role: "user", + Content: "What is in this image?", + Images: []string{ + "https://llm-api.va.reichard.io/v1/images/test.png", + }, + }}, "vllm-qwen3-8b-vision", func(mc *MessageChunk) error { + if mc.Message != nil { + t.Logf("Received: %s", *mc.Message) + } + return nil + }) + if err != nil { + t.Fatalf("Failed to generate text stream: %v", err) + } +} diff --git a/backend/internal/store/types.go b/backend/internal/store/types.go index 9eeb2bf..0f7956f 100644 --- a/backend/internal/store/types.go +++ b/backend/internal/store/types.go @@ -43,6 +43,7 @@ type Message struct { Role string `json:"role"` Thinking string `json:"thinking"` Content string `json:"content"` + Images []string `json:"images,omitempty"` Status MessageStatus `json:"status,omitempty"` Stats *types.MessageStats `json:"stats,omitempty"` } diff --git a/frontend/public/pages/chats.html b/frontend/public/pages/chats.html index a60c685..80ac804 100644 --- a/frontend/public/pages/chats.html +++ b/frontend/public/pages/chats.html @@ -14,6 +14,16 @@ : 'bg-primary-200 text-primary-900 rounded-bl-none' ]" > + +
+ +
+
+ +
+ +
+
+ + + + + +