feat(chat): add optional photo upload support
Add vision/multimodal support to chat, allowing users to send images
alongside or instead of text prompts. Images are transmitted and
persisted as base64 data URLs.

Backend:
- Add Images []string to Message struct for persistence
- Add Images []string to GenerateTextRequest with relaxed validation
- Build multimodal user messages using OpenAI SDK content parts
- Pass images through from handlers to client
- Deep-copy Images slice in message cloning

Frontend:
- Add images?: string[] to Message and GenerateTextRequest types
- Add image selection state and file input handler
- Add camera icon button, hidden file input, and image preview strip
- Render images in user message bubbles
- Pass images through to GenerateTextRequest

Tests:
- Add TestSendMessageWithImage for vision model testing
This commit is contained in:
@@ -75,7 +75,7 @@ func (c *Client) SendMessage(ctx context.Context, chatMessages []*store.Message,
|
||||
// Map Messages
|
||||
messages := slices.Map(chatMessages, func(m *store.Message) openai.ChatCompletionMessageParamUnion {
|
||||
if m.Role == "user" {
|
||||
return openai.UserMessage(m.Content)
|
||||
return buildUserMessage(m)
|
||||
}
|
||||
return openai.AssistantMessage(m.Content)
|
||||
})
|
||||
@@ -292,3 +292,30 @@ func NewClient(baseURL *url.URL) *Client {
|
||||
oaiClient := openai.NewClient(option.WithBaseURL(baseURL.String()))
|
||||
return &Client{oaiClient: &oaiClient}
|
||||
}
|
||||
|
||||
func buildUserMessage(m *store.Message) openai.ChatCompletionMessageParamUnion {
|
||||
// Simple Text Message
|
||||
if len(m.Images) == 0 {
|
||||
return openai.UserMessage(m.Content)
|
||||
}
|
||||
|
||||
// Build Multimodal Content Parts
|
||||
parts := make([]openai.ChatCompletionContentPartUnionParam, 0, len(m.Images)+1)
|
||||
|
||||
// Add Image Parts
|
||||
for _, imgURL := range m.Images {
|
||||
parts = append(parts, openai.ImageContentPart(
|
||||
openai.ChatCompletionContentPartImageImageURLParam{
|
||||
URL: imgURL,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
// Add Text Part
|
||||
if m.Content != "" {
|
||||
parts = append(parts, openai.TextContentPart(m.Content))
|
||||
}
|
||||
|
||||
// Build User Message with Content Parts
|
||||
return openai.UserMessage(parts)
|
||||
}
|
||||
|
||||
@@ -92,3 +92,35 @@ func TestSummarizeChat(t *testing.T) {
|
||||
t.Logf("Output: %s", output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendMessageWithImage(t *testing.T) {
|
||||
t.Skip("requires live LLM API - run manually with: go test -run TestSendMessageWithImage ./internal/client/")
|
||||
|
||||
// Initialize Client
|
||||
baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse base URL: %v", err)
|
||||
}
|
||||
client := NewClient(baseURL)
|
||||
|
||||
// Create Context
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Generate Text Stream
|
||||
_, err = client.SendMessage(ctx, []*store.Message{{
|
||||
Role: "user",
|
||||
Content: "What is in this image?",
|
||||
Images: []string{
|
||||
"https://llm-api.va.reichard.io/v1/images/test.png",
|
||||
},
|
||||
}}, "vllm-qwen3-8b-vision", func(mc *MessageChunk) error {
|
||||
if mc.Message != nil {
|
||||
t.Logf("Received: %s", *mc.Message)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to generate text stream: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user