feat(chat): add optional photo upload support
Add vision/multimodal support to chat, allowing users to send images
alongside or instead of text prompts. Images are transmitted and persisted
as base64 data URLs.

Backend:
- Add Images []string to Message struct for persistence
- Add Images []string to GenerateTextRequest with relaxed validation
- Build multimodal user messages using OpenAI SDK content parts
- Pass images through from handlers to client
- Deep-copy Images slice in message cloning

Frontend:
- Add images?: string[] to Message and GenerateTextRequest types
- Add image selection state and file input handler
- Add camera icon button, hidden file input, and image preview strip
- Render images in user message bubbles
- Pass images through to GenerateTextRequest

Tests:
- Add TestSendMessageWithImage for vision model testing
This commit is contained in:
@@ -2,6 +2,7 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@@ -153,6 +154,7 @@ func cloneStoreMessage(msg *store.Message) *store.Message {
|
|||||||
|
|
||||||
// Clone Message
|
// Clone Message
|
||||||
cloned := *msg
|
cloned := *msg
|
||||||
|
cloned.Images = slices.Clone(msg.Images)
|
||||||
if msg.Stats != nil {
|
if msg.Stats != nil {
|
||||||
stats := *msg.Stats
|
stats := *msg.Stats
|
||||||
cloned.Stats = &stats
|
cloned.Stats = &stats
|
||||||
|
|||||||
@@ -325,7 +325,7 @@ func (a *API) PostChat(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Start Message
|
// Start Message
|
||||||
chunk, err := a.startMessageGeneration(chat.ID, genReq.Model, genReq.Prompt)
|
chunk, err := a.startMessageGeneration(chat.ID, genReq.Model, genReq.Prompt, genReq.Images)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithError(err).WithField("chat_id", chat.ID).Error("failed to start message generation")
|
log.WithError(err).WithField("chat_id", chat.ID).Error("failed to start message generation")
|
||||||
http.Error(w, "Failed to start message generation", http.StatusInternalServerError)
|
http.Error(w, "Failed to start message generation", http.StatusInternalServerError)
|
||||||
@@ -493,7 +493,7 @@ func (a *API) PostChatMessage(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Start Message
|
// Start Message
|
||||||
chunk, err := a.startMessageGeneration(chatID, genReq.Model, genReq.Prompt)
|
chunk, err := a.startMessageGeneration(chatID, genReq.Model, genReq.Prompt, genReq.Images)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithError(err).WithField("chat_id", chatID).Error("failed to start message generation")
|
log.WithError(err).WithField("chat_id", chatID).Error("failed to start message generation")
|
||||||
if errors.Is(err, errGenerationActive) {
|
if errors.Is(err, errGenerationActive) {
|
||||||
@@ -533,7 +533,7 @@ func (a *API) getClient() (*client.Client, error) {
|
|||||||
return a.client, nil
|
return a.client, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *API) startMessageGeneration(chatID uuid.UUID, chatModel, userMessage string) (*MessageChunk, error) {
|
func (a *API) startMessageGeneration(chatID uuid.UUID, chatModel, userMessage string, images []string) (*MessageChunk, error) {
|
||||||
apiClient, err := a.getClient()
|
apiClient, err := a.getClient()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get client: %w", err)
|
return nil, fmt.Errorf("failed to get client: %w", err)
|
||||||
@@ -548,7 +548,7 @@ func (a *API) startMessageGeneration(chatID uuid.UUID, chatModel, userMessage st
|
|||||||
// persisted, preventing concurrent completions from creating duplicate rows.
|
// persisted, preventing concurrent completions from creating duplicate rows.
|
||||||
if err := a.generationManager.start(chatID, func(_ *generation) error {
|
if err := a.generationManager.start(chatID, func(_ *generation) error {
|
||||||
// Create User Message
|
// Create User Message
|
||||||
userMsg = &store.Message{ChatID: chatID, Role: "user", Content: userMessage}
|
userMsg = &store.Message{ChatID: chatID, Role: "user", Content: userMessage, Images: images}
|
||||||
if err := a.store.SaveChatMessage(userMsg); err != nil {
|
if err := a.store.SaveChatMessage(userMsg); err != nil {
|
||||||
return fmt.Errorf("failed to add user message to chat: %w", err)
|
return fmt.Errorf("failed to add user message to chat: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -71,14 +71,15 @@ type ImageRecord struct {
|
|||||||
type GenerateTextRequest struct {
|
type GenerateTextRequest struct {
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Prompt string `json:"prompt"`
|
Prompt string `json:"prompt"`
|
||||||
|
Images []string `json:"images,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *GenerateTextRequest) Validate() error {
|
func (r *GenerateTextRequest) Validate() error {
|
||||||
if r.Model == "" {
|
if r.Model == "" {
|
||||||
return errors.New("model is required")
|
return errors.New("model is required")
|
||||||
}
|
}
|
||||||
if r.Prompt == "" {
|
if r.Prompt == "" && len(r.Images) == 0 {
|
||||||
return errors.New("prompt is required")
|
return errors.New("prompt or images are required")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ func (c *Client) SendMessage(ctx context.Context, chatMessages []*store.Message,
|
|||||||
// Map Messages
|
// Map Messages
|
||||||
messages := slices.Map(chatMessages, func(m *store.Message) openai.ChatCompletionMessageParamUnion {
|
messages := slices.Map(chatMessages, func(m *store.Message) openai.ChatCompletionMessageParamUnion {
|
||||||
if m.Role == "user" {
|
if m.Role == "user" {
|
||||||
return openai.UserMessage(m.Content)
|
return buildUserMessage(m)
|
||||||
}
|
}
|
||||||
return openai.AssistantMessage(m.Content)
|
return openai.AssistantMessage(m.Content)
|
||||||
})
|
})
|
||||||
@@ -292,3 +292,30 @@ func NewClient(baseURL *url.URL) *Client {
|
|||||||
oaiClient := openai.NewClient(option.WithBaseURL(baseURL.String()))
|
oaiClient := openai.NewClient(option.WithBaseURL(baseURL.String()))
|
||||||
return &Client{oaiClient: &oaiClient}
|
return &Client{oaiClient: &oaiClient}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildUserMessage(m *store.Message) openai.ChatCompletionMessageParamUnion {
|
||||||
|
// Simple Text Message
|
||||||
|
if len(m.Images) == 0 {
|
||||||
|
return openai.UserMessage(m.Content)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build Multimodal Content Parts
|
||||||
|
parts := make([]openai.ChatCompletionContentPartUnionParam, 0, len(m.Images)+1)
|
||||||
|
|
||||||
|
// Add Image Parts
|
||||||
|
for _, imgURL := range m.Images {
|
||||||
|
parts = append(parts, openai.ImageContentPart(
|
||||||
|
openai.ChatCompletionContentPartImageImageURLParam{
|
||||||
|
URL: imgURL,
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add Text Part
|
||||||
|
if m.Content != "" {
|
||||||
|
parts = append(parts, openai.TextContentPart(m.Content))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build User Message with Content Parts
|
||||||
|
return openai.UserMessage(parts)
|
||||||
|
}
|
||||||
|
|||||||
@@ -92,3 +92,35 @@ func TestSummarizeChat(t *testing.T) {
|
|||||||
t.Logf("Output: %s", output)
|
t.Logf("Output: %s", output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSendMessageWithImage(t *testing.T) {
|
||||||
|
t.Skip("requires live LLM API - run manually with: go test -run TestSendMessageWithImage ./internal/client/")
|
||||||
|
|
||||||
|
// Initialize Client
|
||||||
|
baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to parse base URL: %v", err)
|
||||||
|
}
|
||||||
|
client := NewClient(baseURL)
|
||||||
|
|
||||||
|
// Create Context
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Generate Text Stream
|
||||||
|
_, err = client.SendMessage(ctx, []*store.Message{{
|
||||||
|
Role: "user",
|
||||||
|
Content: "What is in this image?",
|
||||||
|
Images: []string{
|
||||||
|
"https://llm-api.va.reichard.io/v1/images/test.png",
|
||||||
|
},
|
||||||
|
}}, "vllm-qwen3-8b-vision", func(mc *MessageChunk) error {
|
||||||
|
if mc.Message != nil {
|
||||||
|
t.Logf("Received: %s", *mc.Message)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to generate text stream: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ type Message struct {
|
|||||||
Role string `json:"role"`
|
Role string `json:"role"`
|
||||||
Thinking string `json:"thinking"`
|
Thinking string `json:"thinking"`
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
|
Images []string `json:"images,omitempty"`
|
||||||
Status MessageStatus `json:"status,omitempty"`
|
Status MessageStatus `json:"status,omitempty"`
|
||||||
Stats *types.MessageStats `json:"stats,omitempty"`
|
Stats *types.MessageStats `json:"stats,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,16 @@
|
|||||||
: 'bg-primary-200 text-primary-900 rounded-bl-none'
|
: 'bg-primary-200 text-primary-900 rounded-bl-none'
|
||||||
]"
|
]"
|
||||||
>
|
>
|
||||||
|
<!-- User Images: thumbnails of the images attached to a user message.
     Hidden for assistant messages and for messages without images. -->
<div
  x-show="message.role === 'user' && message.images && message.images.length > 0"
  class="flex gap-1 mb-2 flex-wrap"
>
  <template x-for="(img, imgIdx) in message.images" :key="imgIdx">
    <!-- alt text gives each thumbnail an accessible name -->
    <img
      :src="img"
      :alt="`Attached image ${imgIdx + 1}`"
      class="max-w-full h-auto rounded-lg max-h-48"
    />
  </template>
</div>
|
||||||
|
|
||||||
<!-- Thinking Section -->
|
<!-- Thinking Section -->
|
||||||
<div
|
<div
|
||||||
x-show="message.thinking"
|
x-show="message.thinking"
|
||||||
@@ -161,8 +171,66 @@
|
|||||||
</svg>
|
</svg>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Image Preview Strip: one thumbnail per entry in selectedImages, each with
     a button that removes that entry. Hidden while nothing is selected. -->
<div
  x-show="selectedImages.length > 0"
  class="flex gap-2 flex-wrap"
>
  <template x-for="(img, idx) in selectedImages" :key="idx">
    <div class="relative">
      <img
        :src="img"
        :alt="`Selected image ${idx + 1}`"
        class="w-20 h-20 object-cover rounded-lg"
      />
      <!-- type="button" keeps this from ever acting as a submit control;
           aria-label names the icon-only button for assistive technology -->
      <button
        type="button"
        @click="selectedImages.splice(idx, 1)"
        :aria-label="`Remove image ${idx + 1}`"
        class="absolute -top-1 -right-1 w-5 h-5 bg-tertiary-700 text-white rounded-full flex items-center justify-center text-xs hover:bg-tertiary-900"
      >
        ×
      </button>
    </div>
  </template>
</div>
|
||||||
|
|
||||||
<!-- Message Form -->
|
<!-- Message Form -->
|
||||||
<form @submit.prevent="sendMessage" class="flex gap-2 items-end">
|
<form @submit.prevent="sendMessage" class="flex gap-2 items-end">
|
||||||
|
<!-- Attach Image Button: opens the file picker by forwarding the click to the
     element registered as x-ref="fileInput". type="button" prevents it from
     submitting the surrounding form. -->
<button
  type="button"
  @click="$refs.fileInput.click()"
  class="self-stretch w-[44px] bg-primary-200 text-primary-700 rounded-xl transition-all flex items-center justify-center flex-shrink-0 hover:bg-primary-300 hover:shadow-md"
  title="Attach Image"
  aria-label="Attach Image"
>
  <!-- Decorative camera icon; the button's label already describes the action -->
  <svg class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
    <path
      stroke-linecap="round"
      stroke-linejoin="round"
      stroke-width="2"
      d="M3 9a2 2 0 012-2h.93a2 2 0 001.664-.89l.812-1.22A2 2 0 0110.07 4h3.86a2 2 0 011.664.89l.812 1.22A2 2 0 0018.07 7H19a2 2 0 012 2v9a2 2 0 01-2 2H5a2 2 0 01-2-2V9z"
    />
    <path
      stroke-linecap="round"
      stroke-linejoin="round"
      stroke-width="2"
      d="M15 13a3 3 0 11-6 0 3 3 0 016 0z"
    />
  </svg>
</button>
|
||||||
|
|
||||||
|
<!-- Hidden File Input: triggered through x-ref="fileInput". On change, each
     chosen file is read as a data URL and appended to selectedImages; the
     input is then cleared so picking the same file again fires another change
     event.
     The handler is an Alpine expression evaluated as plain JavaScript, so it
     must not contain TypeScript syntax such as `as string` casts. -->
<input
  x-ref="fileInput"
  type="file"
  accept="image/png, image/jpeg, image/webp"
  multiple
  class="hidden"
  @change="
    Array.from($event.target.files).forEach(file => {
      const reader = new FileReader();
      reader.onload = () => {
        if (typeof reader.result === 'string') selectedImages.push(reader.result);
      };
      reader.readAsDataURL(file);
    });
    $event.target.value = '';
  "
/>
|
||||||
|
|
||||||
<textarea
|
<textarea
|
||||||
x-model="inputMessage"
|
x-model="inputMessage"
|
||||||
placeholder="Type your message..."
|
placeholder="Type your message..."
|
||||||
@@ -174,8 +242,8 @@
|
|||||||
|
|
||||||
<button
|
<button
|
||||||
type="submit"
|
type="submit"
|
||||||
:disabled="!inputMessage.trim() || loading"
|
:disabled="(!inputMessage.trim() && selectedImages.length === 0) || loading"
|
||||||
:class="(!inputMessage.trim() || loading) ? 'opacity-50 cursor-not-allowed' : 'hover:shadow-md hover:scale-105'"
|
:class=" ((!inputMessage.trim() && selectedImages.length === 0) || loading) ? 'opacity-50 cursor-not-allowed' : 'hover:shadow-md hover:scale-105'"
|
||||||
class="self-stretch w-[44px] bg-gradient-to-r from-primary-600 to-primary-500 text-white rounded-xl transition-all flex items-center justify-center flex-shrink-0"
|
class="self-stretch w-[44px] bg-gradient-to-r from-primary-600 to-primary-500 text-white rounded-xl transition-all flex items-center justify-center flex-shrink-0"
|
||||||
>
|
>
|
||||||
<template x-if="loading">
|
<template x-if="loading">
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ Alpine.data('chatManager', () => ({
|
|||||||
|
|
||||||
selectedModel: '',
|
selectedModel: '',
|
||||||
inputMessage: '',
|
inputMessage: '',
|
||||||
|
selectedImages: [] as string[],
|
||||||
error: '',
|
error: '',
|
||||||
|
|
||||||
selectedChatID: null as string | null,
|
selectedChatID: null as string | null,
|
||||||
@@ -88,10 +89,12 @@ Alpine.data('chatManager', () => ({
|
|||||||
|
|
||||||
async sendMessage() {
|
async sendMessage() {
|
||||||
const message = this.inputMessage.trim();
|
const message = this.inputMessage.trim();
|
||||||
if (!message || this.loading) return;
|
if ((!message && this.selectedImages.length === 0) || this.loading) return;
|
||||||
|
|
||||||
// Update State
|
// Update State
|
||||||
|
const images = [...this.selectedImages];
|
||||||
this.inputMessage = '';
|
this.inputMessage = '';
|
||||||
|
this.selectedImages = [];
|
||||||
this.loading = true;
|
this.loading = true;
|
||||||
this.error = '';
|
this.error = '';
|
||||||
|
|
||||||
@@ -121,6 +124,7 @@ Alpine.data('chatManager', () => ({
|
|||||||
role: 'user',
|
role: 'user',
|
||||||
thinking: '',
|
thinking: '',
|
||||||
content: message,
|
content: message,
|
||||||
|
images: images,
|
||||||
created_at: new Date().toISOString(),
|
created_at: new Date().toISOString(),
|
||||||
});
|
});
|
||||||
currentChat.message_count += 1;
|
currentChat.message_count += 1;
|
||||||
@@ -128,7 +132,7 @@ Alpine.data('chatManager', () => ({
|
|||||||
try {
|
try {
|
||||||
await sendMessage(
|
await sendMessage(
|
||||||
this.selectedChatID === IN_PROGRESS_UUID ? '' : this.selectedChatID,
|
this.selectedChatID === IN_PROGRESS_UUID ? '' : this.selectedChatID,
|
||||||
{ model: this.selectedModel, prompt: message },
|
{ model: this.selectedModel, prompt: message, images },
|
||||||
(chunk: MessageChunk) => {
|
(chunk: MessageChunk) => {
|
||||||
if (chunk.chat) this.activeStreamChatID = chunk.chat.id;
|
if (chunk.chat) this.activeStreamChatID = chunk.chat.id;
|
||||||
this.applyMessageChunk(chunk);
|
this.applyMessageChunk(chunk);
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ export interface Message {
|
|||||||
role: 'user' | 'assistant';
|
role: 'user' | 'assistant';
|
||||||
thinking: string;
|
thinking: string;
|
||||||
content: string;
|
content: string;
|
||||||
|
images?: string[];
|
||||||
status?: MessageStatus;
|
status?: MessageStatus;
|
||||||
stats?: MessageStats;
|
stats?: MessageStats;
|
||||||
}
|
}
|
||||||
@@ -69,6 +70,7 @@ export interface GenerateImageRequest {
|
|||||||
export interface GenerateTextRequest {
|
export interface GenerateTextRequest {
|
||||||
model: string;
|
model: string;
|
||||||
prompt: string;
|
prompt: string;
|
||||||
|
images?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ChatListResponse {
|
export interface ChatListResponse {
|
||||||
|
|||||||
Reference in New Issue
Block a user