fix(client): support vLLM "reasoning" field for thinking blocks
vLLM sends thinking content in a "reasoning" delta field, unlike DeepSeek, which uses "reasoning_content". Check both field names so thinking blocks render for vLLM-hosted models like qwen3.6-27b-thinking. Also update the client tests to exercise thinking output, and skip them by default so they don't run in Drone CI (they require a live LLM API).
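For illustration only, a minimal standalone sketch of the field-probing approach. The payload shapes and the extractThinking helper below are hypothetical, not part of the codebase; the real client reads these fields from the SDK's delta.JSON.ExtraFields rather than a plain map:

package main

import (
	"encoding/json"
	"fmt"
)

// extractThinking probes a raw streaming delta for the first matching
// reasoning field name, mirroring the lookup order used in the commit:
// "reasoning_content" (DeepSeek) before "reasoning" (vLLM).
func extractThinking(rawDelta []byte) (string, error) {
	var fields map[string]json.RawMessage
	if err := json.Unmarshal(rawDelta, &fields); err != nil {
		return "", err
	}
	for _, key := range []string{"reasoning_content", "reasoning"} {
		if raw, ok := fields[key]; ok {
			var thinking string
			if err := json.Unmarshal(raw, &thinking); err != nil {
				return "", fmt.Errorf("thinking unmarshal error: %w", err)
			}
			return thinking, nil
		}
	}
	return "", nil // no thinking content in this delta
}

func main() {
	// Hypothetical delta payloads illustrating each backend style.
	for _, delta := range []string{
		`{"role":"assistant","reasoning_content":"DeepSeek-style thinking"}`,
		`{"role":"assistant","reasoning":"vLLM-style thinking"}`,
		`{"role":"assistant","content":"plain content, no thinking"}`,
	} {
		thinking, err := extractThinking([]byte(delta))
		if err != nil {
			panic(err)
		}
		fmt.Printf("thinking=%q\n", thinking)
	}
}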
@@ -115,15 +115,19 @@ func (c *Client) SendMessage(ctx context.Context, chatMessages []*store.Message,
 		if len(chunk.Choices) > 0 {
 			delta := chunk.Choices[0].Delta
 
-			// Check Thinking
-			if thinkingField, found := delta.JSON.ExtraFields["reasoning_content"]; found {
-				var thinkingContent string
-				if err := json.Unmarshal([]byte(thinkingField.Raw()), &thinkingContent); err != nil {
-					return respContent, fmt.Errorf("thinking unmarshal error: %w", err)
-				} else if thinkingContent != "" {
-					msgStats.RecordFirstToken()
-					sendUpdate = true
-					msgChunk.Thinking = ptr.Of(thinkingContent)
+			// Check Thinking - Support both "reasoning_content" (DeepSeek)
+			// and "reasoning" (vLLM) field names.
+			for _, thinkingKey := range []string{"reasoning_content", "reasoning"} {
+				if thinkingField, found := delta.JSON.ExtraFields[thinkingKey]; found {
+					var thinkingContent string
+					if err := json.Unmarshal([]byte(thinkingField.Raw()), &thinkingContent); err != nil {
+						return respContent, fmt.Errorf("thinking unmarshal error: %w", err)
+					} else if thinkingContent != "" {
+						msgStats.RecordFirstToken()
+						sendUpdate = true
+						msgChunk.Thinking = ptr.Of(thinkingContent)
+					}
+					break
+				}
 				}
 			}
 
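Note the break after a successful lookup: each delta contributes at most one thinking fragment, and because "reasoning_content" precedes "reasoning" in the slice, the DeepSeek-style field takes precedence if a backend were ever to emit both.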
@@ -10,9 +10,11 @@ import (
 	"reichard.io/aethera/internal/store"
 )
 
-const model = "devstral-small-2-instruct"
+const model = "vllm-qwen3.6-27b-thinking"
 
 func TestSendMessage(t *testing.T) {
+	t.Skip("requires live LLM API - run manually with: go test -run TestSendMessage ./internal/client/")
+
 	// Initialize Client
 	baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1")
 	if err != nil {
@@ -21,17 +23,21 @@ func TestSendMessage(t *testing.T) {
 	client := NewClient(baseURL)
 
 	// Create Context
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
 	defer cancel()
 
 	// Generate Text Stream
-	var buf bytes.Buffer
+	var contentBuf, thinkingBuf bytes.Buffer
 	_, err = client.SendMessage(ctx, []*store.Message{{
 		Role:    "user",
-		Content: "Hello, how are you?",
+		Content: "What is 2+2? Think step by step.",
 	}}, model, func(mc *MessageChunk) error {
+		if mc.Thinking != nil {
+			_, err := thinkingBuf.Write([]byte(*mc.Thinking))
+			return err
+		}
 		if mc.Message != nil {
-			_, err := buf.Write([]byte(*mc.Message))
+			_, err := contentBuf.Write([]byte(*mc.Message))
 			return err
 		}
 		return nil
@@ -40,17 +46,26 @@ func TestSendMessage(t *testing.T) {
 		t.Fatalf("Failed to generate text stream: %v", err)
 	}
 
-	// Verify Results
-	output := buf.String()
+	// Verify Thinking
+	thinking := thinkingBuf.String()
+	if thinking == "" {
+		t.Error("No thinking content was received")
+	} else {
+		t.Logf("Thinking (%d bytes): %s", len(thinking), thinking)
+	}
+
+	// Verify Content
+	output := contentBuf.String()
 	if output == "" {
 		t.Error("No content was written to the buffer")
 	} else {
-		t.Logf("Successfully received %d bytes from the stream", len(output))
-		t.Logf("Output: %s", output)
+		t.Logf("Content (%d bytes): %s", len(output), output)
 	}
 }
 
 func TestSummarizeChat(t *testing.T) {
+	t.Skip("requires live LLM API - run manually with: go test -run TestSummarizeChat ./internal/client/")
+
 	// Initialize Client
 	baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1")
 	if err != nil {