fix(client): support vLLM "reasoning" field for thinking blocks
vLLM sends thinking content in a "reasoning" delta field, unlike DeepSeek, which uses "reasoning_content". Check both field names so thinking blocks render for vLLM-hosted models like qwen3.6-27b-thinking. Also update the client tests to exercise thinking output, and skip them by default so they don't run in Drone CI (they require a live LLM API).
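For illustration only, a minimal standalone sketch of the field-probing approach. The payload shapes and the extractThinking helper below are hypothetical, not part of the codebase; the real client reads these fields from the SDK's delta.JSON.ExtraFields rather than a plain map:

package main

import (
	"encoding/json"
	"fmt"
)

// extractThinking probes a raw streaming delta for the first matching
// reasoning field name, mirroring the lookup order used in the commit:
// "reasoning_content" (DeepSeek) before "reasoning" (vLLM).
func extractThinking(rawDelta []byte) (string, error) {
	var fields map[string]json.RawMessage
	if err := json.Unmarshal(rawDelta, &fields); err != nil {
		return "", err
	}
	for _, key := range []string{"reasoning_content", "reasoning"} {
		if raw, ok := fields[key]; ok {
			var thinking string
			if err := json.Unmarshal(raw, &thinking); err != nil {
				return "", fmt.Errorf("thinking unmarshal error: %w", err)
			}
			return thinking, nil
		}
	}
	return "", nil // no thinking content in this delta
}

func main() {
	// Hypothetical delta payloads illustrating each backend style.
	for _, delta := range []string{
		`{"role":"assistant","reasoning_content":"DeepSeek-style thinking"}`,
		`{"role":"assistant","reasoning":"vLLM-style thinking"}`,
		`{"role":"assistant","content":"plain content, no thinking"}`,
	} {
		thinking, err := extractThinking([]byte(delta))
		if err != nil {
			panic(err)
		}
		fmt.Printf("thinking=%q\n", thinking)
	}
}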
@@ -115,15 +115,19 @@ func (c *Client) SendMessage(ctx context.Context, chatMessages []*store.Message,
 		if len(chunk.Choices) > 0 {
 			delta := chunk.Choices[0].Delta
 
-			// Check Thinking
-			if thinkingField, found := delta.JSON.ExtraFields["reasoning_content"]; found {
-				var thinkingContent string
-				if err := json.Unmarshal([]byte(thinkingField.Raw()), &thinkingContent); err != nil {
-					return respContent, fmt.Errorf("thinking unmarshal error: %w", err)
-				} else if thinkingContent != "" {
-					msgStats.RecordFirstToken()
-					sendUpdate = true
-					msgChunk.Thinking = ptr.Of(thinkingContent)
+			// Check Thinking - Support both "reasoning_content" (DeepSeek)
+			// and "reasoning" (vLLM) field names.
+			for _, thinkingKey := range []string{"reasoning_content", "reasoning"} {
+				if thinkingField, found := delta.JSON.ExtraFields[thinkingKey]; found {
+					var thinkingContent string
+					if err := json.Unmarshal([]byte(thinkingField.Raw()), &thinkingContent); err != nil {
+						return respContent, fmt.Errorf("thinking unmarshal error: %w", err)
+					} else if thinkingContent != "" {
+						msgStats.RecordFirstToken()
+						sendUpdate = true
+						msgChunk.Thinking = ptr.Of(thinkingContent)
+					}
+					break
+				}
 				}
 			}
 
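Note the break after a successful lookup: each delta contributes at most one thinking fragment, and because "reasoning_content" precedes "reasoning" in the slice, the DeepSeek-style field takes precedence if a backend were ever to emit both.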
@@ -10,9 +10,11 @@ import (
 	"reichard.io/aethera/internal/store"
 )
 
-const model = "devstral-small-2-instruct"
+const model = "vllm-qwen3.6-27b-thinking"
 
 func TestSendMessage(t *testing.T) {
+	t.Skip("requires live LLM API - run manually with: go test -run TestSendMessage ./internal/client/")
+
 	// Initialize Client
 	baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1")
 	if err != nil {
@@ -21,17 +23,21 @@ func TestSendMessage(t *testing.T) {
 	client := NewClient(baseURL)
 
 	// Create Context
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
 	defer cancel()
 
 	// Generate Text Stream
-	var buf bytes.Buffer
+	var contentBuf, thinkingBuf bytes.Buffer
 	_, err = client.SendMessage(ctx, []*store.Message{{
 		Role:    "user",
-		Content: "Hello, how are you?",
+		Content: "What is 2+2? Think step by step.",
 	}}, model, func(mc *MessageChunk) error {
+		if mc.Thinking != nil {
+			_, err := thinkingBuf.Write([]byte(*mc.Thinking))
+			return err
+		}
 		if mc.Message != nil {
-			_, err := buf.Write([]byte(*mc.Message))
+			_, err := contentBuf.Write([]byte(*mc.Message))
 			return err
 		}
 		return nil
@@ -40,17 +46,26 @@ func TestSendMessage(t *testing.T) {
 		t.Fatalf("Failed to generate text stream: %v", err)
 	}
 
-	// Verify Results
-	output := buf.String()
+	// Verify Thinking
+	thinking := thinkingBuf.String()
+	if thinking == "" {
+		t.Error("No thinking content was received")
+	} else {
+		t.Logf("Thinking (%d bytes): %s", len(thinking), thinking)
+	}
+
+	// Verify Content
+	output := contentBuf.String()
 	if output == "" {
 		t.Error("No content was written to the buffer")
 	} else {
-		t.Logf("Successfully received %d bytes from the stream", len(output))
-		t.Logf("Output: %s", output)
+		t.Logf("Content (%d bytes): %s", len(output), output)
 	}
 }
 
 func TestSummarizeChat(t *testing.T) {
+	t.Skip("requires live LLM API - run manually with: go test -run TestSummarizeChat ./internal/client/")
+
 	// Initialize Client
 	baseURL, err := url.Parse("https://llm-api.va.reichard.io/v1")
 	if err != nil {