Skip to content

Commit 1f910b8

Browse files
Fixes #479 Add filename for multi modal (#480)
* Update llm.go

* Update pkg/entities/model_entities/llm.go

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* formatting

* add unit test

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent a386efd commit 1f910b8

File tree

2 files changed

+178
-1
lines changed

2 files changed

+178
-1
lines changed

pkg/entities/model_entities/llm.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ type PromptMessageContent struct {
105105
EncodeFormat string `json:"encode_format"`
106106
Format string `json:"format"`
107107
MimeType string `json:"mime_type"`
108-
Detail string `json:"detail"` // for multi-modal data
108+
Detail string `json:"detail"` // for multi-modal data
109+
Filename string `json:"filename"` // for multi-modal data
109110
}
110111

111112
type PromptMessageToolCall struct {

pkg/entities/model_entities/llm_test.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,179 @@ func TestLLMResultChunkCompatibility(t *testing.T) {
312312
result = parser.MarshalJson(llmResultChunkPointer)
313313
assert.Contains(t, string(result), `"prompt_messages":[]`)
314314
}
315+
316+
func TestMultiModalPromptMessageWithFilename(t *testing.T) {
317+
const (
318+
imageMessageWithFilename = `
319+
{
320+
"role": "user",
321+
"content": [
322+
{
323+
"type": "image",
324+
"data": "base64encodeddata",
325+
"filename": "example_image.jpg"
326+
}
327+
]
328+
}`
329+
330+
documentMessageWithFilename = `
331+
{
332+
"role": "user",
333+
"content": [
334+
{
335+
"type": "document",
336+
"url": "https://example.com/document.pdf",
337+
"filename": "document.pdf",
338+
"mime_type": "application/pdf"
339+
}
340+
]
341+
}`
342+
343+
audioMessageWithFilename = `
344+
{
345+
"role": "user",
346+
"content": [
347+
{
348+
"type": "audio",
349+
"base64_data": "base64audioencodeddata",
350+
"filename": "audio_sample.mp3",
351+
"format": "mp3"
352+
}
353+
]
354+
}`
355+
356+
videoMessageWithFilename = `
357+
{
358+
"role": "user",
359+
"content": [
360+
{
361+
"type": "video",
362+
"url": "https://example.com/video.mp4",
363+
"filename": "video_sample.mp4"
364+
}
365+
]
366+
}`
367+
368+
mixedContentWithFilename = `
369+
{
370+
"role": "user",
371+
"content": [
372+
{
373+
"type": "text",
374+
"data": "Please analyze this image"
375+
},
376+
{
377+
"type": "image",
378+
"data": "base64encodeddata",
379+
"filename": "screenshot.png"
380+
}
381+
]
382+
}`
383+
)
384+
385+
// Test image message with filename
386+
promptMessage, err := parser.UnmarshalJsonBytes[PromptMessage]([]byte(imageMessageWithFilename))
387+
if err != nil {
388+
t.Error(err)
389+
}
390+
if promptMessage.Role != "user" {
391+
t.Error("role is not user")
392+
}
393+
content := promptMessage.Content.([]PromptMessageContent)
394+
if content[0].Type != "image" {
395+
t.Error("type is not image")
396+
}
397+
if content[0].Filename != "example_image.jpg" {
398+
t.Errorf("expected filename 'example_image.jpg', got '%s'", content[0].Filename)
399+
}
400+
401+
// Test document message with filename
402+
promptMessage, err = parser.UnmarshalJsonBytes[PromptMessage]([]byte(documentMessageWithFilename))
403+
if err != nil {
404+
t.Error(err)
405+
}
406+
content = promptMessage.Content.([]PromptMessageContent)
407+
if content[0].Type != "document" {
408+
t.Error("type is not document")
409+
}
410+
if content[0].Filename != "document.pdf" {
411+
t.Errorf("expected filename 'document.pdf', got '%s'", content[0].Filename)
412+
}
413+
if content[0].MimeType != "application/pdf" {
414+
t.Error("mime_type is not application/pdf")
415+
}
416+
417+
// Test audio message with filename
418+
promptMessage, err = parser.UnmarshalJsonBytes[PromptMessage]([]byte(audioMessageWithFilename))
419+
if err != nil {
420+
t.Error(err)
421+
}
422+
content = promptMessage.Content.([]PromptMessageContent)
423+
if content[0].Type != "audio" {
424+
t.Error("type is not audio")
425+
}
426+
if content[0].Filename != "audio_sample.mp3" {
427+
t.Errorf("expected filename 'audio_sample.mp3', got '%s'", content[0].Filename)
428+
}
429+
430+
// Test video message with filename
431+
promptMessage, err = parser.UnmarshalJsonBytes[PromptMessage]([]byte(videoMessageWithFilename))
432+
if err != nil {
433+
t.Error(err)
434+
}
435+
content = promptMessage.Content.([]PromptMessageContent)
436+
if content[0].Type != "video" {
437+
t.Error("type is not video")
438+
}
439+
if content[0].Filename != "video_sample.mp4" {
440+
t.Errorf("expected filename 'video_sample.mp4', got '%s'", content[0].Filename)
441+
}
442+
443+
// Test mixed content with filename
444+
promptMessage, err = parser.UnmarshalJsonBytes[PromptMessage]([]byte(mixedContentWithFilename))
445+
if err != nil {
446+
t.Error(err)
447+
}
448+
content = promptMessage.Content.([]PromptMessageContent)
449+
if len(content) != 2 {
450+
t.Errorf("expected 2 content items, got %d", len(content))
451+
}
452+
if content[0].Type != "text" {
453+
t.Error("first content type is not text")
454+
}
455+
if content[1].Type != "image" {
456+
t.Error("second content type is not image")
457+
}
458+
if content[1].Filename != "screenshot.png" {
459+
t.Errorf("expected filename 'screenshot.png', got '%s'", content[1].Filename)
460+
}
461+
}
462+
463+
func TestPromptMessageContentWithoutFilename(t *testing.T) {
464+
const (
465+
imageWithoutFilename = `
466+
{
467+
"role": "user",
468+
"content": [
469+
{
470+
"type": "image",
471+
"data": "base64encodeddata"
472+
}
473+
]
474+
}`
475+
)
476+
477+
// Test that messages without filename still work (backward compatibility)
478+
promptMessage, err := parser.UnmarshalJsonBytes[PromptMessage]([]byte(imageWithoutFilename))
479+
if err != nil {
480+
t.Error(err)
481+
}
482+
content := promptMessage.Content.([]PromptMessageContent)
483+
if content[0].Type != "image" {
484+
t.Error("type is not image")
485+
}
486+
// Filename should be empty string when not provided
487+
if content[0].Filename != "" {
488+
t.Errorf("expected empty filename, got '%s'", content[0].Filename)
489+
}
490+
}

0 commit comments

Comments
 (0)