@@ -312,3 +312,179 @@ func TestLLMResultChunkCompatibility(t *testing.T) {
312312 result = parser .MarshalJson (llmResultChunkPointer )
313313 assert .Contains (t , string (result ), `"prompt_messages":[]` )
314314}
315+
316+ func TestMultiModalPromptMessageWithFilename (t * testing.T ) {
317+ const (
318+ imageMessageWithFilename = `
319+ {
320+ "role": "user",
321+ "content": [
322+ {
323+ "type": "image",
324+ "data": "base64encodeddata",
325+ "filename": "example_image.jpg"
326+ }
327+ ]
328+ }`
329+
330+ documentMessageWithFilename = `
331+ {
332+ "role": "user",
333+ "content": [
334+ {
335+ "type": "document",
336+ "url": "https://example.com/document.pdf",
337+ "filename": "document.pdf",
338+ "mime_type": "application/pdf"
339+ }
340+ ]
341+ }`
342+
343+ audioMessageWithFilename = `
344+ {
345+ "role": "user",
346+ "content": [
347+ {
348+ "type": "audio",
349+ "base64_data": "base64audioencodeddata",
350+ "filename": "audio_sample.mp3",
351+ "format": "mp3"
352+ }
353+ ]
354+ }`
355+
356+ videoMessageWithFilename = `
357+ {
358+ "role": "user",
359+ "content": [
360+ {
361+ "type": "video",
362+ "url": "https://example.com/video.mp4",
363+ "filename": "video_sample.mp4"
364+ }
365+ ]
366+ }`
367+
368+ mixedContentWithFilename = `
369+ {
370+ "role": "user",
371+ "content": [
372+ {
373+ "type": "text",
374+ "data": "Please analyze this image"
375+ },
376+ {
377+ "type": "image",
378+ "data": "base64encodeddata",
379+ "filename": "screenshot.png"
380+ }
381+ ]
382+ }`
383+ )
384+
385+ // Test image message with filename
386+ promptMessage , err := parser.UnmarshalJsonBytes [PromptMessage ]([]byte (imageMessageWithFilename ))
387+ if err != nil {
388+ t .Error (err )
389+ }
390+ if promptMessage .Role != "user" {
391+ t .Error ("role is not user" )
392+ }
393+ content := promptMessage .Content .([]PromptMessageContent )
394+ if content [0 ].Type != "image" {
395+ t .Error ("type is not image" )
396+ }
397+ if content [0 ].Filename != "example_image.jpg" {
398+ t .Errorf ("expected filename 'example_image.jpg', got '%s'" , content [0 ].Filename )
399+ }
400+
401+ // Test document message with filename
402+ promptMessage , err = parser.UnmarshalJsonBytes [PromptMessage ]([]byte (documentMessageWithFilename ))
403+ if err != nil {
404+ t .Error (err )
405+ }
406+ content = promptMessage .Content .([]PromptMessageContent )
407+ if content [0 ].Type != "document" {
408+ t .Error ("type is not document" )
409+ }
410+ if content [0 ].Filename != "document.pdf" {
411+ t .Errorf ("expected filename 'document.pdf', got '%s'" , content [0 ].Filename )
412+ }
413+ if content [0 ].MimeType != "application/pdf" {
414+ t .Error ("mime_type is not application/pdf" )
415+ }
416+
417+ // Test audio message with filename
418+ promptMessage , err = parser.UnmarshalJsonBytes [PromptMessage ]([]byte (audioMessageWithFilename ))
419+ if err != nil {
420+ t .Error (err )
421+ }
422+ content = promptMessage .Content .([]PromptMessageContent )
423+ if content [0 ].Type != "audio" {
424+ t .Error ("type is not audio" )
425+ }
426+ if content [0 ].Filename != "audio_sample.mp3" {
427+ t .Errorf ("expected filename 'audio_sample.mp3', got '%s'" , content [0 ].Filename )
428+ }
429+
430+ // Test video message with filename
431+ promptMessage , err = parser.UnmarshalJsonBytes [PromptMessage ]([]byte (videoMessageWithFilename ))
432+ if err != nil {
433+ t .Error (err )
434+ }
435+ content = promptMessage .Content .([]PromptMessageContent )
436+ if content [0 ].Type != "video" {
437+ t .Error ("type is not video" )
438+ }
439+ if content [0 ].Filename != "video_sample.mp4" {
440+ t .Errorf ("expected filename 'video_sample.mp4', got '%s'" , content [0 ].Filename )
441+ }
442+
443+ // Test mixed content with filename
444+ promptMessage , err = parser.UnmarshalJsonBytes [PromptMessage ]([]byte (mixedContentWithFilename ))
445+ if err != nil {
446+ t .Error (err )
447+ }
448+ content = promptMessage .Content .([]PromptMessageContent )
449+ if len (content ) != 2 {
450+ t .Errorf ("expected 2 content items, got %d" , len (content ))
451+ }
452+ if content [0 ].Type != "text" {
453+ t .Error ("first content type is not text" )
454+ }
455+ if content [1 ].Type != "image" {
456+ t .Error ("second content type is not image" )
457+ }
458+ if content [1 ].Filename != "screenshot.png" {
459+ t .Errorf ("expected filename 'screenshot.png', got '%s'" , content [1 ].Filename )
460+ }
461+ }
462+
463+ func TestPromptMessageContentWithoutFilename (t * testing.T ) {
464+ const (
465+ imageWithoutFilename = `
466+ {
467+ "role": "user",
468+ "content": [
469+ {
470+ "type": "image",
471+ "data": "base64encodeddata"
472+ }
473+ ]
474+ }`
475+ )
476+
477+ // Test that messages without filename still work (backward compatibility)
478+ promptMessage , err := parser.UnmarshalJsonBytes [PromptMessage ]([]byte (imageWithoutFilename ))
479+ if err != nil {
480+ t .Error (err )
481+ }
482+ content := promptMessage .Content .([]PromptMessageContent )
483+ if content [0 ].Type != "image" {
484+ t .Error ("type is not image" )
485+ }
486+ // Filename should be empty string when not provided
487+ if content [0 ].Filename != "" {
488+ t .Errorf ("expected empty filename, got '%s'" , content [0 ].Filename )
489+ }
490+ }
0 commit comments