diff --git a/core/http/endpoints/openai/realtime.go b/core/http/endpoints/openai/realtime.go index b36c16031..84e199dd9 100644 --- a/core/http/endpoints/openai/realtime.go +++ b/core/http/endpoints/openai/realtime.go @@ -23,6 +23,7 @@ import ( "github.com/mudler/LocalAI/core/templates" laudio "github.com/mudler/LocalAI/pkg/audio" "github.com/mudler/LocalAI/pkg/functions" + "github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/grpc/proto" model "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/reasoning" @@ -949,7 +950,12 @@ func triggerResponse(session *Session, conv *Conversation, c *LockedWebsocket, o case types.MessageContentTypeInputAudio: textContent += content.Transcript case types.MessageContentTypeInputImage: - msg.StringImages = append(msg.StringImages, content.ImageURL) + img, err := utils.GetContentURIAsBase64(content.ImageURL) + if err != nil { + xlog.Warn("Failed to process image", "error", err) + continue + } + msg.StringImages = append(msg.StringImages, img) imgIndex++ nrOfImgsInMessage++ } diff --git a/core/http/endpoints/openai/types/types.go b/core/http/endpoints/openai/types/types.go index ee2e35e66..751e79b6f 100644 --- a/core/http/endpoints/openai/types/types.go +++ b/core/http/endpoints/openai/types/types.go @@ -175,8 +175,8 @@ type ToolFunction struct { // The description of the function, including guidance on when and how to call it, and guidance about what to tell the user when calling (if anything). Description string `json:"description"` - // The type of the tool, i.e. function. - Parameters any `json:"parameters"` + // The jsonschema representing the parameters + Parameters any `json:"parameters,omitempty"` } func (t ToolFunction) ToolType() ToolType {