From ed872c44b78681b9883ba06562be45428861cf13 Mon Sep 17 00:00:00 2001 From: Mohammed Imaduddin Date: Tue, 21 Jan 2025 11:59:38 +0530 Subject: [PATCH 1/2] feat: add support for base64 encoded audio files --- src/rai/rai/messages/multimodal.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/rai/rai/messages/multimodal.py b/src/rai/rai/messages/multimodal.py index 8db862be..8cd2ddfe 100644 --- a/src/rai/rai/messages/multimodal.py +++ b/src/rai/rai/messages/multimodal.py @@ -27,7 +27,7 @@ class MultimodalArtifact(TypedDict): class MultimodalMessage(BaseMessage): images: Optional[List[str]] = None - audios: Optional[Any] = None + audios: Optional[List[str]] = None def __init__( self, @@ -35,8 +35,9 @@ def __init__( ): super().__init__(**kwargs) # type: ignore - if self.audios not in [None, []]: - raise ValueError("Audio is not yet supported") + # remove the audio blocking check + # if self.audios not in [None, []]: + # raise ValueError("Audio is not yet supported") _content: List[Union[str, Dict[str, Union[Dict[str, str], str]]]] = [] @@ -56,6 +57,19 @@ def __init__( for image in self.images ] _content.extend(_image_content) + + # aduio content handling (used audio/wav as MIME type) + if isinstance(self.audios, list): + _audio_content = [ + { + "type": "audio_url", + "audio_url": { + "url": f"data:audio/wav;base64,{audio}", + }, + } + for audio in self.audios + ] + _content.extend(_audio_content) self.content = _content @property From 73ddef7396baeebecd0907f0bfdbb9d70b79e65f Mon Sep 17 00:00:00 2001 From: Mohammed Imaduddin <123477562+mdimado@users.noreply.github.com> Date: Wed, 22 Jan 2025 11:52:50 +0530 Subject: [PATCH 2/2] Update src/rai/rai/messages/multimodal.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kajetan Rachwał <69043608+rachwalk@users.noreply.github.com> --- src/rai/rai/messages/multimodal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/rai/rai/messages/multimodal.py b/src/rai/rai/messages/multimodal.py index 8cd2ddfe..bccffa1b 100644 --- a/src/rai/rai/messages/multimodal.py +++ b/src/rai/rai/messages/multimodal.py @@ -58,7 +58,7 @@ def __init__( ] _content.extend(_image_content) - # aduio content handling (used audio/wav as MIME type) + # audio content handling (used audio/wav as MIME type) if isinstance(self.audios, list): _audio_content = [ {