diff --git a/lib/langchain/assistant.rb b/lib/langchain/assistant.rb index 25411fff..5395fae1 100644 --- a/lib/langchain/assistant.rb +++ b/lib/langchain/assistant.rb @@ -173,11 +173,12 @@ def add_message_and_run!(content: nil, image_url: nil) # Submit tool output # # @param tool_call_id [String] The ID of the tool call to submit output for - # @param output [String] The output of the tool + # @param content [String] The content of the tool call + # @param image_url [String] The image URL of the tool call # @return [Array] The messages - def submit_tool_output(tool_call_id:, output:) + def submit_tool_output(tool_call_id:, content:, image_url: nil) # TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked - add_message(role: @llm_adapter.tool_role, content: output, tool_call_id: tool_call_id) + add_message(role: @llm_adapter.tool_role, content: content, tool_call_id: tool_call_id, image_url: image_url) end # Delete all messages @@ -371,9 +372,9 @@ def run_tool(tool_call) # Call the callback if set tool_execution_callback.call(tool_call_id, tool_name, method_name, tool_arguments) if tool_execution_callback # rubocop:disable Style/SafeNavigation - output = tool_instance.send(method_name, **tool_arguments) + content, image_url = tool_instance.send(method_name, **tool_arguments) - submit_tool_output(tool_call_id: tool_call_id, output: output) + submit_tool_output(tool_call_id: tool_call_id, content: content, image_url: image_url) end # Build a message diff --git a/lib/langchain/assistant/messages/anthropic_message.rb b/lib/langchain/assistant/messages/anthropic_message.rb index 70f38209..1285cfeb 100644 --- a/lib/langchain/assistant/messages/anthropic_message.rb +++ b/lib/langchain/assistant/messages/anthropic_message.rb @@ -76,7 +76,7 @@ def tool_hash { type: "tool_result", tool_use_id: tool_call_id, - content: content + content: build_content_array } ] } diff --git a/lib/langchain/assistant/messages/mistral_ai_message.rb b/lib/langchain/assistant/messages/mistral_ai_message.rb index 2c081d94..ba3de404 100644 --- a/lib/langchain/assistant/messages/mistral_ai_message.rb +++ b/lib/langchain/assistant/messages/mistral_ai_message.rb @@ -102,7 +102,7 @@ def system_hash def tool_hash { role: "tool", - content: content, + content: build_content_array, tool_call_id: tool_call_id } end diff --git a/lib/langchain/assistant/messages/openai_message.rb b/lib/langchain/assistant/messages/openai_message.rb index 44e57a34..e19b1246 100644 --- a/lib/langchain/assistant/messages/openai_message.rb +++ b/lib/langchain/assistant/messages/openai_message.rb @@ -117,7 +117,7 @@ def tool_hash { role: "tool", tool_call_id: tool_call_id, - content: build_content_array + content: build_content_array # Using image_url with tools is not supported by OpenAI (Image URLs are only allowed for messages with role 'user', but this message with role 'tool' contains an image URL.) } end diff --git a/spec/langchain/assistant/assistant_spec.rb b/spec/langchain/assistant/assistant_spec.rb index 432d4df9..e5e96640 100644 --- a/spec/langchain/assistant/assistant_spec.rb +++ b/spec/langchain/assistant/assistant_spec.rb @@ -197,10 +197,17 @@ describe "#submit_tool_output" do it "adds a message to the thread" do - subject.submit_tool_output(tool_call_id: "123", output: "bar") + subject.submit_tool_output(tool_call_id: "123", content: "bar") expect(subject.messages.last.role).to eq("tool") expect(subject.messages.last.content).to eq("bar") end + + it "adds an image to the message" do + subject.submit_tool_output(tool_call_id: "123", image_url: "https://example.com/image.jpg", content: "Hello") + expect(subject.messages.last.role).to eq("tool") + expect(subject.messages.last.content).to eq("Hello") + expect(subject.messages.last.image_url).to eq("https://example.com/image.jpg") + end end describe "#run" do @@ -564,10 +571,17 @@ describe "#submit_tool_output" do it "adds a message to the thread" do - subject.submit_tool_output(tool_call_id: "123", output: "bar") + subject.submit_tool_output(tool_call_id: "123", content: "bar") expect(subject.messages.last.role).to eq("tool") expect(subject.messages.last.content).to eq("bar") end + + it "adds an image to the message" do + subject.submit_tool_output(tool_call_id: "123", image_url: "https://example.com/image.jpg", content: "Hello") + expect(subject.messages.last.role).to eq("tool") + expect(subject.messages.last.content).to eq("Hello") + expect(subject.messages.last.image_url).to eq("https://example.com/image.jpg") + end end describe "#run" do @@ -658,7 +672,7 @@ "type" => "function" } ]}, - {content: "4.0", role: "tool", tool_call_id: "call_9TewGANaaIjzY31UCpAAGLeV"} + {content: [{type: "text", text: "4.0"}], role: "tool", tool_call_id: "call_9TewGANaaIjzY31UCpAAGLeV"} ], tools: calculator.class.function_schemas.to_openai_format, tool_choice: "auto" @@ -916,12 +930,19 @@ end end - describe "submit_tool_output" do + describe "#submit_tool_output" do it "adds a message to the thread" do - subject.submit_tool_output(tool_call_id: "123", output: "bar") + subject.submit_tool_output(tool_call_id: "123", content: "bar") expect(subject.messages.last.role).to eq("function") expect(subject.messages.last.content).to eq("bar") end + + it "does not add image to the message" do + subject.submit_tool_output(tool_call_id: "123", content: "Hello", image_url: "https://example.com/image.jpg") + expect(subject.messages.last.role).to eq("function") + expect(subject.messages.last.content).to eq("Hello") + expect(subject.messages.last.image_url).to be_nil + end end describe "#run" do @@ -1100,7 +1121,7 @@ describe "submit_tool_output" do it "adds a message to the thread" do - subject.submit_tool_output(tool_call_id: "123", output: "bar") + subject.submit_tool_output(tool_call_id: "123", content: "bar") expect(subject.messages.last.role).to eq("tool_result") expect(subject.messages.last.content).to eq("bar") end @@ -1218,7 +1239,7 @@ "input" => {"input" => "2+2"} } ]}, - {role: "user", content: [{type: "tool_result", tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H", content: "4.0"}]} + {role: "user", content: [{type: "tool_result", tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H", content: [{type: "text", text: "4.0"}]}]} ], tools: calculator.class.function_schemas.to_anthropic_format, tool_choice: {disable_parallel_tool_use: false, type: "auto"}, diff --git a/spec/langchain/assistant/messages/anthropic_message_spec.rb b/spec/langchain/assistant/messages/anthropic_message_spec.rb index a1f04924..ef8f132a 100644 --- a/spec/langchain/assistant/messages/anthropic_message_spec.rb +++ b/spec/langchain/assistant/messages/anthropic_message_spec.rb @@ -108,12 +108,42 @@ { type: "tool_result", tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H", - content: "4.0" + content: [ + { + type: "text", + text: "4.0" + } + ] } ] } ) end + + it "returns tool_hash with image_url" do + message = described_class.new(role: "tool_result", image_url: "https://example.com/image.jpg") + allow(message).to receive(:image).and_return(double(base64: "base64_data", mime_type: "image/jpeg")) + + expect(message.to_hash).to eq( + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: nil, + content: [ + { + type: "image", + source: { + type: "base64", + data: "base64_data", + media_type: "image/jpeg" + } + } + ] + } + ] + ) + end end context "when role is user" do diff --git a/spec/langchain/assistant/messages/mistral_ai_message_spec.rb b/spec/langchain/assistant/messages/mistral_ai_message_spec.rb index 9f4af833..d979a90f 100644 --- a/spec/langchain/assistant/messages/mistral_ai_message_spec.rb +++ b/spec/langchain/assistant/messages/mistral_ai_message_spec.rb @@ -18,7 +18,7 @@ let(:message) { described_class.new(role: "tool", content: "Hello, world!", tool_calls: [], tool_call_id: "123") } it "returns a hash with the tool_call_id key" do - expect(message.to_hash).to eq({role: "tool", content: "Hello, world!", tool_call_id: "123"}) + expect(message.to_hash).to eq({role: "tool", content: [{text: "Hello, world!", type: "text"}], tool_call_id: "123"}) end end @@ -36,7 +36,7 @@ end end - context "when image_url is present" do + context "when image_url is present in user message" do let(:message) { described_class.new(role: "user", content: "Please describe this image", image_url: "https://example.com/image.jpg") } it "returns a hash with the image_url key" do @@ -49,5 +49,26 @@ }) end end + + context "when image_url is present in tool message" do + let(:tool_call) { + {"id" => "call_9TewGANaaIjzY31UCpAAGLeV", + "type" => "function", + "function" => {"name" => "dummy_tool__take_photo"}} + } + + let(:message) { described_class.new(role: "tool", content: "Hello, world!", image_url: "https://example.com/image.jpg", tool_calls: [tool_call], tool_call_id: "123") } + + it "returns a hash with the image_url key" do + expect(message.to_hash).to eq({ + role: "tool", + content: [ + {text: "Hello, world!", type: "text"}, + {image_url: "https://example.com/image.jpg", type: "image_url"} + ], + tool_call_id: "123" + }) + end + end end end diff --git a/spec/langchain/assistant/messages/openai_message_spec.rb b/spec/langchain/assistant/messages/openai_message_spec.rb index 6e74bf68..e089053c 100644 --- a/spec/langchain/assistant/messages/openai_message_spec.rb +++ b/spec/langchain/assistant/messages/openai_message_spec.rb @@ -43,6 +43,21 @@ it "returns a tool_hash" do expect(message.to_hash).to eq({role: "tool", content: [{type: "text", text: "Hello, world!"}], tool_call_id: "123"}) end + + context "when image_url is present" do + let(:message) { described_class.new(role: "tool", content: "Hello, world!", image_url: "https://example.com/image.jpg", tool_calls: [], tool_call_id: "123") } + + it "returns a tool_hash with the image_url key" do + expect(message.to_hash).to eq({ + role: "tool", + content: [ + {type: "text", text: "Hello, world!"}, + {type: "image_url", image_url: {url: "https://example.com/image.jpg"}} + ], + tool_call_id: "123" + }) + end + end end context "when role is assistant" do