Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Support image URLs in tool outputs for Langchain::Assistant #894

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions lib/langchain/assistant.rb
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,12 @@ def add_message_and_run!(content: nil, image_url: nil)
# Submit tool output
#
# @param tool_call_id [String] The ID of the tool call to submit output for
# @param output [String] The output of the tool
# @param content [String] The text output returned by the tool
# @param image_url [String, nil] Optional URL of an image returned by the tool (defaults to nil)
# @return [Array<Langchain::Message>] The messages
def submit_tool_output(tool_call_id:, output:)
def submit_tool_output(tool_call_id:, content:, image_url: nil)
# TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked
add_message(role: @llm_adapter.tool_role, content: output, tool_call_id: tool_call_id)
add_message(role: @llm_adapter.tool_role, content: content, tool_call_id: tool_call_id, image_url: image_url)
end

# Delete all messages
Expand Down Expand Up @@ -371,9 +372,9 @@ def run_tool(tool_call)

# Call the callback if set
tool_execution_callback.call(tool_call_id, tool_name, method_name, tool_arguments) if tool_execution_callback # rubocop:disable Style/SafeNavigation
output = tool_instance.send(method_name, **tool_arguments)
content, image_url = tool_instance.send(method_name, **tool_arguments)

submit_tool_output(tool_call_id: tool_call_id, output: output)
submit_tool_output(tool_call_id: tool_call_id, content: content, image_url: image_url)
end

# Build a message
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/assistant/messages/anthropic_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def tool_hash
{
type: "tool_result",
tool_use_id: tool_call_id,
content: content
content: build_content_array
}
]
}
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/assistant/messages/mistral_ai_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def system_hash
def tool_hash
{
role: "tool",
content: content,
content: build_content_array,
tool_call_id: tool_call_id
}
end
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/assistant/messages/openai_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def tool_hash
{
role: "tool",
tool_call_id: tool_call_id,
content: build_content_array
content: build_content_array # Using image_url with tools is not supported by OpenAI (Image URLs are only allowed for messages with role 'user', but this message with role 'tool' contains an image URL.)
}
end

Expand Down
35 changes: 28 additions & 7 deletions spec/langchain/assistant/assistant_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,17 @@

describe "#submit_tool_output" do
it "adds a message to the thread" do
subject.submit_tool_output(tool_call_id: "123", output: "bar")
subject.submit_tool_output(tool_call_id: "123", content: "bar")
expect(subject.messages.last.role).to eq("tool")
expect(subject.messages.last.content).to eq("bar")
end

it "adds an image to the message" do
subject.submit_tool_output(tool_call_id: "123", image_url: "https://example.com/image.jpg", content: "Hello")
expect(subject.messages.last.role).to eq("tool")
expect(subject.messages.last.content).to eq("Hello")
expect(subject.messages.last.image_url).to eq("https://example.com/image.jpg")
end
end

describe "#run" do
Expand Down Expand Up @@ -564,10 +571,17 @@

describe "#submit_tool_output" do
it "adds a message to the thread" do
subject.submit_tool_output(tool_call_id: "123", output: "bar")
subject.submit_tool_output(tool_call_id: "123", content: "bar")
expect(subject.messages.last.role).to eq("tool")
expect(subject.messages.last.content).to eq("bar")
end

it "adds an image to the message" do
subject.submit_tool_output(tool_call_id: "123", image_url: "https://example.com/image.jpg", content: "Hello")
expect(subject.messages.last.role).to eq("tool")
expect(subject.messages.last.content).to eq("Hello")
expect(subject.messages.last.image_url).to eq("https://example.com/image.jpg")
end
end

describe "#run" do
Expand Down Expand Up @@ -658,7 +672,7 @@
"type" => "function"
}
]},
{content: "4.0", role: "tool", tool_call_id: "call_9TewGANaaIjzY31UCpAAGLeV"}
{content: [{type: "text", text: "4.0"}], role: "tool", tool_call_id: "call_9TewGANaaIjzY31UCpAAGLeV"}
],
tools: calculator.class.function_schemas.to_openai_format,
tool_choice: "auto"
Expand Down Expand Up @@ -916,12 +930,19 @@
end
end

describe "submit_tool_output" do
describe "#submit_tool_output" do
it "adds a message to the thread" do
subject.submit_tool_output(tool_call_id: "123", output: "bar")
subject.submit_tool_output(tool_call_id: "123", content: "bar")
expect(subject.messages.last.role).to eq("function")
expect(subject.messages.last.content).to eq("bar")
end

it "does not add image to the message" do
subject.submit_tool_output(tool_call_id: "123", content: "Hello", image_url: "https://example.com/image.jpg")
expect(subject.messages.last.role).to eq("function")
expect(subject.messages.last.content).to eq("Hello")
expect(subject.messages.last.image_url).to be_nil
end
end

describe "#run" do
Expand Down Expand Up @@ -1100,7 +1121,7 @@

describe "submit_tool_output" do
it "adds a message to the thread" do
subject.submit_tool_output(tool_call_id: "123", output: "bar")
subject.submit_tool_output(tool_call_id: "123", content: "bar")
expect(subject.messages.last.role).to eq("tool_result")
expect(subject.messages.last.content).to eq("bar")
end
Expand Down Expand Up @@ -1218,7 +1239,7 @@
"input" => {"input" => "2+2"}
}
]},
{role: "user", content: [{type: "tool_result", tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H", content: "4.0"}]}
{role: "user", content: [{type: "tool_result", tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H", content: [{type: "text", text: "4.0"}]}]}
],
tools: calculator.class.function_schemas.to_anthropic_format,
tool_choice: {disable_parallel_tool_use: false, type: "auto"},
Expand Down
32 changes: 31 additions & 1 deletion spec/langchain/assistant/messages/anthropic_message_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,42 @@
{
type: "tool_result",
tool_use_id: "toolu_014eSx9oBA5DMe8gZqaqcJ3H",
content: "4.0"
content: [
{
type: "text",
text: "4.0"
}
]
}
]
}
)
end

it "returns tool_hash with image_url" do
message = described_class.new(role: "tool_result", image_url: "https://example.com/image.jpg")
allow(message).to receive(:image).and_return(double(base64: "base64_data", mime_type: "image/jpeg"))

expect(message.to_hash).to eq(
role: "user",
content: [
{
type: "tool_result",
tool_use_id: nil,
content: [
{
type: "image",
source: {
type: "base64",
data: "base64_data",
media_type: "image/jpeg"
}
}
]
}
]
)
end
end

context "when role is user" do
Expand Down
25 changes: 23 additions & 2 deletions spec/langchain/assistant/messages/mistral_ai_message_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
let(:message) { described_class.new(role: "tool", content: "Hello, world!", tool_calls: [], tool_call_id: "123") }

it "returns a hash with the tool_call_id key" do
expect(message.to_hash).to eq({role: "tool", content: "Hello, world!", tool_call_id: "123"})
expect(message.to_hash).to eq({role: "tool", content: [{text: "Hello, world!", type: "text"}], tool_call_id: "123"})
end
end

Expand All @@ -36,7 +36,7 @@
end
end

context "when image_url is present" do
context "when image_url is present in user message" do
let(:message) { described_class.new(role: "user", content: "Please describe this image", image_url: "https://example.com/image.jpg") }

it "returns a hash with the image_url key" do
Expand All @@ -49,5 +49,26 @@
})
end
end

context "when image_url is present in tool message" do
let(:tool_call) {
{"id" => "call_9TewGANaaIjzY31UCpAAGLeV",
"type" => "function",
"function" => {"name" => "dummy_tool__take_photo"}}
}

let(:message) { described_class.new(role: "tool", content: "Hello, world!", image_url: "https://example.com/image.jpg", tool_calls: [tool_call], tool_call_id: "123") }

it "returns a hash with the image_url key" do
expect(message.to_hash).to eq({
role: "tool",
content: [
{text: "Hello, world!", type: "text"},
{image_url: "https://example.com/image.jpg", type: "image_url"}
],
tool_call_id: "123"
})
end
end
end
end
15 changes: 15 additions & 0 deletions spec/langchain/assistant/messages/openai_message_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,21 @@
it "returns a tool_hash" do
expect(message.to_hash).to eq({role: "tool", content: [{type: "text", text: "Hello, world!"}], tool_call_id: "123"})
end

context "when image_url is present" do
let(:message) { described_class.new(role: "tool", content: "Hello, world!", image_url: "https://example.com/image.jpg", tool_calls: [], tool_call_id: "123") }

it "returns a tool_hash with the image_url key" do
expect(message.to_hash).to eq({
role: "tool",
content: [
{type: "text", text: "Hello, world!"},
{type: "image_url", image_url: {url: "https://example.com/image.jpg"}}
],
tool_call_id: "123"
})
end
end
end

context "when role is assistant" do
Expand Down