-
Notifications
You must be signed in to change notification settings - Fork 38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support the Beta Anthropic prompt-caching / cache_control feature #208
Comments
Claude Sonnet 3.5 suggestions: We'll create extended versions of the LangChain4j classes in the com.devoxx.genie.chatmodel.anthropic package. This approach allows us to add the new functionality while remaining compatible with the existing LangChain4j structure. // 1. Create DevoxxAnthropicCreateMessageRequest.java
package com.devoxx.genie.chatmodel.anthropic;
import dev.langchain4j.model.anthropic.internal.api.AnthropicCreateMessageRequest;
import dev.langchain4j.model.anthropic.internal.api.AnthropicMessage;
import dev.langchain4j.model.anthropic.internal.api.AnthropicTool;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.SuperBuilder;
import java.util.List;
/**
 * Anthropic message-creation request whose {@code system} prompt is a list of
 * {@link DevoxxAnthropicSystemContent} blocks, so that an individual block can carry its own
 * {@code cacheControl} marker.
 *
 * <p>NOTE(review): {@code @SuperBuilder} normally requires the superclass to use
 * {@code @SuperBuilder} as well — confirm that {@code AnthropicCreateMessageRequest} does in the
 * langchain4j version in use.
 */
@Data
@EqualsAndHashCode(callSuper = true)
@SuperBuilder(toBuilder = true)
public class DevoxxAnthropicCreateMessageRequest extends AnthropicCreateMessageRequest {
// System prompt expressed as a list of content blocks.
private List<DevoxxAnthropicSystemContent> system;
// Returns the block-based system prompt stored in this subclass.
// NOTE(review): @Override assumes the superclass declares getSystem() with a compatible
// return type — TODO confirm; otherwise this will not compile.
@Override
public List<DevoxxAnthropicSystemContent> getSystem() {
return system;
}
// Replaces the block-based system prompt.
// NOTE(review): Lombok's @Data would presumably generate an equivalent setter; the explicit one is kept as written.
public void setSystem(List<DevoxxAnthropicSystemContent> system) {
this.system = system;
}
}
// 2. Create DevoxxAnthropicSystemContent.java
package com.devoxx.genie.chatmodel.anthropic;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL;
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
@JsonInclude(NON_NULL)
@JsonIgnoreProperties(ignoreUnknown = true)
public class DevoxxAnthropicSystemContent {
private String type;
private String text;
private DevoxxAnthropicCacheControl cacheControl;
}
// 3. Create DevoxxAnthropicCacheControl.java
package com.devoxx.genie.chatmodel.anthropic;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL;
/**
 * Cache-control marker that can be attached to a {@code DevoxxAnthropicSystemContent} block.
 *
 * <p>Null fields are left out of the serialized JSON ({@code NON_NULL}), and unknown properties
 * are ignored when reading JSON back into this type.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
@JsonInclude(NON_NULL)
@JsonIgnoreProperties(ignoreUnknown = true)
public class DevoxxAnthropicCacheControl {
// Cache type as a raw string; the example factory in this proposal passes "ephemeral".
private String type;
}
// 4. Create DevoxxAnthropicChatModel.java
package com.devoxx.genie.chatmodel.anthropic;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.anthropic.AnthropicChatModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.anthropic.internal.api.AnthropicCreateMessageRequest;
import dev.langchain4j.model.anthropic.internal.api.AnthropicCreateMessageResponse;
import dev.langchain4j.model.anthropic.internal.client.AnthropicClient;
import java.util.List;
/**
 * {@link AnthropicChatModel} variant that sends a block-based system prompt and talks to the API
 * through its own {@link AnthropicClient} configured with the prompt-caching beta header.
 *
 * <p>NOTE(review): this relies on {@code processResponse}, {@code toAnthropicMessages},
 * {@code sanitizeMessages} and the {@code get*()} accessors being available from the superclass
 * or from the methods still to be implemented — confirm against the langchain4j version in use.
 */
public class DevoxxAnthropicChatModel extends AnthropicChatModel {

    private final List<DevoxxAnthropicSystemContent> system;
    private final AnthropicClient client;

    /** Builder that adds the block-based {@code system} prompt to the inherited settings. */
    public static class Builder extends AnthropicChatModel.Builder<DevoxxAnthropicChatModel, Builder> {

        private List<DevoxxAnthropicSystemContent> system;

        /** Sets the system prompt blocks to send with every request. */
        public Builder system(List<DevoxxAnthropicSystemContent> system) {
            this.system = system;
            return this;
        }

        @Override
        public DevoxxAnthropicChatModel build() {
            return new DevoxxAnthropicChatModel(this);
        }
    }

    /** Returns a fresh {@link Builder}. */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Creates the model from the given builder and sets up the dedicated client.
     *
     * @param builder source of the system blocks and connection settings
     */
    protected DevoxxAnthropicChatModel(Builder builder) {
        super(builder);
        this.system = builder.system;
        this.client = newPromptCachingClient(builder);
    }

    /** Builds the client from the builder's connection settings plus the prompt-caching beta header. */
    private static AnthropicClient newPromptCachingClient(Builder settings) {
        return AnthropicClient.builder()
                .apiKey(settings.apiKey)
                .baseUrl(settings.baseUrl)
                .version(settings.version)
                .beta("prompt-caching-2024-07-31") // Set the beta header for cache control
                .timeout(settings.timeout)
                .maxRetries(settings.maxRetries)
                .logRequests(settings.logRequests)
                .logResponses(settings.logResponses)
                .build();
    }

    /**
     * Sends the conversation together with the configured system blocks and returns the reply.
     *
     * @param messages conversation to send
     * @return the processed model response
     */
    @Override
    public Response<AiMessage> generate(List<ChatMessage> messages) {
        return processResponse(client.createMessage(createRequest(messages)));
    }

    /** Assembles a non-streaming request from the model settings and the given messages. */
    private DevoxxAnthropicCreateMessageRequest createRequest(List<ChatMessage> messages) {
        return DevoxxAnthropicCreateMessageRequest.builder()
                .model(getModelName())
                .messages(toAnthropicMessages(sanitizeMessages(messages)))
                .system(system)
                .maxTokens(getMaxTokens())
                .stopSequences(getStopSequences())
                .stream(false)
                .temperature(getTemperature())
                .topP(getTopP())
                .topK(getTopK())
                .build();
    }
    // Implement other necessary methods...
}
// 5. Modify AnthropicChatModelFactory.java
package com.devoxx.genie.chatmodel.anthropic;
// ... existing imports ...
public class AnthropicChatModelFactory implements ChatModelFactory {
/**
 * Builds a {@code DevoxxAnthropicChatModel} for the given configuration.
 *
 * <p>The API key comes from {@code getApiKey()}; model name, temperature, top-p, max tokens and
 * max retries are copied from {@code chatModel}; the system prompt blocks come from
 * {@code createSystemContent(chatModel)}.
 *
 * @param chatModel configuration to read the model settings from
 * @return the configured chat model
 */
@Override
public ChatLanguageModel createChatModel(@NotNull ChatModel chatModel) {
return DevoxxAnthropicChatModel.builder()
.apiKey(getApiKey())
.modelName(chatModel.getModelName())
.temperature(chatModel.getTemperature())
.topP(chatModel.getTopP())
.maxTokens(chatModel.getMaxTokens())
.maxRetries(chatModel.getMaxRetries())
.system(createSystemContent(chatModel))
.build();
}
/**
 * Produces the example system prompt: a plain text block followed by a text block marked with
 * an {@code "ephemeral"} cache control.
 *
 * @param chatModel model configuration; not read by this example implementation
 * @return the two system content blocks, in that order
 */
private List<DevoxxAnthropicSystemContent> createSystemContent(ChatModel chatModel) {
    // This is an example. Adjust according to your needs.
    DevoxxAnthropicSystemContent assistantRole = DevoxxAnthropicSystemContent.builder()
            .type("text")
            .text("You are an AI assistant.")
            .build();

    DevoxxAnthropicCacheControl ephemeralCache = DevoxxAnthropicCacheControl.builder()
            .type("ephemeral")
            .build();
    DevoxxAnthropicSystemContent cacheableBlock = DevoxxAnthropicSystemContent.builder()
            .type("text")
            .text("This is cacheable content.")
            .cacheControl(ephemeralCache)
            .build();

    return Arrays.asList(assistantRole, cacheableBlock);
}
// ... rest of the class ...
} Here's an explanation of the changes:
To use this new feature:
This approach extends the functionality of the langchain4j classes without modifying them directly. The new classes are placed in the com.devoxx.genie.chatmodel.anthropic package. You may need to make additional adjustments to ensure full compatibility with your existing code and to handle any new fields in the Anthropic API responses related to cache usage. Why not use an enum for the value "ephemeral" in "cache_control": {"type": "ephemeral"}? You're absolutely right. Using an enum for the "ephemeral" value is a better practice. It provides type safety, makes the code more maintainable, and prevents potential errors from typos. Let's modify our implementation to use an enum for the cache control type. // 1. Modify DevoxxAnthropicCacheControl.java
package com.devoxx.genie.chatmodel.anthropic;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import static com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL;
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
@JsonInclude(NON_NULL)
@JsonIgnoreProperties(ignoreUnknown = true)
public class DevoxxAnthropicCacheControl {
private CacheControlType type;
public enum CacheControlType {
EPHEMERAL
}
}
// 2. Update DevoxxAnthropicChatModel.java (only the relevant part)
package com.devoxx.genie.chatmodel.anthropic;
// ... other imports ...
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
/**
 * Revised {@link AnthropicChatModel} variant that serializes the request to JSON itself, using a
 * snake_case {@link ObjectMapper}, before handing it to the client.
 *
 * <p>NOTE(review): {@code system}, {@code client}, {@code createRequest} and
 * {@code processResponse} are not declared in this excerpt — presumably they sit behind the
 * elided "other fields"/"other methods" markers; verify.
 */
public class DevoxxAnthropicChatModel extends AnthropicChatModel {
    // ... other fields ...
    private final ObjectMapper objectMapper;

    /**
     * Creates the model, its dedicated client (with the prompt-caching beta header) and the
     * snake_case JSON mapper.
     *
     * @param builder source of the system blocks and connection settings
     */
    protected DevoxxAnthropicChatModel(Builder builder) {
        super(builder);
        this.system = builder.system;
        this.client = AnthropicClient.builder()
                .apiKey(builder.apiKey)
                .baseUrl(builder.baseUrl)
                .version(builder.version)
                .beta("prompt-caching-2024-07-31") // Set the beta header for cache control
                .timeout(builder.timeout)
                .maxRetries(builder.maxRetries)
                .logRequests(builder.logRequests)
                .logResponses(builder.logResponses)
                .build();

        // Property names are written in snake_case (e.g. cacheControl -> cache_control).
        ObjectMapper snakeCaseMapper = new ObjectMapper();
        snakeCaseMapper.setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE);
        this.objectMapper = snakeCaseMapper;
    }

    /**
     * Builds the request, serializes it to JSON and returns the processed reply.
     *
     * <p>NOTE(review): this assumes {@code client.createMessage} accepts a raw JSON string —
     * confirm that such an overload exists.
     *
     * @param messages conversation to send
     * @return the processed model response
     */
    @Override
    public Response<AiMessage> generate(List<ChatMessage> messages) {
        String jsonRequest = convertRequestToJson(createRequest(messages));
        return processResponse(client.createMessage(jsonRequest));
    }

    /**
     * Serializes the request with the snake_case mapper.
     *
     * @throws RuntimeException wrapping any failure raised during serialization
     */
    private String convertRequestToJson(DevoxxAnthropicCreateMessageRequest request) {
        final String json;
        try {
            json = objectMapper.writeValueAsString(request);
        } catch (Exception serializationFailure) {
            throw new RuntimeException("Failed to convert request to JSON", serializationFailure);
        }
        return json;
    }
    // ... other methods ...
}
// 3. Update AnthropicChatModelFactory.java (only the relevant part)
package com.devoxx.genie.chatmodel.anthropic;
// ... existing imports ...
public class AnthropicChatModelFactory implements ChatModelFactory {
// ... other methods ...
/**
 * Produces the example system prompt: a plain text block followed by a text block marked with
 * the {@code EPHEMERAL} cache-control type.
 *
 * @param chatModel model configuration; not read by this example implementation
 * @return the two system content blocks, in that order
 */
private List<DevoxxAnthropicSystemContent> createSystemContent(ChatModel chatModel) {
    // This is an example. Adjust according to your needs.
    DevoxxAnthropicSystemContent assistantRole = DevoxxAnthropicSystemContent.builder()
            .type("text")
            .text("You are an AI assistant.")
            .build();

    DevoxxAnthropicCacheControl ephemeralCache = DevoxxAnthropicCacheControl.builder()
            .type(DevoxxAnthropicCacheControl.CacheControlType.EPHEMERAL)
            .build();
    DevoxxAnthropicSystemContent cacheableBlock = DevoxxAnthropicSystemContent.builder()
            .type("text")
            .text("This is cacheable content.")
            .cacheControl(ephemeralCache)
            .build();

    return Arrays.asList(assistantRole, cacheableBlock);
}
// ... rest of the class ...
} Here's an explanation of the changes:
These changes provide several benefits:
Remember to update any other parts of your code that might be creating or using DevoxxAnthropicCacheControl objects. This implementation maintains the extension of langchain4j classes without modifying them directly, while improving type safety and maintainability with the use of an enum for cache control type. |
There's also a Gemini implementation, but of course it is implemented in a different way: |
When files are added to the "window context", they should be marked with cache_control, so that the context is cached for follow-up questions when using Anthropic. As long as Langchain4J doesn't support this, we'll need to extend the LC4J Anthropic base client.
https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
The text was updated successfully, but these errors were encountered: