From c7a2fe37befd9862738c0297ff6d5289d513d1a8 Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Fri, 29 Nov 2024 14:54:54 -0800 Subject: [PATCH 1/7] Rename test to test_openai_chat_model_acomplete_ollama (#381) --- pyproject.toml | 4 +- ...ama[Return True.-output_types1-bool].yaml} | 6 +- ... [1, 2, 3, 4, 5]-output_types2-list].yaml} | 6 +- ..._ollama[Say hello!-output_types0-str].yaml | 144 ++++++++++++++++++ ...e_async[Say hello!-output_types0-str].yaml | 129 ---------------- ...lama[Return True.-output_types1-bool].yaml | 6 +- ...n [1, 2, 3, 4, 5]-output_types2-list].yaml | 6 +- ..._ollama[Say hello!-output_types0-str].yaml | 63 ++++---- .../test_openai_chat_model_ollama.py | 2 +- 9 files changed, 187 insertions(+), 179 deletions(-) rename tests/chat_model/cassettes/test_openai_chat_model_ollama/{test_openai_chat_model_complete_async[Return True.-output_types1-bool].yaml => test_openai_chat_model_acomplete_ollama[Return True.-output_types1-bool].yaml} (82%) rename tests/chat_model/cassettes/test_openai_chat_model_ollama/{test_openai_chat_model_complete_async[Return [1, 2, 3, 4, 5]-output_types2-list].yaml => test_openai_chat_model_acomplete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml} (83%) create mode 100644 tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Say hello!-output_types0-str].yaml delete mode 100644 tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Say hello!-output_types0-str].yaml diff --git a/pyproject.toml b/pyproject.toml index 3d79c8d..f6e3434 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,11 +78,11 @@ asyncio_mode = "auto" markers = [ "anthropic: Tests that query the Anthropic API. Requires the ANTHROPIC_API_KEY environment variable to be set.", "litellm_anthropic: Tests that query the Anthropic API via litellm. Requires the ANTHROPIC_API_KEY environment variable to be set.", - "litellm_ollama: Tests that query Ollama via Litellm. Requires ollama to be installed and running on localhost:11434.", + "litellm_ollama: Tests that query Ollama via litellm. Requires ollama to be installed and running on localhost:11434.", "litellm_openai: Tests that query the OpenAI API via litellm. Requires the OPENAI_API_KEY environment variable to be set.", "mistral: Tests that query the Mistral API (via openai). Requires the MISTRAL_API_KEY environment variable to be set.", "openai: Tests that query the OpenAI API. Requires the OPENAI_API_KEY environment variable to be set.", - "openai_ollama: Tests that query Ollama via OpenAI. Requires ollama to be installed and running on localhost:11434.", + "openai_ollama: Tests that query Ollama via openai. 
Requires ollama to be installed and running on localhost:11434.", ] [tool.ruff] diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Return True.-output_types1-bool].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Return True.-output_types1-bool].yaml similarity index 82% rename from tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Return True.-output_types1-bool].yaml rename to tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Return True.-output_types1-bool].yaml index 77a69d2..f99b860 100644 --- a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Return True.-output_types1-bool].yaml +++ b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Return True.-output_types1-bool].yaml @@ -41,10 +41,10 @@ interactions: uri: http://localhost:11434/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-573","object":"chat.completion.chunk","created":1732915688,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_wqha42ih","type":"function","function":{"name":"return_bool","arguments":"{\"value\":true}"}}]},"finish_reason":null}]} + string: 'data: {"id":"chatcmpl-148","object":"chat.completion.chunk","created":1732916961,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_j25hbnia","type":"function","function":{"name":"return_bool","arguments":"{\"value\":true}"}}]},"finish_reason":null}]} - data: {"id":"chatcmpl-573","object":"chat.completion.chunk","created":1732915688,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + data: {"id":"chatcmpl-148","object":"chat.completion.chunk","created":1732916961,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} data: [DONE] @@ -55,7 +55,7 @@ interactions: Content-Type: - text/event-stream Date: - - Fri, 29 Nov 2024 21:28:08 GMT + - Fri, 29 Nov 2024 21:49:21 GMT Transfer-Encoding: - chunked status: diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Return [1, 2, 3, 4, 5]-output_types2-list].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml similarity index 83% rename from tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Return [1, 2, 3, 4, 5]-output_types2-list].yaml rename to tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml index e2fc055..111ef5c 100644 --- a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Return [1, 2, 3, 4, 5]-output_types2-list].yaml +++ b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml @@ -42,11 +42,11 @@ interactions: uri: http://localhost:11434/v1/chat/completions response: body: - string: 'data: 
{"id":"chatcmpl-877","object":"chat.completion.chunk","created":1732915691,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_kichi5ua","type":"function","function":{"name":"return_list_of_int","arguments":"{\"value\":\"[1, + string: 'data: {"id":"chatcmpl-622","object":"chat.completion.chunk","created":1732916932,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_d45b8f5x","type":"function","function":{"name":"return_list_of_int","arguments":"{\"value\":\"[1, 2, 3, 4, 5]\"}"}}]},"finish_reason":null}]} - data: {"id":"chatcmpl-877","object":"chat.completion.chunk","created":1732915691,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + data: {"id":"chatcmpl-622","object":"chat.completion.chunk","created":1732916932,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} data: [DONE] @@ -57,7 +57,7 @@ interactions: Content-Type: - text/event-stream Date: - - Fri, 29 Nov 2024 21:28:11 GMT + - Fri, 29 Nov 2024 21:48:52 GMT Transfer-Encoding: - chunked status: diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Say hello!-output_types0-str].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Say hello!-output_types0-str].yaml new file mode 100644 index 0000000..023545e --- /dev/null +++ b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_acomplete_ollama[Say hello!-output_types0-str].yaml @@ -0,0 +1,144 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Say hello!"}], "model": "llama3.1", + "stream": true, "stream_options": {"include_usage": true}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '137' + content-type: + - application/json + host: + - localhost:11434 + user-agent: + - AsyncOpenAI/Python 1.54.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.4 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.15 + method: POST + uri: http://localhost:11434/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916924,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916924,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"!"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916924,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + It"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916924,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"''s"},"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916924,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + nice"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + to"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + meet"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + you"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + Is"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + there"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + something"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + I"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + can"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + help"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + with"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916925,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + or"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + would"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + you"},"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + like"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + to"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + chat"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} + + + data: {"id":"chatcmpl-894","object":"chat.completion.chunk","created":1732916926,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + + + data: [DONE] + + + ' + headers: + Content-Type: + - text/event-stream + Date: + - Fri, 29 Nov 2024 21:48:44 GMT + Transfer-Encoding: + - chunked + status: + code: 200 + message: OK +version: 1 diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Say hello!-output_types0-str].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Say hello!-output_types0-str].yaml deleted file mode 100644 index bbbe710..0000000 --- a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_async[Say hello!-output_types0-str].yaml +++ /dev/null @@ -1,129 +0,0 @@ -interactions: -- request: - body: '{"messages": [{"role": "user", "content": "Say hello!"}], "model": "llama3.1", - "stream": true, "stream_options": {"include_usage": true}}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '137' - content-type: - - application/json - host: - - localhost:11434 - user-agent: - - AsyncOpenAI/Python 1.54.4 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.54.4 - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.10.15 - method: POST - uri: http://localhost:11434/v1/chat/completions - response: - body: - string: 'data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915683,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"!"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - How"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - 
are"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - you"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - doing"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - today"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - Is"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - there"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - anything"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - I"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915684,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - can"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - help"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - with"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - or"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - chat"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - about"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} - - - data: 
{"id":"chatcmpl-402","object":"chat.completion.chunk","created":1732915685,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} - - - data: [DONE] - - - ' - headers: - Content-Type: - - text/event-stream - Date: - - Fri, 29 Nov 2024 21:28:03 GMT - Transfer-Encoding: - - chunked - status: - code: 200 - message: OK -version: 1 diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return True.-output_types1-bool].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return True.-output_types1-bool].yaml index 7b3ab70..3686fe6 100644 --- a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return True.-output_types1-bool].yaml +++ b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return True.-output_types1-bool].yaml @@ -41,10 +41,10 @@ interactions: uri: http://localhost:11434/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-341","object":"chat.completion.chunk","created":1732915676,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_dxlghlch","type":"function","function":{"name":"return_bool","arguments":"{\"value\":true}"}}]},"finish_reason":null}]} + string: 'data: {"id":"chatcmpl-826","object":"chat.completion.chunk","created":1732916920,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_kve3oltz","type":"function","function":{"name":"return_bool","arguments":"{\"value\":true}"}}]},"finish_reason":null}]} - data: {"id":"chatcmpl-341","object":"chat.completion.chunk","created":1732915676,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + data: {"id":"chatcmpl-826","object":"chat.completion.chunk","created":1732916920,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} data: [DONE] @@ -55,7 +55,7 @@ interactions: Content-Type: - text/event-stream Date: - - Fri, 29 Nov 2024 21:27:56 GMT + - Fri, 29 Nov 2024 21:48:40 GMT Transfer-Encoding: - chunked status: diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml index 4b8fe6c..986c0b3 100644 --- a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml +++ b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Return [1, 2, 3, 4, 5]-output_types2-list].yaml @@ -42,11 +42,11 @@ interactions: uri: http://localhost:11434/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-168","object":"chat.completion.chunk","created":1732915680,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_ygc23skr","type":"function","function":{"name":"return_list_of_int","arguments":"{\"value\":\"[1, + string: 'data: 
{"id":"chatcmpl-908","object":"chat.completion.chunk","created":1732916923,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"","tool_calls":[{"id":"call_28sm5ma0","type":"function","function":{"name":"return_list_of_int","arguments":"{\"value\":\"[1, 2, 3, 4, 5]\"}"}}]},"finish_reason":null}]} - data: {"id":"chatcmpl-168","object":"chat.completion.chunk","created":1732915680,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + data: {"id":"chatcmpl-908","object":"chat.completion.chunk","created":1732916923,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} data: [DONE] @@ -57,7 +57,7 @@ interactions: Content-Type: - text/event-stream Date: - - Fri, 29 Nov 2024 21:28:00 GMT + - Fri, 29 Nov 2024 21:48:43 GMT Transfer-Encoding: - chunked status: diff --git a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Say hello!-output_types0-str].yaml b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Say hello!-output_types0-str].yaml index 7f3a541..d6cea13 100644 --- a/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Say hello!-output_types0-str].yaml +++ b/tests/chat_model/cassettes/test_openai_chat_model_ollama/test_openai_chat_model_complete_ollama[Say hello!-output_types0-str].yaml @@ -37,98 +37,91 @@ interactions: uri: http://localhost:11434/v1/chat/completions response: body: - string: 'data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]} + string: 'data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916915,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"!"},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"!"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - It"},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + How"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"''s"},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + are"},"finish_reason":null}]} - data: 
{"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - nice"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - to"},"finish_reason":null}]} - - - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - meet"},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + you"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" - you"},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + today"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915672,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Is"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" there"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" something"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" I"},"finish_reason":null}]} - data: 
{"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" can"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916916,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" help"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" you"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" with"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" or"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" would"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" you"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" like"},"finish_reason":null}]} - data: 
{"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915673,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" to"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915674,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" chat"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915674,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"finish_reason":null}]} - data: {"id":"chatcmpl-725","object":"chat.completion.chunk","created":1732915674,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + data: {"id":"chatcmpl-10","object":"chat.completion.chunk","created":1732916917,"model":"llama3.1","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} data: [DONE] @@ -139,7 +132,7 @@ interactions: Content-Type: - text/event-stream Date: - - Fri, 29 Nov 2024 21:27:52 GMT + - Fri, 29 Nov 2024 21:48:35 GMT Transfer-Encoding: - chunked status: diff --git a/tests/chat_model/test_openai_chat_model_ollama.py b/tests/chat_model/test_openai_chat_model_ollama.py index b9d0a66..6f3174b 100644 --- a/tests/chat_model/test_openai_chat_model_ollama.py +++ b/tests/chat_model/test_openai_chat_model_ollama.py @@ -30,7 +30,7 @@ def test_openai_chat_model_complete_ollama(prompt, output_types, expected_output ], ) @pytest.mark.openai_ollama -async def test_openai_chat_model_complete_async( +async def test_openai_chat_model_acomplete_ollama( prompt, output_types, expected_output_type ): chat_model = OpenaiChatModel("llama3.1", base_url="http://localhost:11434/v1/") From 0cc46d940f924a908afa8f8677c52966a3a6a3b4 Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Sat, 30 Nov 2024 01:23:42 -0800 Subject: [PATCH 2/7] Add `(Async)StreamedResponse` for multi-part responses (#383) * Add disable_parallel_tool_use to Anthropic tool_choice arg * Update cassettes for disable_parallel_tool_use arg * uv add --dev pydeps * Add (Async)StreamedResponse type * Implement StreamedResponse for AnthropicChatModel * Add make dep-diagram * Move parsing logic to _parsing. Use in openai, litellm models * Add TODOs for exceptions and parsing * Use contains_string_type in MistralChatModel * Fix pyright issue with Mistral staticmethod * Add docs for StreamedResponse * Handle StreamedResponse in message_to_openai_message * Load .env env vars first for testing * Add (a)consume. Fix agroupby to consume unconsumed group * Fix non-consumed streamed output groups. 
Test openai StreamedResponse * Add make test-vcr-once * Add tests for StreamedResponse with AnthropicChatModel * Fix mypy errors * Add missing comment re finishing OutputStream group --- .gitignore | 7 +- Makefile | 9 + docs/streaming.md | 44 ++++ docs/structured-outputs.md | 4 + pyproject.toml | 1 + src/magentic/__init__.py | 2 + src/magentic/_parsing.py | 27 +++ src/magentic/_streamed_response.py | 63 +++++ .../chat_model/anthropic_chat_model.py | 43 ++-- src/magentic/chat_model/base.py | 13 +- src/magentic/chat_model/function_schema.py | 3 + src/magentic/chat_model/litellm_chat_model.py | 20 +- src/magentic/chat_model/mistral_chat_model.py | 10 +- src/magentic/chat_model/openai_chat_model.py | 53 +++-- src/magentic/chat_model/stream.py | 49 +++- src/magentic/streaming.py | 21 +- ...complete_async_parallel_function_call.yaml | 62 +++-- ...del_acomplete_async_streamed_response.yaml | 222 +++++++++++++++++ ...l_complete_no_structured_output_error.yaml | 30 +-- ...model_complete_parallel_function_call.yaml | 59 +++-- ...chat_model_complete_streamed_response.yaml | 224 ++++++++++++++++++ ...del_acomplete_async_streamed_response.yaml | 184 ++++++++++++++ ...chat_model_complete_streamed_response.yaml | 184 ++++++++++++++ tests/chat_model/test_anthropic_chat_model.py | 39 +++ tests/chat_model/test_openai_chat_model.py | 55 +++++ tests/conftest.py | 3 +- tests/test_streaming.py | 7 + uv.lock | 23 ++ 28 files changed, 1320 insertions(+), 141 deletions(-) create mode 100644 src/magentic/_parsing.py create mode 100644 src/magentic/_streamed_response.py create mode 100644 tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_streamed_response.yaml create mode 100644 tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_streamed_response.yaml create mode 100644 tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_acomplete_async_streamed_response.yaml create mode 100644 tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_complete_streamed_response.yaml diff --git a/.gitignore b/.gitignore index 16cab9f..049431f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,9 @@ -# https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore +## Custom + +# Dependency diagram +magentic.svg + +## Python https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Makefile b/Makefile index 11370df..acedee2 100644 --- a/Makefile +++ b/Makefile @@ -43,6 +43,10 @@ testcov: test # Run tests and generate a coverage report @echo "building coverage html" uv run coverage html --show-contexts +.PHONY: test-vcr-once +test-vcr-once: # Run the tests and record new VCR cassettes + uv run pytest -vv --record-mode=once + .PHONY: test-fix-vcr test-fix-vcr: # Run the last failed tests and rewrite the VCR cassettes uv run pytest -vv --last-failed --last-failed-no-failures=none --record-mode=rewrite @@ -55,5 +59,10 @@ docs: # Build the documentation docs-serve: # Build and serve the documentation uv run mkdocs serve +.PHONY: dep-diagram +dep-diagram: # Generate a dependency diagram + uv run pydeps src/magentic --no-show --only "magentic." --rmprefix "magentic." 
-x "magentic.logger" --exclude-exact "magentic.chat_model" + open -a Arc magentic.svg + .PHONY: all all: format lint typecheck test diff --git a/docs/streaming.md b/docs/streaming.md index ed29911..348f350 100644 --- a/docs/streaming.md +++ b/docs/streaming.md @@ -79,3 +79,47 @@ for hero in create_superhero_team("The Food Dudes"): # 4.03s : name='Captain Carrot' age=35 power='Super strength and agility from eating carrots' enemies=['The Sugar Squad', 'The Greasy Gang'] # 6.05s : name='Ice Cream Girl' age=25 power='Can create ice cream out of thin air' enemies=['The Hot Sauce Squad', 'The Healthy Eaters'] ``` + +## StreamedResponse + +Some LLMs have the ability to generate text output and make tool calls in the same response. This allows them to perform chain-of-thought reasoning or provide additional context to the user. In magentic, the `StreamedResponse` (or `AsyncStreamedResponse`) class can be used to request this type of output. This object is an iterable of `StreamedStr` (or `AsyncStreamedStr`) and `FunctionCall` instances. + +!!! warning "Consuming StreamedStr" + + The StreamedStr object must be iterated over before the next item in the `StreamedResponse` is processed, otherwise the string output will be lost. This is because the `StreamedResponse` and `StreamedStr` share the same underlying generator, so advancing the `StreamedResponse` iterator skips over the `StreamedStr` items. The `StreamedStr` object has internal caching so after iterating over it once the chunks will remain available. + +In the example below, we request that the LLM generates a greeting and then calls a function to get the weather for two cities. The `StreamedResponse` object is then iterated over to print the output, and the `StreamedStr` and `FunctionCall` items are processed separately. + +```python +from magentic import prompt, FunctionCall, StreamedResponse, StreamedStr + + +def get_weather(city: str) -> str: + return f"The weather in {city} is 20°C." + + +@prompt( + "Say hello, then get the weather for: {cities}", + functions=[get_weather], +) +def describe_weather(cities: list[str]) -> StreamedResponse: ... + + +response = describe_weather(["Cape Town", "San Francisco"]) +for item in response: + if isinstance(item, StreamedStr): + for chunk in item: + # print the chunks as they are received + print(chunk, sep="", end="") + print() + if isinstance(item, FunctionCall): + # print the function call, then call it and print the result + print(item) + print(item()) + +# Hello! I'll get the weather for Cape Town and San Francisco for you. +# FunctionCall(, 'Cape Town') +# The weather in Cape Town is 20°C. +# FunctionCall(, 'San Francisco') +# The weather in San Francisco is 20°C. +``` diff --git a/docs/structured-outputs.md b/docs/structured-outputs.md index 69f660c..1b1e1f6 100644 --- a/docs/structured-outputs.md +++ b/docs/structured-outputs.md @@ -148,6 +148,10 @@ print(hero_defeated) ## Chain-of-Thought Prompting +!!! warning "StreamedResponse" + + It is now recommended to use `StreamedResponse` for chain-of-thought prompting, as this uses the LLM provider's native chain-of-thought capabilities. See [StreamedResponse](streaming.md#StreamedResponse) for more information. + Using a simple Python type as the return annotation might result in poor results as the LLM has no time to arrange its thoughts before answering. To allow the LLM to work through this "chain of thought" you can instead return a pydantic model with initial fields for explaining the final response. 
```python hl_lines="5-9 20" diff --git a/pyproject.toml b/pyproject.toml index f6e3434..33c5f61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dev = [ "coverage>=7.6.4", "pytest-mock>=3.14.0", "vcrpy>=6.0.2", + "pydeps>=2.0.1", ] docs = [ "blacken-docs>=1.16.0", diff --git a/src/magentic/__init__.py b/src/magentic/__init__.py index def56b2..0a5086f 100644 --- a/src/magentic/__init__.py +++ b/src/magentic/__init__.py @@ -1,5 +1,7 @@ from ._pydantic import ConfigDict as ConfigDict from ._pydantic import with_config as with_config +from ._streamed_response import AsyncStreamedResponse as AsyncStreamedResponse +from ._streamed_response import StreamedResponse as StreamedResponse from .chat_model.message import AnyMessage as AnyMessage from .chat_model.message import AssistantMessage as AssistantMessage from .chat_model.message import FunctionResultMessage as FunctionResultMessage diff --git a/src/magentic/_parsing.py b/src/magentic/_parsing.py new file mode 100644 index 0000000..eb9c56c --- /dev/null +++ b/src/magentic/_parsing.py @@ -0,0 +1,27 @@ +"""Functions for parsing and checking return types.""" + +from collections.abc import Iterable + +from magentic._streamed_response import AsyncStreamedResponse, StreamedResponse +from magentic.function_call import AsyncParallelFunctionCall, ParallelFunctionCall +from magentic.streaming import AsyncStreamedStr, StreamedStr +from magentic.typing import is_any_origin_subclass + + +def contains_string_type(types: Iterable[type]) -> bool: + return is_any_origin_subclass( + types, + (str, StreamedStr, AsyncStreamedStr, StreamedResponse, AsyncStreamedResponse), + ) + + +def contains_parallel_function_call_type(types: Iterable[type]) -> bool: + return is_any_origin_subclass( + types, + ( + ParallelFunctionCall, + AsyncParallelFunctionCall, + StreamedResponse, + AsyncStreamedResponse, + ), + ) diff --git a/src/magentic/_streamed_response.py b/src/magentic/_streamed_response.py new file mode 100644 index 0000000..56a85fc --- /dev/null +++ b/src/magentic/_streamed_response.py @@ -0,0 +1,63 @@ +from collections.abc import AsyncIterable, AsyncIterator, Iterable, Iterator +from typing import Any + +from magentic.function_call import FunctionCall +from magentic.streaming import ( + AsyncStreamedStr, + CachedAsyncIterable, + CachedIterable, + StreamedStr, +) + + +class StreamedResponse: + """A streamed LLM response consisting of text output and tool calls. + + This is an iterable of StreamedStr and FunctionCall instances. + + Examples + -------- + >>> from magentic import prompt, StreamedResponse, StreamedStr, FunctionCall + >>> + >>> def get_weather(city: str) -> str: + >>> return f"The weather in {city} is 20°C." + >>> + >>> @prompt( + >>> "Say hello, then get the weather for: {cities}", + >>> functions=[get_weather], + >>> ) + >>> def describe_weather(cities: list[str]) -> StreamedResponse: ... + >>> + >>> response = describe_weather(["Cape Town", "San Francisco"]) + >>> + >>> for item in response: + >>> if isinstance(item, StreamedStr): + >>> for chunk in item: + >>> print(chunk, sep="", end="") + >>> print() + >>> if isinstance(item, FunctionCall): + >>> print(item) + >>> print(item()) + Hello! I'll get the weather for Cape Town and San Francisco for you. + FunctionCall(, 'Cape Town') + The weather in Cape Town is 20°C. + FunctionCall(, 'San Francisco') + The weather in San Francisco is 20°C. 
+ """ + + def __init__(self, stream: Iterable[StreamedStr | FunctionCall[Any]]): + self._stream = CachedIterable(stream) + + def __iter__(self) -> Iterator[StreamedStr | FunctionCall[Any]]: + yield from self._stream + + +class AsyncStreamedResponse: + """Async version of `StreamedResponse`.""" + + def __init__(self, stream: AsyncIterable[AsyncStreamedStr | FunctionCall[Any]]): + self._stream = CachedAsyncIterable(stream) + + async def __aiter__(self) -> AsyncIterator[AsyncStreamedStr | FunctionCall[Any]]: + async for item in self._stream: + yield item diff --git a/src/magentic/chat_model/anthropic_chat_model.py b/src/magentic/chat_model/anthropic_chat_model.py index b4ce4e7..fe60ded 100644 --- a/src/magentic/chat_model/anthropic_chat_model.py +++ b/src/magentic/chat_model/anthropic_chat_model.py @@ -14,6 +14,7 @@ import filetype +from magentic._parsing import contains_parallel_function_call_type, contains_string_type from magentic.chat_model.base import ( ChatModel, aparse_stream, @@ -48,19 +49,18 @@ ParallelFunctionCall, _create_unique_id, ) -from magentic.streaming import ( - AsyncStreamedStr, - StreamedStr, -) -from magentic.typing import is_any_origin_subclass from magentic.vision import UserImageMessage try: import anthropic from anthropic.lib.streaming import MessageStreamEvent from anthropic.lib.streaming._messages import accumulate_event - from anthropic.types import MessageParam, ToolParam - from anthropic.types.message_create_params import ToolChoice + from anthropic.types import ( + MessageParam, + ToolChoiceParam, + ToolChoiceToolParam, + ToolParam, + ) except ImportError as error: msg = "To use AnthropicChatModel you must install the `anthropic` package using `pip install 'magentic[anthropic]'`." raise ImportError(msg) from error @@ -157,6 +157,8 @@ def _(message: AssistantMessage[Any]) -> MessageParam: ], } + # TODO: Add support for StreamedResponse here + function_schema = function_schema_for_type(type(message.content)) return { "role": AnthropicMessageRole.ASSISTANT.value, @@ -227,7 +229,7 @@ def to_dict(self) -> ToolParam: "input_schema": self._function_schema.parameters, } - def as_tool_choice(self) -> ToolChoice: + def as_tool_choice(self, *, disable_parallel_tool_use: bool) -> ToolChoiceToolParam: return {"type": "tool", "name": self._function_schema.name} @@ -353,14 +355,19 @@ def temperature(self) -> float | None: def _get_tool_choice( *, tool_schemas: Sequence[BaseFunctionToolSchema[Any]], - allow_string_output: bool, - ) -> ToolChoice | anthropic.NotGiven: + output_types: Iterable[type], + ) -> ToolChoiceParam | anthropic.NotGiven: """Create the tool choice argument.""" - if allow_string_output: + if contains_string_type(output_types): return anthropic.NOT_GIVEN + disable_parallel_tool_use = not contains_parallel_function_call_type( + output_types + ) if len(tool_schemas) == 1: - return tool_schemas[0].as_tool_choice() - return {"type": "any"} + return tool_schemas[0].as_tool_choice( + disable_parallel_tool_use=disable_parallel_tool_use + ) + return {"type": "any", "disable_parallel_tool_use": disable_parallel_tool_use} @overload def complete( @@ -397,8 +404,6 @@ def complete( function_schemas = get_function_schemas(functions, output_types) tool_schemas = [BaseFunctionToolSchema(schema) for schema in function_schemas] - allow_string_output = is_any_origin_subclass(output_types, (str, StreamedStr)) - system, messages = _extract_system_message(messages) response: Iterator[MessageStreamEvent] = self._client.messages.stream( @@ -412,7 +417,7 @@ def complete( 
temperature=_if_given(self.temperature), tools=[schema.to_dict() for schema in tool_schemas] or anthropic.NOT_GIVEN, tool_choice=self._get_tool_choice( - tool_schemas=tool_schemas, allow_string_output=allow_string_output + tool_schemas=tool_schemas, output_types=output_types ), ).__enter__() stream = OutputStream( @@ -460,10 +465,6 @@ async def acomplete( function_schemas = get_async_function_schemas(functions, output_types) tool_schemas = [BaseFunctionToolSchema(schema) for schema in function_schemas] - allow_string_output = is_any_origin_subclass( - output_types, (str, AsyncStreamedStr) - ) - system, messages = _extract_system_message(messages) response: AsyncIterator[ @@ -479,7 +480,7 @@ async def acomplete( temperature=_if_given(self.temperature), tools=[schema.to_dict() for schema in tool_schemas] or anthropic.NOT_GIVEN, tool_choice=self._get_tool_choice( - tool_schemas=tool_schemas, allow_string_output=allow_string_output + tool_schemas=tool_schemas, output_types=output_types ), ).__aenter__() stream = AsyncOutputStream( diff --git a/src/magentic/chat_model/base.py b/src/magentic/chat_model/base.py index 2575097..4657033 100644 --- a/src/magentic/chat_model/base.py +++ b/src/magentic/chat_model/base.py @@ -7,6 +7,7 @@ from pydantic import ValidationError +from magentic._streamed_response import AsyncStreamedResponse, StreamedResponse from magentic.chat_model.message import AssistantMessage, Message from magentic.function_call import ( AsyncParallelFunctionCall, @@ -22,6 +23,7 @@ ) +# TODO: Export all exceptions from `magentic.exceptions` # TODO: Parent class with `output_message` attribute ? class StringNotAllowedError(Exception): """Raised when a string is returned by the LLM but not allowed.""" @@ -79,6 +81,7 @@ def __init__(self, output_message: Message[Any], tool_call_id: str, tool_name: s self.tool_call_id = tool_call_id +# TODO: Move this to same file where it is raised class ToolSchemaParseError(Exception): """Raised when the LLM output could not be parsed by the tool schema.""" @@ -100,6 +103,7 @@ def __init__( self.validation_error = validation_error +# TODO: Move this into _parsing # TODO: Make this a stream class with a close method and context management def parse_stream(stream: Iterator[Any], output_types: Iterable[type[R]]) -> R: """Parse and validate the LLM output stream against the allowed output types.""" @@ -107,14 +111,17 @@ def parse_stream(stream: Iterator[Any], output_types: Iterable[type[R]]) -> R: # TODO: option to error/warn/ignore extra objects # TODO: warn for degenerate output types ? 
obj = next(stream) - # TODO: Add type for mixed StreamedStr and FunctionCalls if isinstance(obj, StreamedStr): + if StreamedResponse in output_type_origins: + return cast(R, StreamedResponse(chain([obj], stream))) if StreamedStr in output_type_origins: return cast(R, obj) if str in output_type_origins: return cast(R, str(obj)) raise StringNotAllowedError(obj.truncate(100)) if isinstance(obj, FunctionCall): + if StreamedResponse in output_type_origins: + return cast(R, StreamedResponse(chain([obj], stream))) if ParallelFunctionCall in output_type_origins: return cast(R, ParallelFunctionCall(chain([obj], stream))) if FunctionCall in output_type_origins: @@ -133,12 +140,16 @@ async def aparse_stream( output_type_origins = [get_origin(type_) or type_ for type_ in output_types] obj = await anext(stream) if isinstance(obj, AsyncStreamedStr): + if AsyncStreamedResponse in output_type_origins: + return cast(R, AsyncStreamedResponse(achain(async_iter([obj]), stream))) if AsyncStreamedStr in output_type_origins: return cast(R, obj) if str in output_type_origins: return cast(R, await obj.to_string()) raise StringNotAllowedError(await obj.truncate(100)) if isinstance(obj, FunctionCall): + if AsyncStreamedResponse in output_type_origins: + return cast(R, AsyncStreamedResponse(achain(async_iter([obj]), stream))) if AsyncParallelFunctionCall in output_type_origins: return cast(R, AsyncParallelFunctionCall(achain(async_iter([obj]), stream))) if FunctionCall in output_type_origins: diff --git a/src/magentic/chat_model/function_schema.py b/src/magentic/chat_model/function_schema.py index 2d84c11..92595e8 100644 --- a/src/magentic/chat_model/function_schema.py +++ b/src/magentic/chat_model/function_schema.py @@ -9,6 +9,7 @@ from pydantic import BaseModel, TypeAdapter, create_model from magentic._pydantic import ConfigDict, get_pydantic_config, json_schema +from magentic._streamed_response import AsyncStreamedResponse, StreamedResponse from magentic.function_call import ( AsyncParallelFunctionCall, FunctionCall, @@ -461,6 +462,8 @@ def serialize_args(self, value: FunctionCall[T]) -> str: FunctionCall, ParallelFunctionCall, AsyncParallelFunctionCall, + StreamedResponse, + AsyncStreamedResponse, ) diff --git a/src/magentic/chat_model/litellm_chat_model.py b/src/magentic/chat_model/litellm_chat_model.py index 1bbde01..f38c9d7 100644 --- a/src/magentic/chat_model/litellm_chat_model.py +++ b/src/magentic/chat_model/litellm_chat_model.py @@ -5,6 +5,7 @@ from openai.lib.streaming.chat._completions import ChatCompletionStreamState from openai.types.chat import ChatCompletionNamedToolChoiceParam +from magentic._parsing import contains_string_type from magentic.chat_model.base import ChatModel, aparse_stream, parse_stream from magentic.chat_model.function_schema import ( get_async_function_schemas, @@ -22,11 +23,6 @@ StreamParser, StreamState, ) -from magentic.streaming import ( - AsyncStreamedStr, - StreamedStr, -) -from magentic.typing import is_any_origin_subclass try: import litellm @@ -154,10 +150,10 @@ def custom_llm_provider(self) -> str | None: def _get_tool_choice( *, tool_schemas: Sequence[BaseFunctionToolSchema[Any]], - allow_string_output: bool, + output_types: Iterable[type[R]], ) -> ChatCompletionNamedToolChoiceParam | Literal["required"] | None: """Create the tool choice argument.""" - if allow_string_output: + if contains_string_type(output_types): return None if len(tool_schemas) == 1: return tool_schemas[0].as_tool_choice() @@ -198,8 +194,6 @@ def complete( function_schemas = 
get_function_schemas(functions, output_types) tool_schemas = [BaseFunctionToolSchema(schema) for schema in function_schemas] - allow_string_output = is_any_origin_subclass(output_types, (str, StreamedStr)) - response = litellm.completion( model=self.model, messages=[message_to_openai_message(m) for m in messages], @@ -213,7 +207,7 @@ def complete( temperature=self.temperature, tools=[schema.to_dict() for schema in tool_schemas] or None, tool_choice=self._get_tool_choice( - tool_schemas=tool_schemas, allow_string_output=allow_string_output + tool_schemas=tool_schemas, output_types=output_types ), # type: ignore[arg-type,unused-ignore] ) assert not isinstance(response, ModelResponse) # noqa: S101 @@ -260,10 +254,6 @@ async def acomplete( function_schemas = get_async_function_schemas(functions, output_types) tool_schemas = [BaseFunctionToolSchema(schema) for schema in function_schemas] - allow_string_output = is_any_origin_subclass( - output_types, (str, AsyncStreamedStr) - ) - response = await litellm.acompletion( model=self.model, messages=[message_to_openai_message(m) for m in messages], @@ -277,7 +267,7 @@ async def acomplete( temperature=self.temperature, tools=[schema.to_dict() for schema in tool_schemas] or None, tool_choice=self._get_tool_choice( - tool_schemas=tool_schemas, allow_string_output=allow_string_output + tool_schemas=tool_schemas, output_types=output_types ), # type: ignore[arg-type,unused-ignore] ) assert not isinstance(response, ModelResponse) # noqa: S101 diff --git a/src/magentic/chat_model/mistral_chat_model.py b/src/magentic/chat_model/mistral_chat_model.py index b9a28df..5b0941b 100644 --- a/src/magentic/chat_model/mistral_chat_model.py +++ b/src/magentic/chat_model/mistral_chat_model.py @@ -6,6 +6,7 @@ import openai from openai.types.chat import ChatCompletionStreamOptionsParam +from magentic._parsing import contains_string_type from magentic.chat_model.base import ChatModel from magentic.chat_model.message import AssistantMessage, Message from magentic.chat_model.openai_chat_model import ( @@ -26,22 +27,23 @@ class _MistralToolChoice(Enum): class _MistralOpenaiChatModel(OpenaiChatModel): """Modified OpenaiChatModel to be compatible with Mistral API.""" - @staticmethod - def _get_stream_options() -> ChatCompletionStreamOptionsParam | openai.NotGiven: + def _get_stream_options(self) -> ChatCompletionStreamOptionsParam | openai.NotGiven: return openai.NOT_GIVEN @staticmethod def _get_tool_choice( # type: ignore[override] *, tool_schemas: Sequence[BaseFunctionToolSchema[Any]], - allow_string_output: bool, + output_types: Iterable[type], ) -> str | openai.NotGiven: """Create the tool choice argument. Mistral API has different options than the OpenAI API for `tool_choice`. 
See https://docs.mistral.ai/capabilities/function_calling/#tool_choice """ - return openai.NOT_GIVEN if allow_string_output else _MistralToolChoice.ANY.value + if contains_string_type(output_types): + return openai.NOT_GIVEN + return _MistralToolChoice.ANY.value def _get_parallel_tool_calls( self, *, tools_specified: bool, output_types: Iterable[type] diff --git a/src/magentic/chat_model/openai_chat_model.py b/src/magentic/chat_model/openai_chat_model.py index 9461c6f..b128948 100644 --- a/src/magentic/chat_model/openai_chat_model.py +++ b/src/magentic/chat_model/openai_chat_model.py @@ -22,6 +22,8 @@ ChatCompletionToolParam, ) +from magentic._parsing import contains_parallel_function_call_type, contains_string_type +from magentic._streamed_response import StreamedResponse from magentic.chat_model.base import ( ChatModel, aparse_stream, @@ -52,16 +54,11 @@ StreamState, ) from magentic.function_call import ( - AsyncParallelFunctionCall, FunctionCall, ParallelFunctionCall, _create_unique_id, ) -from magentic.streaming import ( - AsyncStreamedStr, - StreamedStr, -) -from magentic.typing import is_any_origin_subclass +from magentic.streaming import StreamedStr from magentic.vision import UserImageMessage @@ -158,6 +155,32 @@ def _(message: AssistantMessage[Any]) -> ChatCompletionMessageParam: ], } + if isinstance(message.content, StreamedResponse): + content: list[str] = [] + function_calls: list[FunctionCall[Any]] = [] + for item in message.content: + if isinstance(item, StreamedStr): + content.append(str(item)) + elif isinstance(item, FunctionCall): + function_calls.append(item) + return { + "role": OpenaiMessageRole.ASSISTANT.value, + "content": " ".join(content), + "tool_calls": [ + { + "id": function_call._unique_id, + "type": "function", + "function": { + "name": FunctionCallFunctionSchema(function_call.function).name, + "arguments": FunctionCallFunctionSchema( + function_call.function + ).serialize_args(function_call), + }, + } + for function_call in function_calls + ], + } + function_schema = function_schema_for_type(type(message.content)) return { "role": OpenaiMessageRole.ASSISTANT.value, @@ -391,10 +414,10 @@ def _get_stream_options(self) -> ChatCompletionStreamOptionsParam | openai.NotGi def _get_tool_choice( *, tool_schemas: Sequence[BaseFunctionToolSchema[Any]], - allow_string_output: bool, + output_types: Iterable[type], ) -> ChatCompletionToolChoiceOptionParam | openai.NotGiven: """Create the tool choice argument.""" - if allow_string_output: + if contains_string_type(output_types): return openai.NOT_GIVEN if len(tool_schemas) == 1: return tool_schemas[0].as_tool_choice() @@ -407,9 +430,7 @@ def _get_parallel_tool_calls( return openai.NOT_GIVEN if self.api_type == "azure": return openai.NOT_GIVEN - if is_any_origin_subclass(output_types, ParallelFunctionCall): - return openai.NOT_GIVEN - if is_any_origin_subclass(output_types, AsyncParallelFunctionCall): + if contains_parallel_function_call_type(output_types): return openai.NOT_GIVEN return False @@ -449,8 +470,6 @@ def complete( function_schemas = get_function_schemas(functions, output_types) tool_schemas = [BaseFunctionToolSchema(schema) for schema in function_schemas] - allow_string_output = is_any_origin_subclass(output_types, (str, StreamedStr)) - response: Iterator[ChatCompletionChunk] = self._client.chat.completions.create( model=self.model, messages=_add_missing_tool_calls_responses( @@ -464,7 +483,7 @@ def complete( temperature=_if_given(self.temperature), tools=[schema.to_dict() for schema in tool_schemas] or 
openai.NOT_GIVEN, tool_choice=self._get_tool_choice( - tool_schemas=tool_schemas, allow_string_output=allow_string_output + tool_schemas=tool_schemas, output_types=output_types ), parallel_tool_calls=self._get_parallel_tool_calls( tools_specified=bool(tool_schemas), output_types=output_types @@ -515,10 +534,6 @@ async def acomplete( function_schemas = get_async_function_schemas(functions, output_types) tool_schemas = [BaseFunctionToolSchema(schema) for schema in function_schemas] - allow_string_output = is_any_origin_subclass( - output_types, (str, AsyncStreamedStr) - ) - response: AsyncIterator[ ChatCompletionChunk ] = await self._async_client.chat.completions.create( @@ -534,7 +549,7 @@ async def acomplete( temperature=_if_given(self.temperature), tools=[schema.to_dict() for schema in tool_schemas] or openai.NOT_GIVEN, tool_choice=self._get_tool_choice( - tool_schemas=tool_schemas, allow_string_output=allow_string_output + tool_schemas=tool_schemas, output_types=output_types ), parallel_tool_calls=self._get_parallel_tool_calls( tools_specified=bool(tool_schemas), output_types=output_types diff --git a/src/magentic/chat_model/stream.py b/src/magentic/chat_model/stream.py index 1ca3c79..da08f28 100644 --- a/src/magentic/chat_model/stream.py +++ b/src/magentic/chat_model/stream.py @@ -17,8 +17,10 @@ StreamedStr, aapply, achain, + aconsume, apply, async_iter, + consume, ) ItemT = TypeVar("ItemT") @@ -116,6 +118,7 @@ def _tool_call( yield item.args def __stream__(self) -> Iterator[StreamedStr | OutputT]: + # This works similarly to `itertools.groupby` stream = apply(self._state.update, self._stream) current_item_ref = [next(stream)] while current_item_ref: @@ -123,8 +126,11 @@ def __stream__(self) -> Iterator[StreamedStr | OutputT]: if self._parser.is_content(current_item): stream = chain([current_item], stream) yield StreamedStr(self._streamed_str(stream, current_item_ref)) + if not current_item_ref: + # Finish the group to allow advancing to the next one + consume(self._streamed_str(stream, current_item_ref)) elif self._parser.is_tool_call(current_item): - tool_calls_stream = ( + tool_calls_stream: Iterator[FunctionCallChunk] = ( tool_call_chunk for item in chain([current_item], stream) for tool_call_chunk in self._parser.iter_tool_calls(item) @@ -146,13 +152,24 @@ def __stream__(self) -> Iterator[StreamedStr | OutputT]: tool_name=current_tool_call_chunk.name, ) try: + tool_calls_stream = chain( + [current_tool_call_chunk], tool_calls_stream + ) yield function_schema.parse_args( self._tool_call( - chain([current_tool_call_chunk], tool_calls_stream), - tool_call_ref, - current_tool_call_id, + tool_calls_stream, tool_call_ref, current_tool_call_id ) ) + if not tool_call_ref: + # Finish the group to allow advancing to the next one + consume( + self._tool_call( + tool_calls_stream, + tool_call_ref, + current_tool_call_id, + ) + ) + except ValidationError as e: assert current_tool_call_id is not None # noqa: S101 raise ToolSchemaParseError( @@ -227,8 +244,11 @@ async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: if self._parser.is_content(current_item): stream = achain(async_iter([current_item]), stream) yield AsyncStreamedStr(self._streamed_str(stream, current_item_ref)) + if not current_item_ref: + # Finish the group to allow advancing to the next one + await aconsume(self._streamed_str(stream, current_item_ref)) elif self._parser.is_tool_call(current_item): - tool_calls_stream = ( + tool_calls_stream: AsyncIterator[FunctionCallChunk] = ( tool_call_chunk async for item in 
achain(async_iter([current_item]), stream) for tool_call_chunk in self._parser.iter_tool_calls(item) @@ -250,16 +270,23 @@ async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: tool_name=current_tool_call_chunk.name, ) try: + tool_calls_stream = achain( + async_iter([current_tool_call_chunk]), tool_calls_stream + ) yield await function_schema.aparse_args( self._tool_call( - achain( - async_iter([current_tool_call_chunk]), - tool_calls_stream, - ), - tool_call_ref, - current_tool_call_id, + tool_calls_stream, tool_call_ref, current_tool_call_id ) ) + if not tool_call_ref: + # Finish the group to allow advancing to the next one + await aconsume( + self._tool_call( + tool_calls_stream, + tool_call_ref, + current_tool_call_id, + ) + ) except ValidationError as e: assert current_tool_call_id is not None # noqa: S101 raise ToolSchemaParseError( diff --git a/src/magentic/streaming.py b/src/magentic/streaming.py index e87af08..fdc6881 100644 --- a/src/magentic/streaming.py +++ b/src/magentic/streaming.py @@ -1,4 +1,5 @@ import asyncio +import collections import textwrap from collections.abc import AsyncIterable, AsyncIterator, Callable, Iterable, Iterator from dataclasses import dataclass @@ -84,6 +85,17 @@ async def atakewhile( yield item +def consume(iterator: Iterator[T]) -> None: + """Consume an iterator.""" + collections.deque(iterator, maxlen=0) + + +async def aconsume(aiterable: AsyncIterable[T]) -> None: + """Async version of `consume`.""" + async for _ in aiterable: + pass + + async def agroupby( aiterable: AsyncIterable[T], key: Callable[[T], object] ) -> AsyncIterator[tuple[object, AsyncIterator[T]]]: @@ -103,10 +115,11 @@ async def agroup( while transition: transition_item = transition.pop() group_key = key(transition_item) - yield ( - group_key, - agroup(achain(async_iter([transition_item]), aiterator), group_key), - ) + aiterator = achain(async_iter([transition_item]), aiterator) + yield (group_key, agroup(aiterator, group_key)) + # Finish the group to allow advancing to the next one + if not transition: + await aconsume(agroup(aiterator, group_key)) @dataclass diff --git a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_parallel_function_call.yaml b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_parallel_function_call.yaml index b73ba3e..58a6863 100644 --- a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_parallel_function_call.yaml +++ b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_parallel_function_call.yaml @@ -8,7 +8,8 @@ interactions: "required": ["a", "b"], "type": "object"}}, {"name": "minus", "description": "", "input_schema": {"properties": {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "integer"}}, "required": ["a", "b"], "type": "object"}}], - "tool_choice": {"type": "any"}, "stream": true}' + "tool_choice": {"type": "any", "disable_parallel_tool_use": false}, "stream": + true}' headers: accept: - application/json @@ -19,7 +20,7 @@ interactions: connection: - keep-alive content-length: - - '683' + - '719' content-type: - application/json host: @@ -50,12 +51,12 @@ interactions: body: string: 'event: message_start - data: 
{"type":"message_start","message":{"id":"msg_012X15Kcz2Wu677j3nd9FqEh","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":469,"output_tokens":8}} } + data: {"type":"message_start","message":{"id":"msg_01VmimdL9rtzYarNBzEM3hTj","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":469,"output_tokens":8}} } event: content_block_start - data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01RJGtVku2YsNB6VPCM1vD7x","name":"plus","input":{}} } + data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01Q54GZYYAzL3HVr4LQDbfXC","name":"plus","input":{}} } event: ping @@ -70,73 +71,88 @@ interactions: event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"a\": - 1"} } + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"a\":"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":" + 1"} } event: content_block_delta data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":", - "} } + \""} } event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"\"b\""} } + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"b\""} } event: content_block_delta data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":": - 2}"} } + 2}"} } event: content_block_stop - data: {"type":"content_block_stop","index":0 } + data: {"type":"content_block_stop","index":0 } event: content_block_start - data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01GuGH37ceFTQc3k53ui7JvQ","name":"minus","input":{}} } + data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_017NYD2BGVvRppZKAX98qedy","name":"minus","input":{}}} event: content_block_delta - data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } event: content_block_delta - data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\""} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"a\":"} } event: content_block_delta - data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"a\": - 2"} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" + 2"} } event: content_block_delta data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":", - \"b\": 1}"} } + \"b"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\":"}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" + 1}"} } event: content_block_stop - data: {"type":"content_block_stop","index":1 } + data: {"type":"content_block_stop","index":1 } event: message_delta - 
data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":104} - } + data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":104} } event: message_stop - data: {"type":"message_stop" } + data: {"type":"message_stop" } ' @@ -144,7 +160,7 @@ interactions: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8e3691fa6dc69667-SJC + - 8ea6a39c9b8d9e70-SJC Cache-Control: - no-cache Connection: @@ -152,7 +168,7 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Sat, 16 Nov 2024 09:49:21 GMT + - Sat, 30 Nov 2024 00:14:46 GMT Server: - cloudflare Transfer-Encoding: @@ -160,7 +176,7 @@ interactions: X-Robots-Tag: - none request-id: - - req_01GXWHQw2cE2gB1c1CW9tXmz + - req_01F3seNxjq11ZuZCh1yt41L5 via: - 1.1 google status: diff --git a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_streamed_response.yaml b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_streamed_response.yaml new file mode 100644 index 0000000..9cfbe0e --- /dev/null +++ b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_acomplete_async_streamed_response.yaml @@ -0,0 +1,222 @@ +interactions: +- request: + body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": [{"type": + "text", "text": "Pick a random city. Then get its weather."}]}], "model": "claude-3-opus-20240229", + "tools": [{"name": "get_weather", "description": "Get the weather for a location.", + "input_schema": {"properties": {"location": {"title": "Location", "type": "string"}}, + "required": ["location"], "type": "object"}}], "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '404' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - AsyncAnthropic/Python 0.39.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.39.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.15 + x-stainless-stream-helper: + - messages + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 'event: message_start + + data: {"type":"message_start","message":{"id":"msg_01SBqSC6Ceb3t3h6pXFNw3Rn","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":610,"output_tokens":3}} } + + + event: content_block_start + + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + + event: ping + + data: {"type": "ping"} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\nTo + get"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + the weather for a city, the"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + get_weather tool is needed. 
It"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + requires a location parameter.\nThe user"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + did not specify a city, but"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + asked me to pick one at"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + random. So I can in"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"fer + the location parameter by selecting"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + a city name myself.\nWith"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + the location inferred, I have"}} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + all the required parameters to call the"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + get_weather tool.\n"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":0 } + + + event: content_block_start + + data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01NeeKpbWNsqw53dKdgnCsYY","name":"get_weather","input":{}} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"location\""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":": + \"T"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"okyo"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"}"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":1 } + + + event: message_delta + + data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":135} } + + + event: message_stop + + data: {"type":"message_stop" } + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ea9a8d14bd1fa22-SJC + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Sat, 30 Nov 2024 09:02:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + request-id: + - req_01QTs1VmrHugipEzMY2oxHfc + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_no_structured_output_error.yaml b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_no_structured_output_error.yaml index 
8bd7715..360197f 100644 --- a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_no_structured_output_error.yaml +++ b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_no_structured_output_error.yaml @@ -6,8 +6,8 @@ interactions: "", "input_schema": {"properties": {"value": {"title": "Value", "type": "integer"}}, "required": ["value"], "type": "object"}}, {"name": "return_bool", "description": "", "input_schema": {"properties": {"value": {"title": "Value", "type": "boolean"}}, - "required": ["value"], "type": "object"}}], "tool_choice": {"type": "any"}, - "stream": true}' + "required": ["value"], "type": "object"}}], "tool_choice": {"type": "any", "disable_parallel_tool_use": + true}, "stream": true}' headers: accept: - application/json @@ -18,7 +18,7 @@ interactions: connection: - keep-alive content-length: - - '578' + - '613' content-type: - application/json host: @@ -49,12 +49,12 @@ interactions: body: string: 'event: message_start - data: {"type":"message_start","message":{"id":"msg_01S86azh3xwx7QZoJxirtvhc","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":475,"output_tokens":8}} } + data: {"type":"message_start","message":{"id":"msg_013F4zD8moBWwqcDFmVt1JXg","type":"message","role":"assistant","model":"claude-3-haiku-20240307","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":476,"output_tokens":8}} } event: content_block_start - data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01NKJhv1asrehTjeCV179BEQ","name":"return_int","input":{}} } + data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_013gtjQ5DxQN5dxfuZSTFGxH","name":"return_int","input":{}} } event: ping @@ -64,33 +64,33 @@ interactions: event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""} } + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""} } event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"value\": - "} } + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"value"} } event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"42}"} } + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"\": + 42}"} } event: content_block_stop - data: {"type":"content_block_stop","index":0 } + data: {"type":"content_block_stop","index":0} event: message_delta - data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":38} } + data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":29} } event: message_stop - data: {"type":"message_stop" } + data: {"type":"message_stop" } ' @@ -98,7 +98,7 @@ interactions: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8e36919a0aaf176d-SJC + - 8ea6a5ebbdc2cf27-SJC Cache-Control: - no-cache Connection: @@ -106,7 +106,7 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Sat, 16 Nov 2024 09:49:03 GMT + - Sat, 30 Nov 2024 00:16:20 GMT Server: - cloudflare Transfer-Encoding: @@ -114,7 +114,7 @@ interactions: X-Robots-Tag: - none request-id: - - 
req_01N9DCTgb5ZL8g5tqtAm9tS7 + - req_01RzHjcwkY86FoPQQMbhoo5H via: - 1.1 google status: diff --git a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_parallel_function_call.yaml b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_parallel_function_call.yaml index f9aaeaf..cfa3353 100644 --- a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_parallel_function_call.yaml +++ b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_parallel_function_call.yaml @@ -8,7 +8,8 @@ interactions: "required": ["a", "b"], "type": "object"}}, {"name": "minus", "description": "", "input_schema": {"properties": {"a": {"title": "A", "type": "integer"}, "b": {"title": "B", "type": "integer"}}, "required": ["a", "b"], "type": "object"}}], - "tool_choice": {"type": "any"}, "stream": true}' + "tool_choice": {"type": "any", "disable_parallel_tool_use": false}, "stream": + true}' headers: accept: - application/json @@ -19,7 +20,7 @@ interactions: connection: - keep-alive content-length: - - '683' + - '719' content-type: - application/json host: @@ -50,12 +51,12 @@ interactions: body: string: 'event: message_start - data: {"type":"message_start","message":{"id":"msg_012f3EyS95ZaC3e3BnCqTDa7","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":469,"output_tokens":8}} } + data: {"type":"message_start","message":{"id":"msg_01LZDJCvBkpvzQxKUK1YXzwc","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":469,"output_tokens":12}} } event: content_block_start - data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01GsvW1rYits8MPJif9rkZ4G","name":"plus","input":{}} } + data: {"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_01CM7Nqnu3tF2UqzksSCxsqt","name":"plus","input":{}} } event: ping @@ -65,72 +66,78 @@ interactions: event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""} } + data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""} } event: content_block_delta data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"a\": - 1"} } + 1"} } event: content_block_delta data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":", - \"b\""} } + \"b\": 2}"} } - event: content_block_delta + event: content_block_stop + + data: {"type":"content_block_stop","index":0 } + + + event: content_block_start - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":": - "} } + data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01MGZBHepZSnpCrVLDT34yxu","name":"minus","input":{}}} event: content_block_delta - data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"2}"} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } - event: content_block_stop + event: content_block_delta - data: {"type":"content_block_stop","index":0 } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"a"} } - event: content_block_start + event: 
content_block_delta - data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01M2EwdtY31e6afyqWGhPCpc","name":"minus","input":{}} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\": + 2"}} event: content_block_delta - data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":", + \"b"} } event: content_block_delta - data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"a\": - 2"} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\": + "} } event: content_block_delta - data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":", - \"b\": 1}"} } + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"1}"} } event: content_block_stop - data: {"type":"content_block_stop","index":1 } + data: {"type":"content_block_stop","index":1 } event: message_delta - data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":104} } + data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":104} + } event: message_stop - data: {"type":"message_stop" } + data: {"type":"message_stop" } ' @@ -138,7 +145,7 @@ interactions: CF-Cache-Status: - DYNAMIC CF-RAY: - - 8e3691acab48fa8e-SJC + - 8ea6a3809d1e6804-SJC Cache-Control: - no-cache Connection: @@ -146,7 +153,7 @@ interactions: Content-Type: - text/event-stream; charset=utf-8 Date: - - Sat, 16 Nov 2024 09:49:08 GMT + - Sat, 30 Nov 2024 00:14:41 GMT Server: - cloudflare Transfer-Encoding: @@ -154,7 +161,7 @@ interactions: X-Robots-Tag: - none request-id: - - req_01NTz6P9SwW8GXBoocicUsGe + - req_01JCEB6rs4JGaU8SERKFTiLv via: - 1.1 google status: diff --git a/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_streamed_response.yaml b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_streamed_response.yaml new file mode 100644 index 0000000..a24604b --- /dev/null +++ b/tests/chat_model/cassettes/test_anthropic_chat_model/test_anthropic_chat_model_complete_streamed_response.yaml @@ -0,0 +1,224 @@ +interactions: +- request: + body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": [{"type": + "text", "text": "Pick a random city. 
Then get its weather."}]}], "model": "claude-3-opus-20240229", + "tools": [{"name": "get_weather", "description": "Get the weather for a location.", + "input_schema": {"properties": {"location": {"title": "Location", "type": "string"}}, + "required": ["location"], "type": "object"}}], "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '404' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.39.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.39.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.15 + x-stainless-stream-helper: + - messages + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 'event: message_start + + data: {"type":"message_start","message":{"id":"msg_01EAz3WTgmdu4sGrsMWtZZbS","type":"message","role":"assistant","model":"claude-3-opus-20240229","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":610,"output_tokens":4}} } + + + event: content_block_start + + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + + event: ping + + data: {"type": "ping"} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\nTo + get the"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + weather for a random city, I will"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + need to:\n1"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":". + Pick a random city name"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n2. + Pass that city name to"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + the get_weather"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + tool to retrieve the weather"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n\nI + have enough information to complete"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + both of those steps without"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + asking the user for any"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + additional input. 
I"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + will proceed with the tool"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + calls in that order."} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"\n"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":0 } + + + event: content_block_start + + data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_016tMRxcvkC82nkYxLpk2cHm","name":"get_weather","input":{}} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"location\":"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" + \"M"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"adrid,"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":" + Spain\"}"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":1 } + + + event: message_delta + + data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":131} } + + + event: message_stop + + data: {"type":"message_stop" } + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ea9a8ad7f2067f0-SJC + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Sat, 30 Nov 2024 09:02:30 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + request-id: + - req_01LbrA8LviDy8pZqZvNb7RXN + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git a/tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_acomplete_async_streamed_response.yaml b/tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_acomplete_async_streamed_response.yaml new file mode 100644 index 0000000..12c90a2 --- /dev/null +++ b/tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_acomplete_async_streamed_response.yaml @@ -0,0 +1,184 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Tell me your favorite city. 
+ Then get its weather."}], "model": "gpt-4o", "stream": true, "stream_options": + {"include_usage": true}, "tools": [{"type": "function", "function": {"name": + "get_weather", "parameters": {"properties": {"location": {"title": "Location", + "type": "string"}}, "required": ["location"], "type": "object"}, "description": + "Get the weather for a location."}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '423' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.54.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.4 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.15 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":"One"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + of"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + my"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + favorite"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + cities"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + is"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + Tokyo"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + 
data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + Let"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + me"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + get"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + current"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + weather"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + for"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":" + Tokyo"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_IWcsO4eLBisJfla3ok3hAw8Y","type":"function","function":{"name":"get_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Tokyo"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-AZDVd6a4QoTFK4nnhdktVq8qGISsh","object":"chat.completion.chunk","created":1732956773,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_831e067d82","choices":[],"usage":{"prompt_tokens":58,"completion_tokens":32,"total_tokens":90,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ea99a992fa31587-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Sat, 30 Nov 2024 08:52:54 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '279' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '500' + x-ratelimit-limit-tokens: + - '30000' + x-ratelimit-remaining-requests: + - '499' + x-ratelimit-remaining-tokens: + - '29970' + x-ratelimit-reset-requests: + - 120ms + x-ratelimit-reset-tokens: + - 60ms + x-request-id: + - req_0b778a6923f3dfaddbfafb0381d250a5 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_complete_streamed_response.yaml b/tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_complete_streamed_response.yaml new file mode 100644 index 0000000..e375501 --- /dev/null +++ b/tests/chat_model/cassettes/test_openai_chat_model/test_openai_chat_model_complete_streamed_response.yaml @@ -0,0 +1,184 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Tell me your favorite city. 
+ Then get its weather."}], "model": "gpt-4o", "stream": true, "stream_options": + {"include_usage": true}, "tools": [{"type": "function", "function": {"name": + "get_weather", "parameters": {"properties": {"location": {"title": "Location", + "type": "string"}}, "required": ["location"], "type": "object"}, "description": + "Get the weather for a location."}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '423' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.54.4 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.4 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.15 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":"One"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + of"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + my"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + favorite"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + cities"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + is"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + Paris"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + I''ll"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + get"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + current"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + weather"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + for"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + Paris"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":" + now"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_QgHUYg30lU8RAkIADo1bLA3P","type":"function","function":{"name":"get_weather","arguments":""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Paris"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-AZCKpkxaAFkeRLY66s44FuICTMbAh","object":"chat.completion.chunk","created":1732952259,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_7f6be3efb0","choices":[],"usage":{"prompt_tokens":58,"completion_tokens":32,"total_tokens":90,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ea92c615fda67ab-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Sat, 30 Nov 2024 07:37:39 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-processing-ms: + - '702' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '500' + x-ratelimit-limit-tokens: + - '30000' + x-ratelimit-remaining-requests: + - '499' + x-ratelimit-remaining-tokens: + - '29970' + x-ratelimit-reset-requests: + - 120ms + x-ratelimit-reset-tokens: + - 60ms + x-request-id: + - req_097708ba59a9ba323b752a50e2e1b863 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/chat_model/test_anthropic_chat_model.py b/tests/chat_model/test_anthropic_chat_model.py index d296bd9..086f4da 100644 --- a/tests/chat_model/test_anthropic_chat_model.py +++ b/tests/chat_model/test_anthropic_chat_model.py @@ -3,6 +3,7 @@ import pytest from pydantic import AfterValidator, BaseModel +from magentic._streamed_response import AsyncStreamedResponse, StreamedResponse from magentic.chat_model.anthropic_chat_model import AnthropicChatModel from magentic.chat_model.base import ToolSchemaParseError from magentic.chat_model.message import Message, Usage, UserMessage @@ -123,6 +124,25 @@ def minus(a: int, b: 
int) -> int: assert len(list(message.content)) == 2 +@pytest.mark.anthropic +def test_anthropic_chat_model_complete_streamed_response(): + def get_weather(location: str) -> None: + """Get the weather for a location.""" + + chat_model = AnthropicChatModel("claude-3-opus-20240229") + message = chat_model.complete( + messages=[UserMessage("Pick a random city. Then get its weather.")], + functions=[get_weather], + output_types=[StreamedResponse], + ) + assert isinstance(message.content, StreamedResponse) + response_items = list(message.content) + assert len(response_items) == 2 + streamed_str, function_call = response_items + assert isinstance(streamed_str, StreamedStr) + assert isinstance(function_call, FunctionCall) + + @pytest.mark.parametrize( ("prompt", "output_types", "expected_output_type"), [ @@ -219,3 +239,22 @@ def minus(a: int, b: int) -> int: ) assert isinstance(message.content, AsyncParallelFunctionCall) assert len([x async for x in message.content]) == 2 + + +@pytest.mark.anthropic +async def test_anthropic_chat_model_acomplete_async_streamed_response(): + def get_weather(location: str) -> None: + """Get the weather for a location.""" + + chat_model = AnthropicChatModel("claude-3-opus-20240229") + message = await chat_model.acomplete( + messages=[UserMessage("Pick a random city. Then get its weather.")], + functions=[get_weather], + output_types=[AsyncStreamedResponse], + ) + assert isinstance(message.content, AsyncStreamedResponse) + response_items = [x async for x in message.content] + assert len(response_items) == 2 + streamed_str, function_call = response_items + assert isinstance(streamed_str, AsyncStreamedStr) + assert isinstance(function_call, FunctionCall) diff --git a/tests/chat_model/test_openai_chat_model.py b/tests/chat_model/test_openai_chat_model.py index 26f06ef..98d37aa 100644 --- a/tests/chat_model/test_openai_chat_model.py +++ b/tests/chat_model/test_openai_chat_model.py @@ -8,6 +8,7 @@ from pydantic import AfterValidator, BaseModel from magentic._pydantic import ConfigDict, with_config +from magentic._streamed_response import AsyncStreamedResponse, StreamedResponse from magentic.chat_model.base import ToolSchemaParseError from magentic.chat_model.message import ( AssistantMessage, @@ -91,6 +92,22 @@ def plus(a: int, b: int) -> int: ], }, ), + ( + AssistantMessage( + StreamedResponse([StreamedStr(["Hello"]), FunctionCall(plus, 1, 2)]) + ), + { + "role": "assistant", + "content": "Hello", + "tool_calls": [ + { + "id": ANY, + "type": "function", + "function": {"name": "plus", "arguments": '{"a":1,"b":2}'}, + }, + ], + }, + ), ( FunctionResultMessage(3, FunctionCall(plus, 1, 2)), { @@ -155,6 +172,25 @@ def test_openai_chat_model_complete_seed(): assert message1.content == message2.content +@pytest.mark.openai +def test_openai_chat_model_complete_streamed_response(): + def get_weather(location: str) -> None: + """Get the weather for a location.""" + + chat_model = OpenaiChatModel("gpt-4o") + message = chat_model.complete( + messages=[UserMessage("Tell me your favorite city. 
Then get its weather.")], + functions=[get_weather], + output_types=[StreamedResponse], + ) + assert isinstance(message.content, StreamedResponse) + response_items = list(message.content) + assert len(response_items) == 2 + streamed_str, function_call = response_items + assert isinstance(streamed_str, StreamedStr) + assert isinstance(function_call, FunctionCall) + + @pytest.mark.openai def test_openai_chat_model_complete_pydantic_model_openai_strict(): class CapitalCity(BaseModel): @@ -238,6 +274,25 @@ class Test(BaseModel): ) +@pytest.mark.openai +async def test_openai_chat_model_acomplete_async_streamed_response(): + def get_weather(location: str) -> None: + """Get the weather for a location.""" + + chat_model = OpenaiChatModel("gpt-4o") + message = await chat_model.acomplete( + messages=[UserMessage("Tell me your favorite city. Then get its weather.")], + functions=[get_weather], + output_types=[AsyncStreamedResponse], + ) + assert isinstance(message.content, AsyncStreamedResponse) + response_items = [x async for x in message.content] + assert len(response_items) == 2 + streamed_str, function_call = response_items + assert isinstance(streamed_str, AsyncStreamedStr) + assert isinstance(function_call, FunctionCall) + + @pytest.mark.openai async def test_openai_chat_model_acomplete_usage(): chat_model = OpenaiChatModel("gpt-4o") diff --git a/tests/conftest.py b/tests/conftest.py index 3166c8a..699072f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,9 +14,10 @@ @pytest.fixture(autouse=True, scope="session") def _load_dotenv(): """Load .env file so API keys are available when rewriting VCR cassettes""" + # Prefer env vars from .env over existing env vars + load_dotenv(override=True) # Set default values so tests can run using existing cassettes without env vars load_dotenv(".env.template") - load_dotenv() def pytest_recording_configure(config: pytest.Config, vcr: VCR) -> None: diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 5741450..bb1f148 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -114,6 +114,13 @@ async def test_agroupby(aiterable, key, expected): ] == expected +async def test_agroupby_skip(): + aiterable = agroupby(async_iter([1, 1, 2, 2, 3, 3]), lambda x: x) + key1, group1 = await anext(aiterable) + key2, group2 = await anext(aiterable) + assert [x async for x in group2] == [2, 2] + + iter_streamed_json_array_test_cases = [ (["[]"], []), (['["He', 'llo", ', '"Wo', 'rld"]'], ['"Hello"', '"World"']), diff --git a/uv.lock b/uv.lock index 4db97cd..6a847c5 100644 --- a/uv.lock +++ b/uv.lock @@ -1415,6 +1415,7 @@ dev = [ { name = "litellm" }, { name = "logfire" }, { name = "mypy" }, + { name = "pydeps" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-clarity" }, @@ -1457,6 +1458,7 @@ dev = [ { name = "litellm" }, { name = "logfire", specifier = ">=0.46.1,<0.50.0" }, { name = "mypy" }, + { name = "pydeps", specifier = ">=2.0.1" }, { name = "pytest", specifier = ">=7.0.0" }, { name = "pytest-asyncio", specifier = ">=0.18.0" }, { name = "pytest-clarity", specifier = ">=0.1.0" }, @@ -2476,6 +2478,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/f9/ff95fd7d760af42f647ea87f9b8a383d891cdb5e5dbd4613edaeb094252a/pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87", size = 28595 }, ] +[[package]] +name = "pydeps" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "stdlib-list" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/65/b5/8163e1594c45cee4e2890c74b7a7c78f4b3973ffecd418cd5e37119b342d/pydeps-2.0.1.tar.gz", hash = "sha256:6f898ab683ef7c47b14c9cc86bf28b15adebc466cb343c784da4090b2573c704", size = 51327 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/67/2274bf061ca84621d9e3e006193a76a6ca94c760d16ff7c6414ca2323a79/pydeps-2.0.1-py3-none-any.whl", hash = "sha256:9e3589fff8ac2fc17d672c8132c8b7fab921d30fbb6b37b61bea2c9fc4f84901", size = 46450 }, +] + [[package]] name = "pygments" version = "2.18.0" @@ -3064,6 +3078,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, ] +[[package]] +name = "stdlib-list" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5d/04/6b37a71e92ddca16b190b7df62494ac4779d58ced4787f73584eb32c8f03/stdlib_list-0.11.0.tar.gz", hash = "sha256:b74a7b643a77a12637e907f3f62f0ab9f67300bce4014f6b2d3c8b4c8fd63c66", size = 60335 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/fe/e07300c027a868d32d8ed7a425503401e91a03ff90e7ca525c115c634ffb/stdlib_list-0.11.0-py3-none-any.whl", hash = "sha256:8bf8decfffaaf273d4cfeb5bd852b910a00dec1037dcf163576803622bccf597", size = 83617 }, +] + [[package]] name = "terminado" version = "0.18.1" From 0b4b79579e4906d8a803bffee35eb61a3b3e87c5 Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Sat, 30 Nov 2024 01:29:17 -0800 Subject: [PATCH 3/7] Bump version to 0.34.0 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 33c5f61..84d6d8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "magentic" -version = "0.33.0" +version = "0.34.0" description = "Seamlessly integrate LLMs as Python functions" readme = "README.md" requires-python = ">=3.10" diff --git a/uv.lock b/uv.lock index 6a847c5..34e3791 100644 --- a/uv.lock +++ b/uv.lock @@ -1389,7 +1389,7 @@ wheels = [ [[package]] name = "magentic" -version = "0.33.0" +version = "0.34.0" source = { editable = "." } dependencies = [ { name = "filetype" }, From f7e7e37117eb1e0ba2943ca129cbc4102b4a3d74 Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Sat, 30 Nov 2024 16:10:27 -0800 Subject: [PATCH 4/7] Consume LLM output stream via returned objects to allow caching (#384) * Consume stream via objects to allow caching * Update tests to check caching * Change StreamedStr consume warning to tip in docs --- docs/streaming.md | 4 +- docs/structured-outputs.md | 2 +- src/magentic/chat_model/stream.py | 58 +++++++++++-------- src/magentic/streaming.py | 2 +- tests/chat_model/test_anthropic_chat_model.py | 4 ++ tests/chat_model/test_openai_chat_model.py | 4 ++ 6 files changed, 45 insertions(+), 29 deletions(-) diff --git a/docs/streaming.md b/docs/streaming.md index 348f350..cfc2bc2 100644 --- a/docs/streaming.md +++ b/docs/streaming.md @@ -84,9 +84,9 @@ for hero in create_superhero_team("The Food Dudes"): Some LLMs have the ability to generate text output and make tool calls in the same response. This allows them to perform chain-of-thought reasoning or provide additional context to the user. 
In magentic, the `StreamedResponse` (or `AsyncStreamedResponse`) class can be used to request this type of output. This object is an iterable of `StreamedStr` (or `AsyncStreamedStr`) and `FunctionCall` instances. -!!! warning "Consuming StreamedStr" +!!! tip "Consuming StreamedStr" - The StreamedStr object must be iterated over before the next item in the `StreamedResponse` is processed, otherwise the string output will be lost. This is because the `StreamedResponse` and `StreamedStr` share the same underlying generator, so advancing the `StreamedResponse` iterator skips over the `StreamedStr` items. The `StreamedStr` object has internal caching so after iterating over it once the chunks will remain available. + The StreamedStr object caches its chunks internally, so it does not have to be consumed immediately. This means you can iterate over the chunks as they are received, and/or use the StreamedStr object as a whole after the LLM has finished generating the output. In the example below, we request that the LLM generates a greeting and then calls a function to get the weather for two cities. The `StreamedResponse` object is then iterated over to print the output, and the `StreamedStr` and `FunctionCall` items are processed separately. diff --git a/docs/structured-outputs.md b/docs/structured-outputs.md index 1b1e1f6..059aaba 100644 --- a/docs/structured-outputs.md +++ b/docs/structured-outputs.md @@ -150,7 +150,7 @@ print(hero_defeated) !!! warning "StreamedResponse" - It is now recommended to use `StreamedResponse` for chain-of-thought prompting, as this uses the LLM provider's native chain-of-thought capabilities. See [StreamedResponse](streaming.md#StreamedResponse) for more information. + It is now recommended to use `StreamedResponse` for chain-of-thought prompting, as this uses the LLM provider's native chain-of-thought capabilities. See [StreamedResponse](streaming.md#streamedresponse) for more information. Using a simple Python type as the return annotation might result in poor results as the LLM has no time to arrange its thoughts before answering. To allow the LLM to work through this "chain of thought" you can instead return a pydantic model with initial fields for explaining the final response. 
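For reference, a minimal sketch of the caching behavior these doc changes describe, closely mirroring the tests added later in this patch. The import paths are the ones used in this patch's tests and sources and may differ from the shorter public paths; running it requires an `OPENAI_API_KEY`, as with the `openai`-marked tests.

```python
from magentic._streamed_response import StreamedResponse
from magentic.chat_model.message import UserMessage
from magentic.chat_model.openai_chat_model import OpenaiChatModel
from magentic.function_call import FunctionCall
from magentic.streaming import StreamedStr


def get_weather(location: str) -> None:
    """Get the weather for a location."""


chat_model = OpenaiChatModel("gpt-4o")
message = chat_model.complete(
    messages=[UserMessage("Tell me your favorite city. Then get its weather.")],
    functions=[get_weather],
    output_types=[StreamedResponse],
)

# Advancing the StreamedResponse past the StreamedStr no longer loses it:
# the StreamedStr caches its chunks internally as the stream is consumed.
streamed_str, function_call = list(message.content)
assert isinstance(streamed_str, StreamedStr)
print(streamed_str.to_string())  # still readable after the stream was advanced
assert isinstance(function_call, FunctionCall)
function_call()  # invokes get_weather(...) with the LLM-chosen arguments
```
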
diff --git a/src/magentic/chat_model/stream.py b/src/magentic/chat_model/stream.py index da08f28..26078aa 100644 --- a/src/magentic/chat_model/stream.py +++ b/src/magentic/chat_model/stream.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from collections.abc import AsyncIterator, Iterable, Iterator +from collections.abc import AsyncIterable, AsyncIterator, Iterable, Iterator from itertools import chain from typing import Any, Generic, NamedTuple, TypeVar @@ -81,6 +81,7 @@ def __init__( self._state = state self._iterator = self.__stream__() + self._exhausted: bool = False def __next__(self) -> StreamedStr | OutputT: return self._iterator.__next__() @@ -99,6 +100,7 @@ def _streamed_str( assert not current_item_ref # noqa: S101 current_item_ref.append(item) return + self._exhausted = True def _tool_call( self, @@ -116,6 +118,7 @@ def _tool_call( return if item.args: yield item.args + self._exhausted = True def __stream__(self) -> Iterator[StreamedStr | OutputT]: # This works similarly to `itertools.groupby` @@ -125,10 +128,12 @@ def __stream__(self) -> Iterator[StreamedStr | OutputT]: current_item = current_item_ref.pop() if self._parser.is_content(current_item): stream = chain([current_item], stream) - yield StreamedStr(self._streamed_str(stream, current_item_ref)) - if not current_item_ref: + streamed_str = StreamedStr(self._streamed_str(stream, current_item_ref)) + yield streamed_str + if not current_item_ref and not self._exhausted: # Finish the group to allow advancing to the next one - consume(self._streamed_str(stream, current_item_ref)) + # Consume stream via StreamedStr so it can cache + consume(streamed_str) elif self._parser.is_tool_call(current_item): tool_calls_stream: Iterator[FunctionCallChunk] = ( tool_call_chunk @@ -155,20 +160,18 @@ def __stream__(self) -> Iterator[StreamedStr | OutputT]: tool_calls_stream = chain( [current_tool_call_chunk], tool_calls_stream ) - yield function_schema.parse_args( + output = function_schema.parse_args( self._tool_call( tool_calls_stream, tool_call_ref, current_tool_call_id ) ) - if not tool_call_ref: + yield output + if not tool_call_ref and not self._exhausted: # Finish the group to allow advancing to the next one - consume( - self._tool_call( - tool_calls_stream, - tool_call_ref, - current_tool_call_id, - ) - ) + # Output must be Iterable if parse_args above did not consume + assert isinstance(output, Iterable), output # noqa: S101 + # Consume stream via the output type so it can cache + consume(output) except ValidationError as e: assert current_tool_call_id is not None # noqa: S101 @@ -201,6 +204,7 @@ def __init__( self._state = state self._iterator = self.__stream__() + self._exhausted: bool = False async def __anext__(self) -> AsyncStreamedStr | OutputT: return await self._iterator.__anext__() @@ -220,6 +224,7 @@ async def _streamed_str( assert not current_item_ref # noqa: S101 current_item_ref.append(item) return + self._exhausted = True async def _tool_call( self, @@ -235,6 +240,7 @@ async def _tool_call( return if item.args: yield item.args + self._exhausted = True async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: stream = aapply(self._state.update, self._stream) @@ -243,10 +249,14 @@ async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: current_item = current_item_ref.pop() if self._parser.is_content(current_item): stream = achain(async_iter([current_item]), stream) - yield AsyncStreamedStr(self._streamed_str(stream, current_item_ref)) - if not current_item_ref: + streamed_str = 
AsyncStreamedStr( + self._streamed_str(stream, current_item_ref) + ) + yield streamed_str + if not current_item_ref and not self._exhausted: # Finish the group to allow advancing to the next one - await aconsume(self._streamed_str(stream, current_item_ref)) + # Consume stream via AsyncStreamedStr so it can cache + await aconsume(streamed_str) elif self._parser.is_tool_call(current_item): tool_calls_stream: AsyncIterator[FunctionCallChunk] = ( tool_call_chunk @@ -273,20 +283,18 @@ async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: tool_calls_stream = achain( async_iter([current_tool_call_chunk]), tool_calls_stream ) - yield await function_schema.aparse_args( + output = await function_schema.aparse_args( self._tool_call( tool_calls_stream, tool_call_ref, current_tool_call_id ) ) - if not tool_call_ref: + yield output + if not tool_call_ref and not self._exhausted: # Finish the group to allow advancing to the next one - await aconsume( - self._tool_call( - tool_calls_stream, - tool_call_ref, - current_tool_call_id, - ) - ) + # Output must be AsyncIterable if aparse_args above did not consume + assert isinstance(output, AsyncIterable), output # noqa: S101 + # Consume stream via the output type so it can cache + await aconsume(output) except ValidationError as e: assert current_tool_call_id is not None # noqa: S101 raise ToolSchemaParseError( diff --git a/src/magentic/streaming.py b/src/magentic/streaming.py index fdc6881..f0911d6 100644 --- a/src/magentic/streaming.py +++ b/src/magentic/streaming.py @@ -85,7 +85,7 @@ async def atakewhile( yield item -def consume(iterator: Iterator[T]) -> None: +def consume(iterator: Iterable[T]) -> None: """Consume an iterator.""" collections.deque(iterator, maxlen=0) diff --git a/tests/chat_model/test_anthropic_chat_model.py b/tests/chat_model/test_anthropic_chat_model.py index 086f4da..8b88a6c 100644 --- a/tests/chat_model/test_anthropic_chat_model.py +++ b/tests/chat_model/test_anthropic_chat_model.py @@ -140,7 +140,9 @@ def get_weather(location: str) -> None: assert len(response_items) == 2 streamed_str, function_call = response_items assert isinstance(streamed_str, StreamedStr) + assert len(streamed_str.to_string()) > 1 # Check StreamedStr was cached assert isinstance(function_call, FunctionCall) + assert function_call() is None # Check FunctionCall is successfully called @pytest.mark.parametrize( @@ -257,4 +259,6 @@ def get_weather(location: str) -> None: assert len(response_items) == 2 streamed_str, function_call = response_items assert isinstance(streamed_str, AsyncStreamedStr) + assert len(await streamed_str.to_string()) > 1 # Check AsyncStreamedStr was cached assert isinstance(function_call, FunctionCall) + assert function_call() is None # Check FunctionCall is successfully called diff --git a/tests/chat_model/test_openai_chat_model.py b/tests/chat_model/test_openai_chat_model.py index 98d37aa..be6e539 100644 --- a/tests/chat_model/test_openai_chat_model.py +++ b/tests/chat_model/test_openai_chat_model.py @@ -188,7 +188,9 @@ def get_weather(location: str) -> None: assert len(response_items) == 2 streamed_str, function_call = response_items assert isinstance(streamed_str, StreamedStr) + assert len(streamed_str.to_string()) > 1 # Check StreamedStr was cached assert isinstance(function_call, FunctionCall) + assert function_call() is None # Check FunctionCall is successfully called @pytest.mark.openai @@ -290,7 +292,9 @@ def get_weather(location: str) -> None: assert len(response_items) == 2 streamed_str, function_call = 
response_items assert isinstance(streamed_str, AsyncStreamedStr) + assert len(await streamed_str.to_string()) > 1 # Check AsyncStreamedStr was cached assert isinstance(function_call, FunctionCall) + assert function_call() is None # Check FunctionCall is successfully called @pytest.mark.openai From af31a8707581174f8be4d68583d26f34fc603bc4 Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Sat, 30 Nov 2024 17:23:09 -0800 Subject: [PATCH 5/7] Improve ruff format/lint rules (#385) * Add split-on-trailing-comma for imports * make format * Allow S101 assert, remove noqa comments * Reenable temporarily disabled ruff rules --- pyproject.toml | 6 ++-- .../chat_model/anthropic_chat_model.py | 24 ++++------------ src/magentic/chat_model/base.py | 4 +-- src/magentic/chat_model/litellm_chat_model.py | 18 ++++++------ src/magentic/chat_model/openai_chat_model.py | 28 +++++-------------- src/magentic/chat_model/retry_chat_model.py | 11 ++------ src/magentic/chat_model/stream.py | 28 +++++++++---------- src/magentic/chatprompt.py | 10 +------ src/magentic/function_call.py | 8 +----- src/magentic/prompt_chain.py | 7 +---- src/magentic/prompt_function.py | 10 +------ src/magentic/typing.py | 11 ++------ tests/test_backend.py | 4 +-- 13 files changed, 48 insertions(+), 121 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 84d6d8d..8165cce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ ignore = [ "TD", # flake8-todos "FIX", # flake8-fixme "PL", # Pylint + "S101", # assert # Compatibility with ruff formatter "E501", "ISC001", @@ -116,10 +117,6 @@ ignore = [ "Q002", "Q003", "W191", - "B905", # TODO: Reenable this - "UP006", # TODO: Reenable this - "UP007", # TODO: Reenable this - "UP035", # TODO: Reenable this ] [tool.ruff.lint.flake8-pytest-style] @@ -127,6 +124,7 @@ mark-parentheses = false [tool.ruff.lint.isort] known-first-party = ["magentic"] +split-on-trailing-comma = false [tool.ruff.lint.per-file-ignores] "docs/examples/*" = [ diff --git a/src/magentic/chat_model/anthropic_chat_model.py b/src/magentic/chat_model/anthropic_chat_model.py index fe60ded..1f949a8 100644 --- a/src/magentic/chat_model/anthropic_chat_model.py +++ b/src/magentic/chat_model/anthropic_chat_model.py @@ -1,12 +1,6 @@ import base64 import json -from collections.abc import ( - AsyncIterator, - Callable, - Iterable, - Iterator, - Sequence, -) +from collections.abc import AsyncIterator, Callable, Iterable, Iterator, Sequence from enum import Enum from functools import singledispatch from itertools import groupby @@ -15,11 +9,7 @@ import filetype from magentic._parsing import contains_parallel_function_call_type, contains_string_type -from magentic.chat_model.base import ( - ChatModel, - aparse_stream, - parse_stream, -) +from magentic.chat_model.base import ChatModel, aparse_stream, parse_stream from magentic.chat_model.function_schema import ( BaseFunctionSchema, FunctionCallFunctionSchema, @@ -44,11 +34,7 @@ StreamParser, StreamState, ) -from magentic.function_call import ( - FunctionCall, - ParallelFunctionCall, - _create_unique_id, -) +from magentic.function_call import FunctionCall, ParallelFunctionCall, _create_unique_id from magentic.vision import UserImageMessage try: @@ -273,7 +259,7 @@ def update(self, item: MessageStreamEvent) -> None: current_snapshot=self._current_message_snapshot, ) if item.type == "message_stop": - assert not self.usage_ref # noqa: S101 + assert not self.usage_ref self.usage_ref.append( Usage( 
input_tokens=item.message.usage.input_tokens, @@ -283,7 +269,7 @@ def update(self, item: MessageStreamEvent) -> None: @property def current_message_snapshot(self) -> Message[Any]: - assert self._current_message_snapshot is not None # noqa: S101 + assert self._current_message_snapshot is not None # TODO: Possible to return AssistantMessage here? return _RawMessage(self._current_message_snapshot.model_dump()) diff --git a/src/magentic/chat_model/base.py b/src/magentic/chat_model/base.py index 4657033..1268fa9 100644 --- a/src/magentic/chat_model/base.py +++ b/src/magentic/chat_model/base.py @@ -1,9 +1,9 @@ import types from abc import ABC, abstractmethod -from collections.abc import Callable, Iterable +from collections.abc import AsyncIterator, Callable, Iterable, Iterator from contextvars import ContextVar from itertools import chain -from typing import Any, AsyncIterator, Iterator, TypeVar, cast, get_origin, overload +from typing import Any, TypeVar, cast, get_origin, overload from pydantic import ValidationError diff --git a/src/magentic/chat_model/litellm_chat_model.py b/src/magentic/chat_model/litellm_chat_model.py index f38c9d7..ad90350 100644 --- a/src/magentic/chat_model/litellm_chat_model.py +++ b/src/magentic/chat_model/litellm_chat_model.py @@ -37,20 +37,20 @@ class LitellmStreamParser(StreamParser[ModelResponse]): def is_content(self, item: ModelResponse) -> bool: - assert isinstance(item.choices[0], StreamingChoices) # noqa: S101 + assert isinstance(item.choices[0], StreamingChoices) return bool(item.choices[0].delta.content) def get_content(self, item: ModelResponse) -> str | None: - assert isinstance(item.choices[0], StreamingChoices) # noqa: S101 - assert isinstance(item.choices[0].delta.content, str | None) # noqa: S101 + assert isinstance(item.choices[0], StreamingChoices) + assert isinstance(item.choices[0].delta.content, str | None) return item.choices[0].delta.content def is_tool_call(self, item: ModelResponse) -> bool: - assert isinstance(item.choices[0], StreamingChoices) # noqa: S101 + assert isinstance(item.choices[0], StreamingChoices) return bool(item.choices[0].delta.tool_calls) def iter_tool_calls(self, item: ModelResponse) -> Iterable[FunctionCallChunk]: - assert isinstance(item.choices[0], StreamingChoices) # noqa: S101 + assert isinstance(item.choices[0], StreamingChoices) if item.choices and item.choices[0].delta.tool_calls: for tool_call in item.choices[0].delta.tool_calls: if tool_call.function: @@ -75,13 +75,13 @@ def update(self, item: ModelResponse) -> None: # litellm requires usage is not None for its total usage calculation item.usage = litellm.Usage() # type: ignore[attr-defined] if not hasattr(item, "refusal"): - assert isinstance(item.choices[0], StreamingChoices) # noqa: S101 + assert isinstance(item.choices[0], StreamingChoices) item.choices[0].delta.refusal = None # type: ignore[attr-defined] self._chat_completion_stream_state.handle_chunk(item) # type: ignore[arg-type] usage = cast(litellm.Usage, item.usage) # type: ignore[attr-defined,name-defined] # Ignore usages with 0 tokens if usage and usage.prompt_tokens and usage.completion_tokens: - assert not self.usage_ref # noqa: S101 + assert not self.usage_ref self.usage_ref.append( Usage( input_tokens=usage.prompt_tokens, @@ -210,7 +210,7 @@ def complete( tool_schemas=tool_schemas, output_types=output_types ), # type: ignore[arg-type,unused-ignore] ) - assert not isinstance(response, ModelResponse) # noqa: S101 + assert not isinstance(response, ModelResponse) stream = OutputStream( 
stream=response, function_schemas=function_schemas, @@ -270,7 +270,7 @@ async def acomplete( tool_schemas=tool_schemas, output_types=output_types ), # type: ignore[arg-type,unused-ignore] ) - assert not isinstance(response, ModelResponse) # noqa: S101 + assert not isinstance(response, ModelResponse) stream = AsyncOutputStream( stream=response, function_schemas=function_schemas, diff --git a/src/magentic/chat_model/openai_chat_model.py b/src/magentic/chat_model/openai_chat_model.py index b128948..1cf6537 100644 --- a/src/magentic/chat_model/openai_chat_model.py +++ b/src/magentic/chat_model/openai_chat_model.py @@ -1,11 +1,5 @@ import base64 -from collections.abc import ( - AsyncIterator, - Callable, - Iterable, - Iterator, - Sequence, -) +from collections.abc import AsyncIterator, Callable, Iterable, Iterator, Sequence from enum import Enum from functools import singledispatch from typing import Any, Generic, Literal, TypeVar, cast, overload @@ -24,11 +18,7 @@ from magentic._parsing import contains_parallel_function_call_type, contains_string_type from magentic._streamed_response import StreamedResponse -from magentic.chat_model.base import ( - ChatModel, - aparse_stream, - parse_stream, -) +from magentic.chat_model.base import ChatModel, aparse_stream, parse_stream from magentic.chat_model.function_schema import ( BaseFunctionSchema, FunctionCallFunctionSchema, @@ -53,11 +43,7 @@ StreamParser, StreamState, ) -from magentic.function_call import ( - FunctionCall, - ParallelFunctionCall, - _create_unique_id, -) +from magentic.function_call import FunctionCall, ParallelFunctionCall, _create_unique_id from magentic.streaming import StreamedStr from magentic.vision import UserImageMessage @@ -78,9 +64,9 @@ def message_to_openai_message(message: Message[Any]) -> ChatCompletionMessagePar @message_to_openai_message.register(_RawMessage) def _(message: _RawMessage[Any]) -> ChatCompletionMessageParam: - assert isinstance(message.content, dict) # noqa: S101 - assert "role" in message.content # noqa: S101 - assert "content" in message.content # noqa: S101 + assert isinstance(message.content, dict) + assert "role" in message.content + assert "content" in message.content return cast(ChatCompletionMessageParam, message.content) @@ -316,7 +302,7 @@ def update(self, item: ChatCompletionChunk) -> None: tool_call_chunk.index = self._current_tool_call_index self._chat_completion_stream_state.handle_chunk(item) if item.usage: - assert not self.usage_ref # noqa: S101 + assert not self.usage_ref self.usage_ref.append( Usage( input_tokens=item.usage.prompt_tokens, diff --git a/src/magentic/chat_model/retry_chat_model.py b/src/magentic/chat_model/retry_chat_model.py index a5c901b..b7abf67 100644 --- a/src/magentic/chat_model/retry_chat_model.py +++ b/src/magentic/chat_model/retry_chat_model.py @@ -2,15 +2,8 @@ from functools import singledispatchmethod from typing import Any, TypeVar, overload -from magentic.chat_model.base import ( - ChatModel, - ToolSchemaParseError, -) -from magentic.chat_model.message import ( - AssistantMessage, - Message, - ToolResultMessage, -) +from magentic.chat_model.base import ChatModel, ToolSchemaParseError +from magentic.chat_model.message import AssistantMessage, Message, ToolResultMessage from magentic.logger import logfire R = TypeVar("R") diff --git a/src/magentic/chat_model/stream.py b/src/magentic/chat_model/stream.py index 26078aa..c5b57c8 100644 --- a/src/magentic/chat_model/stream.py +++ b/src/magentic/chat_model/stream.py @@ -97,7 +97,7 @@ def _streamed_str( yield content 
if self._parser.is_tool_call(item): # TODO: Check if output types allow for early return and raise if not - assert not current_item_ref # noqa: S101 + assert not current_item_ref current_item_ref.append(item) return self._exhausted = True @@ -113,7 +113,7 @@ def _tool_call( # so that the whole stream is consumed including stop_reason/usage chunks if item.id and item.id != current_tool_call_id: # TODO: Check if output types allow for early return and raise if not - assert not current_tool_call_ref # noqa: S101 + assert not current_tool_call_ref current_tool_call_ref.append(item) return if item.args: @@ -144,13 +144,13 @@ def __stream__(self) -> Iterator[StreamedStr | OutputT]: while tool_call_ref: current_tool_call_chunk = tool_call_ref.pop() current_tool_call_id = current_tool_call_chunk.id - assert current_tool_call_id is not None # noqa: S101 - assert current_tool_call_chunk.name is not None # noqa: S101 + assert current_tool_call_id is not None + assert current_tool_call_chunk.name is not None function_schema = select_function_schema( self._function_schemas, current_tool_call_chunk.name ) if function_schema is None: - assert current_tool_call_id is not None # noqa: S101 + assert current_tool_call_id is not None raise UnknownToolError( output_message=self._state.current_message_snapshot, tool_call_id=current_tool_call_id, @@ -169,12 +169,12 @@ def __stream__(self) -> Iterator[StreamedStr | OutputT]: if not tool_call_ref and not self._exhausted: # Finish the group to allow advancing to the next one # Output must be Iterable if parse_args above did not consume - assert isinstance(output, Iterable), output # noqa: S101 + assert isinstance(output, Iterable), output # Consume stream via the output type so it can cache consume(output) except ValidationError as e: - assert current_tool_call_id is not None # noqa: S101 + assert current_tool_call_id is not None raise ToolSchemaParseError( output_message=self._state.current_message_snapshot, tool_call_id=current_tool_call_id, @@ -221,7 +221,7 @@ async def _streamed_str( yield content if self._parser.is_tool_call(item): # TODO: Check if output types allow for early return - assert not current_item_ref # noqa: S101 + assert not current_item_ref current_item_ref.append(item) return self._exhausted = True @@ -235,7 +235,7 @@ async def _tool_call( async for item in stream: if item.id and item.id != current_tool_call_id: # TODO: Check if output types allow for early return - assert not current_tool_call_ref # noqa: S101 + assert not current_tool_call_ref current_tool_call_ref.append(item) return if item.args: @@ -267,13 +267,13 @@ async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: while tool_call_ref: current_tool_call_chunk = tool_call_ref.pop() current_tool_call_id = current_tool_call_chunk.id - assert current_tool_call_id is not None # noqa: S101 - assert current_tool_call_chunk.name is not None # noqa: S101 + assert current_tool_call_id is not None + assert current_tool_call_chunk.name is not None function_schema = select_function_schema( self._function_schemas, current_tool_call_chunk.name ) if function_schema is None: - assert current_tool_call_id is not None # noqa: S101 + assert current_tool_call_id is not None raise UnknownToolError( output_message=self._state.current_message_snapshot, tool_call_id=current_tool_call_id, @@ -292,11 +292,11 @@ async def __stream__(self) -> AsyncIterator[AsyncStreamedStr | OutputT]: if not tool_call_ref and not self._exhausted: # Finish the group to allow advancing to the next one # Output 
must be AsyncIterable if aparse_args above did not consume - assert isinstance(output, AsyncIterable), output # noqa: S101 + assert isinstance(output, AsyncIterable), output # Consume stream via the output type so it can cache await aconsume(output) except ValidationError as e: - assert current_tool_call_id is not None # noqa: S101 + assert current_tool_call_id is not None raise ToolSchemaParseError( output_message=self._state.current_message_snapshot, tool_call_id=current_tool_call_id, diff --git a/src/magentic/chatprompt.py b/src/magentic/chatprompt.py index 906d3b3..dfdc29d 100644 --- a/src/magentic/chatprompt.py +++ b/src/magentic/chatprompt.py @@ -1,15 +1,7 @@ import inspect from collections.abc import Awaitable, Callable, Sequence from functools import update_wrapper -from typing import ( - Any, - Generic, - ParamSpec, - Protocol, - TypeVar, - cast, - overload, -) +from typing import Any, Generic, ParamSpec, Protocol, TypeVar, cast, overload from magentic.backend import get_chat_model from magentic.chat_model.base import ChatModel diff --git a/src/magentic/function_call.py b/src/magentic/function_call.py index 73998ed..6a2dd36 100644 --- a/src/magentic/function_call.py +++ b/src/magentic/function_call.py @@ -8,13 +8,7 @@ Iterable, Iterator, ) -from typing import ( - Any, - Generic, - ParamSpec, - TypeVar, - cast, -) +from typing import Any, Generic, ParamSpec, TypeVar, cast from uuid import uuid4 from magentic.logger import logfire diff --git a/src/magentic/prompt_chain.py b/src/magentic/prompt_chain.py index ab9b7fa..0d6434b 100644 --- a/src/magentic/prompt_chain.py +++ b/src/magentic/prompt_chain.py @@ -1,12 +1,7 @@ import inspect from collections.abc import Callable from functools import wraps -from typing import ( - Any, - ParamSpec, - TypeVar, - cast, -) +from typing import Any, ParamSpec, TypeVar, cast from magentic.chat import Chat from magentic.chat_model.base import ChatModel diff --git a/src/magentic/prompt_function.py b/src/magentic/prompt_function.py index 42905e8..08dfdee 100644 --- a/src/magentic/prompt_function.py +++ b/src/magentic/prompt_function.py @@ -2,15 +2,7 @@ import inspect from collections.abc import Awaitable, Callable, Sequence from functools import update_wrapper -from typing import ( - Any, - Generic, - ParamSpec, - Protocol, - TypeVar, - cast, - overload, -) +from typing import Any, Generic, ParamSpec, Protocol, TypeVar, cast, overload from magentic.backend import get_chat_model from magentic.chat_model.base import ChatModel diff --git a/src/magentic/typing.py b/src/magentic/typing.py index 21045e9..226bdad 100644 --- a/src/magentic/typing.py +++ b/src/magentic/typing.py @@ -1,14 +1,7 @@ import inspect import types from collections.abc import Iterable, Mapping, Sequence -from typing import ( - Any, - TypeGuard, - TypeVar, - Union, - get_args, - get_origin, -) +from typing import Any, TypeGuard, TypeVar, Union, get_args, get_origin def is_union_type(type_: type) -> bool: @@ -72,7 +65,7 @@ def name_type(type_: type) -> str: return name_type(origin) + "_" + "_".join(name_type(arg) for arg in args) if name := getattr(type_, "__name__", None): - assert isinstance(name, str) # noqa: S101 + assert isinstance(name, str) if len(args) == 1: return f"{name.lower()}_of_{name_type(args[0])}" diff --git a/tests/test_backend.py b/tests/test_backend.py index a0db208..590b53b 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -5,9 +5,7 @@ from magentic.chat_model.litellm_chat_model import LitellmChatModel from magentic.chat_model.message import 
AssistantMessage, UserMessage from magentic.chat_model.mistral_chat_model import MistralChatModel -from magentic.chat_model.openai_chat_model import ( - OpenaiChatModel, -) +from magentic.chat_model.openai_chat_model import OpenaiChatModel def test_backend_anthropic_chat_model(monkeypatch): From 813dbd9d90f7be0f5c5ba4e5c36fd84fdf1fd3a3 Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Sat, 30 Nov 2024 23:25:48 -0800 Subject: [PATCH 6/7] Update overview and configuration docs (#386) * Add github and X link to docs footer * Simplify list of features * Update the configuration docs --- README.md | 153 +++++++++++++++++++++++++++++------------- docs/configuration.md | 139 ++++++++++++++++++++++++++++---------- docs/index.md | 16 ++--- mkdocs.yml | 5 ++ 4 files changed, 223 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index dea8681..4d46106 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,16 @@ # magentic -Easily integrate Large Language Models into your Python code. Simply use the `@prompt` and `@chatprompt` decorators to create functions that return structured output from the LLM. Mix LLM queries and function calling with regular Python code to create complex logic. +Seamlessly integrate Large Language Models into Python code. Use the `@prompt` and `@chatprompt` decorators to create functions that return structured output from an LLM. Combine LLM queries and tool use with traditional Python code to build complex agentic systems. ## Features - [Structured Outputs] using pydantic models and built-in python types. -- [Chat Prompting] to enable few-shot prompting with structured examples. -- [Function Calling] and [Parallel Function Calling] via the `FunctionCall` and `ParallelFunctionCall` return types. -- [Formatting] to naturally insert python objects into prompts. -- [Asyncio]. Simply use `async def` when defining a magentic function. -- [Streaming] structured outputs to use them as they are being generated. -- [Vision] to easily get structured outputs from images. +- [Streaming] of structured outputs and function calls, to use them while being generated. - [LLM-Assisted Retries] to improve LLM adherence to complex output schemas. -- Multiple LLM providers including OpenAI and Anthropic. See [Configuration]. +- [Observability] using OpenTelemetry, with native [Pydantic Logfire integration]. - [Type Annotations] to work nicely with linters and IDEs. +- [Configuration] options for multiple LLM providers including OpenAI, Anthropic, and Ollama. +- Many more features: [Chat Prompting], [Parallel Function Calling], [Vision], [Formatting], [Asyncio]... 
## Installation @@ -184,6 +181,8 @@ LLM-powered functions created using `@prompt`, `@chatprompt` and `@prompt_chain` [Chat Prompting]: https://magentic.dev/chat-prompting [Function Calling]: https://magentic.dev/function-calling [Parallel Function Calling]: https://magentic.dev/function-calling/#parallelfunctioncall +[Observability]: https://magentic.dev/logging-and-tracing +[Pydantic Logfire integration]: https://logfire.pydantic.dev/docs/integrations/third-party/magentic/ [Formatting]: https://magentic.dev/formatting [Asyncio]: https://magentic.dev/asyncio [Streaming]: https://magentic.dev/streaming @@ -192,6 +191,7 @@ LLM-powered functions created using `@prompt`, `@chatprompt` and `@prompt_chain` [Configuration]: https://magentic.dev/configuration [Type Annotations]: https://magentic.dev/type-checking + ### Streaming The `StreamedStr` (and `AsyncStreamedStr`) class can be used to stream the output of the LLM. This allows you to process the text while it is being generated, rather than receiving the whole output at once. @@ -333,40 +333,107 @@ See [Asyncio] for more. ## Backend/LLM Configuration -Magentic supports multiple "backends" (LLM providers). These are - -- `openai` : the default backend that uses the `openai` Python package. Supports all features of magentic. - ```python - from magentic import OpenaiChatModel - ``` -- `anthropic` : uses the `anthropic` Python package. Supports all features of magentic, however streaming responses are currently received all at once. - ```sh - pip install "magentic[anthropic]" - ``` - ```python - from magentic.chat_model.anthropic_chat_model import AnthropicChatModel - ``` -- `litellm` : uses the `litellm` Python package to enable querying LLMs from [many different providers](https://docs.litellm.ai/docs/providers). Note: some models may not support all features of `magentic` e.g. function calling/structured output and streaming. - ```sh - pip install "magentic[litellm]" - ``` - ```python - from magentic.chat_model.litellm_chat_model import LitellmChatModel - ``` -- `mistral` : uses the `openai` Python package with some small modifications to make the API queries compatible with the Mistral API. Supports all features of magentic, however tool calls (including structured outputs) are not streamed so are received all at once. Note: a future version of magentic might switch to using the `mistral` Python package. - ```python - from magentic.chat_model.mistral_chat_model import MistralChatModel - ``` - -The backend and LLM (`ChatModel`) used by `magentic` can be configured in several ways. When a magentic function is called, the `ChatModel` to use follows this order of preference +Magentic supports multiple LLM providers or "backends". This roughly refers to which Python package is used to interact with the LLM API. The following backends are supported. + +### OpenAI + +The default backend, using the `openai` Python package and supports all features of magentic. + +No additional installation is required. Just import the `OpenaiChatModel` class from `magentic`. + +```python +from magentic import OpenaiChatModel + +model = OpenaiChatModel("gpt-4o") +``` + +#### Ollama via OpenAI + +Ollama supports an OpenAI-compatible API, which allows you to use Ollama models via the OpenAI backend. + +First, install ollama from [ollama.com](https://ollama.com/). Then, pull the model you want to use. + +```sh +ollama pull llama3.2 +``` + +Then, specify the model name and `base_url` when creating the `OpenaiChatModel` instance. 
+ +```python +from magentic import OpenaiChatModel + +model = OpenaiChatModel("llama3.2", base_url="http://localhost:11434/v1/") +``` + +#### Other OpenAI-compatible APIs + +When using the `openai` backend, setting the `MAGENTIC_OPENAI_BASE_URL` environment variable or using `OpenaiChatModel(..., base_url="http://localhost:8080")` in code allows you to use `magentic` with any OpenAI-compatible API e.g. [Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line&pivots=programming-language-python#create-a-new-python-application), [LiteLLM OpenAI Proxy Server](https://docs.litellm.ai/docs/proxy_server), [LocalAI](https://localai.io/howtos/easy-request-openai/). Note that if the API does not support tool calls then you will not be able to create prompt-functions that return Python objects, but other features of `magentic` will still work. + +To use Azure with the openai backend you will need to set the `MAGENTIC_OPENAI_API_TYPE` environment variable to "azure" or use `OpenaiChatModel(..., api_type="azure")`, and also set the environment variables needed by the openai package to access Azure. See https://github.com/openai/openai-python#microsoft-azure-openai + +### Anthropic + +This uses the `anthropic` Python package and supports all features of magentic. + +Install the `magentic` package with the `anthropic` extra, or install the `anthropic` package directly. + +```sh +pip install "magentic[anthropic]" +``` + +Then import the `AnthropicChatModel` class. + +```python +from magentic.chat_model.anthropic_chat_model import AnthropicChatModel + +model = AnthropicChatModel("claude-3-5-sonnet-latest") +``` + +### LiteLLM + +This uses the `litellm` Python package to enable querying LLMs from [many different providers](https://docs.litellm.ai/docs/providers). Note: some models may not support all features of `magentic` e.g. function calling/structured output and streaming. + +Install the `magentic` package with the `litellm` extra, or install the `litellm` package directly. + +```sh +pip install "magentic[litellm]" +``` + +Then import the `LitellmChatModel` class. + +```python +from magentic.chat_model.litellm_chat_model import LitellmChatModel + +model = LitellmChatModel("gpt-4o") +``` + +### Mistral + +This uses the `openai` Python package with some small modifications to make the API queries compatible with the Mistral API. It supports all features of magentic. However tool calls (including structured outputs) are not streamed so are received all at once. + +Note: a future version of magentic might switch to using the `mistral` Python package. + +No additional installation is required. Just import the `MistralChatModel` class. + +```python +from magentic.chat_model.mistral_chat_model import MistralChatModel + +model = MistralChatModel("mistral-large-latest") +``` + +## Configure a Backend + +The default `ChatModel` used by `magentic` (in `@prompt`, `@chatprompt`, etc.) can be configured in several ways. When a prompt-function or chatprompt-function is called, the `ChatModel` to use follows this order of preference 1. The `ChatModel` instance provided as the `model` argument to the magentic decorator 1. The current chat model context, created using `with MyChatModel:` -1. The global `ChatModel` created from environment variables and the default settings in [src/magentic/settings.py](https://github.com/jackmpcollins/magentic/src/magentic/settings.py) +1. 
The global `ChatModel` created from environment variables and the default settings in [src/magentic/settings.py](https://github.com/jackmpcollins/magentic/blob/main/src/magentic/settings.py) + +The following code snippet demonstrates this behavior: ```python from magentic import OpenaiChatModel, prompt -from magentic.chat_model.litellm_chat_model import LitellmChatModel +from magentic.chat_model.anthropic_chat_model import AnthropicChatModel @prompt("Say hello") @@ -375,16 +442,16 @@ def say_hello() -> str: ... @prompt( "Say hello", - model=LitellmChatModel("ollama_chat/llama3"), + model=AnthropicChatModel("claude-3-5-sonnet-latest"), ) -def say_hello_litellm() -> str: ... +def say_hello_anthropic() -> str: ... say_hello() # Uses env vars or default settings -with OpenaiChatModel("gpt-3.5-turbo", temperature=1): - say_hello() # Uses openai with gpt-3.5-turbo and temperature=1 due to context manager - say_hello_litellm() # Uses litellm with ollama_chat/llama3 because explicitly configured +with OpenaiChatModel("gpt-4o-mini", temperature=1): + say_hello() # Uses openai with gpt-4o-mini and temperature=1 due to context manager + say_hello_anthropic() # Uses Anthropic claude-3-5-sonnet-latest because explicitly configured ``` The following environment variables can be set. @@ -415,10 +482,6 @@ The following environment variables can be set. | MAGENTIC_OPENAI_SEED | Seed for deterministic sampling | 42 | | MAGENTIC_OPENAI_TEMPERATURE | OpenAI temperature | 0.5 | -When using the `openai` backend, setting the `MAGENTIC_OPENAI_BASE_URL` environment variable or using `OpenaiChatModel(..., base_url="http://localhost:8080")` in code allows you to use `magentic` with any OpenAI-compatible API e.g. [Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line&pivots=programming-language-python#create-a-new-python-application), [LiteLLM OpenAI Proxy Server](https://docs.litellm.ai/docs/proxy_server), [LocalAI](https://localai.io/howtos/easy-request-openai/). Note that if the API does not support tool calls then you will not be able to create prompt-functions that return Python objects, but other features of `magentic` will still work. - -To use Azure with the openai backend you will need to set the `MAGENTIC_OPENAI_API_TYPE` environment variable to "azure" or use `OpenaiChatModel(..., api_type="azure")`, and also set the environment variables needed by the openai package to access Azure. See https://github.com/openai/openai-python#microsoft-azure-openai - ## Type Checking Many type checkers will raise warnings or errors for functions with the `@prompt` decorator due to the function having no body or return value. There are several ways to deal with these. diff --git a/docs/configuration.md b/docs/configuration.md index cc54a17..3c97d2a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,39 +1,108 @@ # LLM Configuration -Magentic supports multiple "backends" (LLM providers). These are - -- `openai` : the default backend that uses the `openai` Python package. Supports all features of magentic. - ```python - from magentic import OpenaiChatModel - ``` -- `anthropic` : uses the `anthropic` Python package. Supports all features of magentic, however streaming responses are currently received all at once. 
- ```sh - pip install "magentic[anthropic]" - ``` - ```python - from magentic.chat_model.anthropic_chat_model import AnthropicChatModel - ``` -- `litellm` : uses the `litellm` Python package to enable querying LLMs from [many different providers](https://docs.litellm.ai/docs/providers). Note: some models may not support all features of `magentic` e.g. function calling/structured output and streaming. - ```sh - pip install "magentic[litellm]" - ``` - ```python - from magentic.chat_model.litellm_chat_model import LitellmChatModel - ``` -- `mistral` : uses the `openai` Python package with some small modifications to make the API queries compatible with the Mistral API. Supports all features of magentic, however tool calls (including structured outputs) are not streamed so are received all at once. Note: a future version of magentic might switch to using the `mistral` Python package. - ```python - from magentic.chat_model.mistral_chat_model import MistralChatModel - ``` - -The backend and LLM (`ChatModel`) used by `magentic` can be configured in several ways. When a magentic function is called, the `ChatModel` to use follows this order of preference +## Backends + +Magentic supports multiple LLM providers or "backends". This roughly refers to which Python package is used to interact with the LLM API. The following backends are supported. + +### OpenAI + +The default backend, using the `openai` Python package and supports all features of magentic. + +No additional installation is required. Just import the `OpenaiChatModel` class from `magentic`. + +```python +from magentic import OpenaiChatModel + +model = OpenaiChatModel("gpt-4o") +``` + +#### Ollama via OpenAI + +Ollama supports an OpenAI-compatible API, which allows you to use Ollama models via the OpenAI backend. + +First, install ollama from [ollama.com](https://ollama.com/). Then, pull the model you want to use. + +```sh +ollama pull llama3.2 +``` + +Then, specify the model name and `base_url` when creating the `OpenaiChatModel` instance. + +```python +from magentic import OpenaiChatModel + +model = OpenaiChatModel("llama3.2", base_url="http://localhost:11434/v1/") +``` + +#### Other OpenAI-compatible APIs + +When using the `openai` backend, setting the `MAGENTIC_OPENAI_BASE_URL` environment variable or using `OpenaiChatModel(..., base_url="http://localhost:8080")` in code allows you to use `magentic` with any OpenAI-compatible API e.g. [Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line&pivots=programming-language-python#create-a-new-python-application), [LiteLLM OpenAI Proxy Server](https://docs.litellm.ai/docs/proxy_server), [LocalAI](https://localai.io/howtos/easy-request-openai/). Note that if the API does not support tool calls then you will not be able to create prompt-functions that return Python objects, but other features of `magentic` will still work. + +To use Azure with the openai backend you will need to set the `MAGENTIC_OPENAI_API_TYPE` environment variable to "azure" or use `OpenaiChatModel(..., api_type="azure")`, and also set the environment variables needed by the openai package to access Azure. See https://github.com/openai/openai-python#microsoft-azure-openai + +### Anthropic + +This uses the `anthropic` Python package and supports all features of magentic. + +Install the `magentic` package with the `anthropic` extra, or install the `anthropic` package directly. + +```sh +pip install "magentic[anthropic]" +``` + +Then import the `AnthropicChatModel` class. 
+ +```python +from magentic.chat_model.anthropic_chat_model import AnthropicChatModel + +model = AnthropicChatModel("claude-3-5-sonnet-latest") +``` + +### LiteLLM + +This uses the `litellm` Python package to enable querying LLMs from [many different providers](https://docs.litellm.ai/docs/providers). Note: some models may not support all features of `magentic` e.g. function calling/structured output and streaming. + +Install the `magentic` package with the `litellm` extra, or install the `litellm` package directly. + +```sh +pip install "magentic[litellm]" +``` + +Then import the `LitellmChatModel` class. + +```python +from magentic.chat_model.litellm_chat_model import LitellmChatModel + +model = LitellmChatModel("gpt-4o") +``` + +### Mistral + +This uses the `openai` Python package with some small modifications to make the API queries compatible with the Mistral API. It supports all features of magentic. However tool calls (including structured outputs) are not streamed so are received all at once. + +Note: a future version of magentic might switch to using the `mistral` Python package. + +No additional installation is required. Just import the `MistralChatModel` class. + +```python +from magentic.chat_model.mistral_chat_model import MistralChatModel + +model = MistralChatModel("mistral-large-latest") +``` + +## Configure a Backend + +The default `ChatModel` used by `magentic` (in `@prompt`, `@chatprompt`, etc.) can be configured in several ways. When a prompt-function or chatprompt-function is called, the `ChatModel` to use follows this order of preference 1. The `ChatModel` instance provided as the `model` argument to the magentic decorator 1. The current chat model context, created using `with MyChatModel:` -1. The global `ChatModel` created from environment variables and the default settings in [src/magentic/settings.py](https://github.com/jackmpcollins/magentic/src/magentic/settings.py) +1. The global `ChatModel` created from environment variables and the default settings in [src/magentic/settings.py](https://github.com/jackmpcollins/magentic/blob/main/src/magentic/settings.py) + +The following code snippet demonstrates this behavior: ```python from magentic import OpenaiChatModel, prompt -from magentic.chat_model.litellm_chat_model import LitellmChatModel +from magentic.chat_model.anthropic_chat_model import AnthropicChatModel @prompt("Say hello") @@ -42,16 +111,16 @@ def say_hello() -> str: ... @prompt( "Say hello", - model=LitellmChatModel("ollama_chat/llama3"), + model=AnthropicChatModel("claude-3-5-sonnet-latest"), ) -def say_hello_litellm() -> str: ... +def say_hello_anthropic() -> str: ... say_hello() # Uses env vars or default settings -with OpenaiChatModel("gpt-3.5-turbo", temperature=1): - say_hello() # Uses openai with gpt-3.5-turbo and temperature=1 due to context manager - say_hello_litellm() # Uses litellm with ollama_chat/llama3 because explicitly configured +with OpenaiChatModel("gpt-4o-mini", temperature=1): + say_hello() # Uses openai with gpt-4o-mini and temperature=1 due to context manager + say_hello_anthropic() # Uses Anthropic claude-3-5-sonnet-latest because explicitly configured ``` The following environment variables can be set. @@ -81,7 +150,3 @@ The following environment variables can be set. 
| MAGENTIC_OPENAI_MAX_TOKENS | OpenAI max number of generated tokens | 1024 | | MAGENTIC_OPENAI_SEED | Seed for deterministic sampling | 42 | | MAGENTIC_OPENAI_TEMPERATURE | OpenAI temperature | 0.5 | - -When using the `openai` backend, setting the `MAGENTIC_OPENAI_BASE_URL` environment variable or using `OpenaiChatModel(..., base_url="http://localhost:8080")` in code allows you to use `magentic` with any OpenAI-compatible API e.g. [Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line&pivots=programming-language-python#create-a-new-python-application), [LiteLLM OpenAI Proxy Server](https://docs.litellm.ai/docs/proxy_server), [LocalAI](https://localai.io/howtos/easy-request-openai/). Note that if the API does not support tool calls then you will not be able to create prompt-functions that return Python objects, but other features of `magentic` will still work. - -To use Azure with the openai backend you will need to set the `MAGENTIC_OPENAI_API_TYPE` environment variable to "azure" or use `OpenaiChatModel(..., api_type="azure")`, and also set the environment variables needed by the openai package to access Azure. See https://github.com/openai/openai-python#microsoft-azure-openai diff --git a/docs/index.md b/docs/index.md index 4751f10..d71c1aa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,19 +1,17 @@ # Overview -Easily integrate Large Language Models into your Python code. Simply use the `@prompt` and `@chatprompt` decorators to create functions that return structured output from the LLM. Mix LLM queries and function calling with regular Python code to create complex logic. +Seamlessly integrate Large Language Models into Python code. Use the `@prompt` and `@chatprompt` decorators to create functions that return structured output from an LLM. Combine LLM queries and tool use with traditional Python code to build complex agentic systems. ## Features - [Structured Outputs] using pydantic models and built-in python types. -- [Chat Prompting] to enable few-shot prompting with structured examples. -- [Function Calling] and [Parallel Function Calling] via the `FunctionCall` and `ParallelFunctionCall` return types. -- [Formatting] to naturally insert python objects into prompts. -- [Asyncio]. Simply use `async def` when defining a magentic function. -- [Streaming] structured outputs to use them as they are being generated. -- [Vision] to easily get structured outputs from images. +- [Streaming] of structured outputs and function calls, to use them while being generated. - [LLM-Assisted Retries] to improve LLM adherence to complex output schemas. -- Multiple LLM providers including OpenAI and Anthropic. See [Configuration]. +- [Observability] using OpenTelemetry, with native [Pydantic Logfire integration]. - [Type Annotations] to work nicely with linters and IDEs. +- [Configuration] options for multiple LLM providers including OpenAI, Anthropic, and Ollama. +- Many more features: [Chat Prompting], [Parallel Function Calling], [Vision], [Formatting], [Asyncio]... 
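+
+For example (assuming the default OpenAI backend with the `OPENAI_API_KEY` environment variable set, and using a made-up `Superhero` schema purely for illustration), a minimal prompt-function sketch looks like this:
+
+```python
+from magentic import prompt
+from pydantic import BaseModel
+
+
+class Superhero(BaseModel):
+    name: str
+    age: int
+    powers: list[str]
+
+
+@prompt("Create a Superhero named {name}.")
+def create_superhero(name: str) -> Superhero: ...
+
+
+# The LLM response is parsed into a Superhero instance
+hero = create_superhero("Garden Man")
+```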
+ ## Installation @@ -184,6 +182,8 @@ LLM-powered functions created using `@prompt`, `@chatprompt` and `@prompt_chain` [Chat Prompting]: chat-prompting.md [Function Calling]: function-calling.md [Parallel Function Calling]: function-calling.md#parallelfunctioncall +[Observability]: logging-and-tracing.md +[Pydantic Logfire integration]: https://logfire.pydantic.dev/docs/integrations/third-party/magentic/ [Formatting]: formatting.md [Asyncio]: asyncio.md [Streaming]: streaming.md diff --git a/mkdocs.yml b/mkdocs.yml index 0140ab8..86ee6d3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,6 +67,11 @@ extra: data: 1 note: >- Thanks for your feedback! + social: + - icon: fontawesome/brands/x-twitter + link: https://x.com/jackmpcollins + - icon: fontawesome/brands/github + link: https://github.com/jackmpcollins # https://www.mkdocs.org/user-guide/configuration/#validation validation: From d7ae5c26ff259448a5d1cf9bad3c10340949a37c Mon Sep 17 00:00:00 2001 From: Jack Collins <6640905+jackmpcollins@users.noreply.github.com> Date: Sun, 1 Dec 2024 00:08:56 -0800 Subject: [PATCH 7/7] Bump version to 0.34.1 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8165cce..74a59db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "magentic" -version = "0.34.0" +version = "0.34.1" description = "Seamlessly integrate LLMs as Python functions" readme = "README.md" requires-python = ">=3.10" diff --git a/uv.lock b/uv.lock index 34e3791..2480f70 100644 --- a/uv.lock +++ b/uv.lock @@ -1389,7 +1389,7 @@ wheels = [ [[package]] name = "magentic" -version = "0.34.0" +version = "0.34.1" source = { editable = "." } dependencies = [ { name = "filetype" },