diff --git a/guidance/__init__.py b/guidance/__init__.py
index 9ef0139a6..c5e37bb0e 100644
--- a/guidance/__init__.py
+++ b/guidance/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.8"
+__version__ = "0.1.9"
 
 import functools
 import os
diff --git a/guidance/models/_model.py b/guidance/models/_model.py
index cb3ba37f2..c849777e9 100644
--- a/guidance/models/_model.py
+++ b/guidance/models/_model.py
@@ -654,7 +654,6 @@ def __call__(self, grammar, max_tokens=1000000, n=1, top_p=1, temperature=0.0, e
         token_count = 0
         last_token_count = 0
         was_forced = False
-        is_generated = False
         captured_data = {}
         captured_log_prob_data = {}
         while True: # each iteration generates one more token (and some of the associated bytes)
@@ -670,6 +669,7 @@ def __call__(self, grammar, max_tokens=1000000, n=1, top_p=1, temperature=0.0, e
             parser.mark_new_token()
 
             # walk down the trie as far as possible before computing the logits
+            is_generated = False
             retry_token_gen = False
             trie = self._token_trie
             trie.match_version += 1 # this invalidates all the match caches from the previous token