Skip to content
This repository has been archived by the owner on Mar 30, 2024. It is now read-only.

Commit

Permalink
Add support for mistral ai's instruct model, avoid system start token duplicate, remove extra log (#71)
Browse files Browse the repository at this point in the history

Signed-off-by: Hung-Han (Henry) Chen <[email protected]>
  • Loading branch information
chenhunghan authored Sep 28, 2023
1 parent 292250a commit 195122b
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
4 changes: 2 additions & 2 deletions charts/ialacol/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: v2
- appVersion: 0.11.4
+ appVersion: 0.11.5
description: A Helm chart for ialacol
name: ialacol
type: application
- version: 0.11.4
+ version: 0.11.5
13 changes: 13 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,16 @@ async def chat_completions(
assistant_end = ""
user_start = "### Instruction:\n"
user_end = "\n\n"
+ # For instruct fine-tuned models using mistral prompt template
+ # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
+ if "mistral" in body.model.lower() and "instruct" in body.model.lower():
+     system_start = "<s>"
+     system = ""
+     system_end = ""
+     assistant_start = ""
+     assistant_end = "</s> "
+     user_start = "[INST] "
+     user_end = " [/INST]"
if "starchat" in body.model.lower():
# See https://huggingface.co/blog/starchat-alpha and https://huggingface.co/TheBloke/starchat-beta-GGML#prompt-template
system_start = "<|system|>"
Expand Down Expand Up @@ -380,6 +390,9 @@ async def chat_completions(
# avoid duplicate assistant start token in prompt if user message already includes it
if len(assistant_start) > 0 and assistant_start in user_message_content:
assistant_start = ""
+ # avoid duplicate system_start token in prompt if system_message_content already includes it
+ if len(system_start) > 0 and system_start in system_message_content:
+     system_start = ""
prompt = f"{system_start}{system_message_content}{system_end}{assistant_message_content}{user_start}{user_message_content}{user_end}{assistant_start}"
model_name = body.model
llm = request.app.state.llm
Expand Down
1 change: 0 additions & 1 deletion model_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def chat_model_generate(
stop = config.stop
log.debug("stop: %s", stop)
log.debug("prompt: %s", prompt)
- log.debug("prompt: %s", prompt)

log.debug("Getting from ctransformer instance")
result: str = llm( # pyright: ignore [reportGeneralTypeIssues]
Expand Down

0 comments on commit 195122b

Please sign in to comment.