Merge pull request #113 from Clarifai/fix-model-examples-minor
Update config Python version and Model.py to run on CPU
luv-bansal authored Jan 21, 2025
2 parents 5b5007d + 1c0799d commit 847e2a9
Showing 13 changed files with 18 additions and 18 deletions.
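
Two kinds of changes repeat across the files below: each config.yaml bumps build_info.python_version (mostly "3.10" to "3.11"), and each model.py swaps hard-coded GPU placement (device_map="auto" or device_map="cuda", plus an explicit .cuda() call) for device_map=self.device so the examples also run on CPU. The diffs do not show where self.device comes from; a minimal sketch of the usual pattern, with the class name purely hypothetical:

import torch

class ExampleRunner:  # hypothetical wrapper; the real class is not shown in this diff
    def load_model(self):
        # Pick the GPU when one is available, otherwise fall back to CPU, so that
        # device_map=self.device works on CPU-only machines.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"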
@@ -7,7 +7,7 @@ model:
   model_type_id: "visual-classifier"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "visual-detector"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
2 changes: 1 addition & 1 deletion models/model_upload/llms/llama-3-8b-instruct/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
10 changes: 6 additions & 4 deletions models/model_upload/llms/llama-3_2-1b-instruct/1/model.py
@@ -126,11 +126,11 @@ def load_model(self):
     # if checkpoints section is in config.yaml file then checkpoints will be downloaded at this path during model upload time.
     checkpoints = os.path.join(os.path.dirname(__file__), "checkpoints")
     self.tokenizer = AutoTokenizer.from_pretrained(checkpoints,)
-    self.tokenizer.pad_token = self.tokenizer.eos_token # Set pad token to eos token
+    self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad token to eos token
     self.model = AutoModelForCausalLM.from_pretrained(
         checkpoints,
         low_cpu_mem_usage=True,
-        device_map="auto",
+        device_map=self.device,
         torch_dtype=torch.bfloat16,
     )
     logger.info("Done loading!")
@@ -161,7 +161,8 @@ def predict(self,
     for text in outputs_text:
       outputs.append(create_output(text=text, code=status_code_pb2.SUCCESS))
 
-    return service_pb2.MultiOutputResponse(outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
+    return service_pb2.MultiOutputResponse(
+        outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
 
   def generate(self, request: service_pb2.PostModelOutputsRequest
               ) -> Iterator[service_pb2.MultiOutputResponse]:
@@ -208,7 +209,8 @@ def generate(self, request: service_pb2.PostModelOutputsRequest
           outputs[idx].data.text.raw = text # Append new text to each output
           outputs[idx].status.code = status_code_pb2.SUCCESS
         # Yield the current outputs
-        yield service_pb2.MultiOutputResponse(outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
+        yield service_pb2.MultiOutputResponse(
+            outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
       finally:
         thread.join()
 
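
For context on the thread.join() in the finally block above: generate() streams partial outputs from a background generation thread. A minimal sketch of that pattern, assuming the Hugging Face TextIteratorStreamer is used (the helper name stream_generate and its arguments are illustrative; the full method body is not part of this diff):

from threading import Thread

from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, prompt, device, max_new_tokens=256):
    # A background thread runs model.generate() and pushes decoded text into the
    # streamer; the caller consumes chunks as they arrive and always joins the thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    thread = Thread(target=model.generate,
                    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens))
    thread.start()
    try:
        for new_text in streamer:
            yield new_text
    finally:
        thread.join()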
2 changes: 1 addition & 1 deletion models/model_upload/llms/llama-3_2-1b-instruct/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "2"
2 changes: 1 addition & 1 deletion models/model_upload/llms/openai-gpt4/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "multimodal-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
3 changes: 1 addition & 2 deletions models/model_upload/ocr/got-ocr2.0/1/model.py
@@ -38,10 +38,9 @@ def load_model(self):
         checkpoint_path,
         trust_remote_code=True,
         use_safetensors=True,
-        device_map="cuda",
+        device_map=self.device,
         low_cpu_mem_usage=True,
         pad_token_id=self.tokenizer.eos_token_id)
-    self.model.eval().cuda()
     logger.info("Done loading Model checkpoints!")
 
   def predict(self, request: service_pb2.PostModelOutputsRequest
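
With device_map=self.device the weights are already placed by from_pretrained, which is why the trailing self.model.eval().cuda() is dropped rather than rewritten (a hard .cuda() call would fail on CPU-only machines). A rough, device-agnostic sketch of the inference side, assuming the GOT-OCR2.0 remote-code chat() helper (its exact signature is an assumption; it is not shown in this diff):

import torch

def run_ocr(model, tokenizer, image_path):
    # eval() alone is enough once device_map has placed the weights; there is no
    # explicit .cuda() call, so the same code path works on CPU.
    model.eval()
    with torch.no_grad():
        # chat() is the helper exposed by the GOT-OCR2.0 trust_remote_code model (assumed).
        return model.chat(tokenizer, image_path, ocr_type="ocr")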
2 changes: 1 addition & 1 deletion models/model_upload/ocr/got-ocr2.0/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "image-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "audio-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.12"
 
 inference_compute_info:
   cpu_limit: "1"
3 changes: 1 addition & 2 deletions models/model_upload/test-upload/mbart/1/model.py
@@ -39,7 +39,7 @@ def load_model(self):
     # if checkpoints section is in config.yaml file then checkpoints will be downloaded at this path during model upload time.
     self.tokenizer = AutoTokenizer.from_pretrained(checkpoints)
     self.model = AutoModelForSeq2SeqLM.from_pretrained(
-        checkpoints, torch_dtype="auto", device_map="auto")
+        checkpoints, torch_dtype="auto", device_map=self.device)
 
   def predict(self, request: service_pb2.PostModelOutputsRequest
             ) -> Iterator[service_pb2.MultiOutputResponse]:
@@ -51,7 +51,6 @@ def predict(self, request: service_pb2.PostModelOutputsRequest
     raw_texts = []
     for t in texts:
       inputs = self.tokenizer.encode(t, return_tensors="pt").to(self.device)
-      # inputs = self.tokenizer.encode("Translate to English: Je t'aime.", return_tensors="pt").to(self.device)
       outputs = self.model.generate(inputs)
       print(self.tokenizer.decode(outputs[0]))
       raw_texts.append(self.tokenizer.decode(outputs[0]))
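
A small follow-up on the predict() hunk above: tokenizer.decode(outputs[0]) keeps mBART's special tokens (such as </s> and the language code) in the returned text. If cleaner output is wanted, a hypothetical helper, not part of this commit, could decode with skip_special_tokens=True:

def decode_clean(tokenizer, generated_ids):
    # Hypothetical helper (not in this commit): drop special tokens such as </s>
    # and the mBART language code, returning only the generated text.
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)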
