Merge pull request #113 from Clarifai/fix-model-examples-minor
Update config Python version and Model.py to run on CPU
luv-bansal authored Jan 21, 2025
2 parents 5b5007d + 1c0799d commit 847e2a9
Showing 13 changed files with 18 additions and 18 deletions.
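
Two kinds of changes repeat across the files below: each config.yaml bumps build_info.python_version (mostly "3.10" to "3.11"), and each model.py swaps hard-coded GPU placement (device_map="auto" or device_map="cuda", plus an explicit .cuda() call) for device_map=self.device so the examples also run on CPU. The diffs do not show where self.device comes from; a minimal sketch of the usual pattern, with the class name purely hypothetical:

import torch

class ExampleRunner:  # hypothetical wrapper; the real class is not shown in this diff
    def load_model(self):
        # Pick the GPU when one is available, otherwise fall back to CPU, so that
        # device_map=self.device works on CPU-only machines.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"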
@@ -7,7 +7,7 @@ model:
   model_type_id: "visual-classifier"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "visual-detector"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
2 changes: 1 addition & 1 deletion models/model_upload/llms/llama-3-8b-instruct/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
10 changes: 6 additions & 4 deletions models/model_upload/llms/llama-3_2-1b-instruct/1/model.py
@@ -126,11 +126,11 @@ def load_model(self):
     # if checkpoints section is in config.yaml file then checkpoints will be downloaded at this path during model upload time.
     checkpoints = os.path.join(os.path.dirname(__file__), "checkpoints")
     self.tokenizer = AutoTokenizer.from_pretrained(checkpoints,)
-    self.tokenizer.pad_token = self.tokenizer.eos_token # Set pad token to eos token
+    self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad token to eos token
     self.model = AutoModelForCausalLM.from_pretrained(
         checkpoints,
         low_cpu_mem_usage=True,
-        device_map="auto",
+        device_map=self.device,
         torch_dtype=torch.bfloat16,
     )
     logger.info("Done loading!")
@@ -161,7 +161,8 @@ def predict(self,
     for text in outputs_text:
       outputs.append(create_output(text=text, code=status_code_pb2.SUCCESS))
 
-    return service_pb2.MultiOutputResponse(outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
+    return service_pb2.MultiOutputResponse(
+        outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
 
   def generate(self, request: service_pb2.PostModelOutputsRequest
               ) -> Iterator[service_pb2.MultiOutputResponse]:
@@ -208,7 +209,8 @@ def generate(self, request: service_pb2.PostModelOutputsRequest
           outputs[idx].data.text.raw = text # Append new text to each output
           outputs[idx].status.code = status_code_pb2.SUCCESS
         # Yield the current outputs
-        yield service_pb2.MultiOutputResponse(outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
+        yield service_pb2.MultiOutputResponse(
+            outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
       finally:
         thread.join()
 
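
For context on the thread.join() in the finally block above: generate() streams partial outputs from a background generation thread. A minimal sketch of that pattern, assuming the Hugging Face TextIteratorStreamer is used (the helper name stream_generate and its arguments are illustrative; the full method body is not part of this diff):

from threading import Thread

from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, prompt, device, max_new_tokens=256):
    # A background thread runs model.generate() and pushes decoded text into the
    # streamer; the caller consumes chunks as they arrive and always joins the thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    thread = Thread(target=model.generate,
                    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens))
    thread.start()
    try:
        for new_text in streamer:
            yield new_text
    finally:
        thread.join()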
2 changes: 1 addition & 1 deletion models/model_upload/llms/llama-3_2-1b-instruct/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "2"
2 changes: 1 addition & 1 deletion models/model_upload/llms/openai-gpt4/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "multimodal-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
3 changes: 1 addition & 2 deletions models/model_upload/ocr/got-ocr2.0/1/model.py
@@ -38,10 +38,9 @@ def load_model(self):
         checkpoint_path,
         trust_remote_code=True,
         use_safetensors=True,
-        device_map="cuda",
+        device_map=self.device,
         low_cpu_mem_usage=True,
         pad_token_id=self.tokenizer.eos_token_id)
-    self.model.eval().cuda()
     logger.info("Done loading Model checkpoints!")
 
   def predict(self, request: service_pb2.PostModelOutputsRequest
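
With device_map=self.device the weights are already placed by from_pretrained, which is why the trailing self.model.eval().cuda() is dropped rather than rewritten (a hard .cuda() call would fail on CPU-only machines). A rough, device-agnostic sketch of the inference side, assuming the GOT-OCR2.0 remote-code chat() helper (its exact signature is an assumption; it is not shown in this diff):

import torch

def run_ocr(model, tokenizer, image_path):
    # eval() alone is enough once device_map has placed the weights; there is no
    # explicit .cuda() call, so the same code path works on CPU.
    model.eval()
    with torch.no_grad():
        # chat() is the helper exposed by the GOT-OCR2.0 trust_remote_code model (assumed).
        return model.chat(tokenizer, image_path, ocr_type="ocr")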
2 changes: 1 addition & 1 deletion models/model_upload/ocr/got-ocr2.0/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "image-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "audio-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.12"
 
 inference_compute_info:
   cpu_limit: "1"
3 changes: 1 addition & 2 deletions models/model_upload/test-upload/mbart/1/model.py
@@ -39,7 +39,7 @@ def load_model(self):
     # if checkpoints section is in config.yaml file then checkpoints will be downloaded at this path during model upload time.
     self.tokenizer = AutoTokenizer.from_pretrained(checkpoints)
     self.model = AutoModelForSeq2SeqLM.from_pretrained(
-        checkpoints, torch_dtype="auto", device_map="auto")
+        checkpoints, torch_dtype="auto", device_map=self.device)
 
   def predict(self, request: service_pb2.PostModelOutputsRequest
             ) -> Iterator[service_pb2.MultiOutputResponse]:
@@ -51,7 +51,6 @@ def predict(self, request: service_pb2.PostModelOutputsRequest
     raw_texts = []
     for t in texts:
       inputs = self.tokenizer.encode(t, return_tensors="pt").to(self.device)
-      # inputs = self.tokenizer.encode("Translate to English: Je t'aime.", return_tensors="pt").to(self.device)
       outputs = self.model.generate(inputs)
       print(self.tokenizer.decode(outputs[0]))
       raw_texts.append(self.tokenizer.decode(outputs[0]))
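
A small follow-up on the predict() hunk above: tokenizer.decode(outputs[0]) keeps mBART's special tokens (such as </s> and the language code) in the returned text. If cleaner output is wanted, a hypothetical helper, not part of this commit, could decode with skip_special_tokens=True:

def decode_clean(tokenizer, generated_ids):
    # Hypothetical helper (not in this commit): drop special tokens such as </s>
    # and the mBART language code, returning only the generated text.
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)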
