git config to 3.11 and model.py for cpu
luv-bansal committed Jan 21, 2025
1 parent 5b5007d commit 48f2c4b
Showing 5 changed files with 10 additions and 8 deletions.
@@ -7,7 +7,7 @@ model:
   model_type_id: "visual-classifier"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
@@ -7,7 +7,7 @@ model:
   model_type_id: "visual-detector"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
2 changes: 1 addition & 1 deletion models/model_upload/llms/llama-3-8b-instruct/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
10 changes: 6 additions & 4 deletions models/model_upload/llms/llama-3_2-1b-instruct/1/model.py
@@ -126,11 +126,11 @@ def load_model(self):
     # if checkpoints section is in config.yaml file then checkpoints will be downloaded at this path during model upload time.
     checkpoints = os.path.join(os.path.dirname(__file__), "checkpoints")
     self.tokenizer = AutoTokenizer.from_pretrained(checkpoints,)
-    self.tokenizer.pad_token = self.tokenizer.eos_token # Set pad token to eos token
+    self.tokenizer.pad_token = self.tokenizer.eos_token  # Set pad token to eos token
     self.model = AutoModelForCausalLM.from_pretrained(
         checkpoints,
         low_cpu_mem_usage=True,
-        device_map="auto",
+        device_map=self.device,
         torch_dtype=torch.bfloat16,
     )
     logger.info("Done loading!")
@@ -161,7 +161,8 @@ def predict(self,
     for text in outputs_text:
       outputs.append(create_output(text=text, code=status_code_pb2.SUCCESS))
 
-    return service_pb2.MultiOutputResponse(outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
+    return service_pb2.MultiOutputResponse(
+        outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
 
   def generate(self, request: service_pb2.PostModelOutputsRequest
               ) -> Iterator[service_pb2.MultiOutputResponse]:
@@ -208,7 +209,8 @@ def generate(self, request: service_pb2.PostModelOutputsRequest
           outputs[idx].data.text.raw = text  # Append new text to each output
           outputs[idx].status.code = status_code_pb2.SUCCESS
         # Yield the current outputs
-        yield service_pb2.MultiOutputResponse(outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
+        yield service_pb2.MultiOutputResponse(
+            outputs=outputs, status=status_pb2.Status(code=status_code_pb2.SUCCESS))
     finally:
       thread.join()
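For context, the finally: thread.join() pairs with Hugging Face's threaded streaming pattern: generate() runs on a worker thread that feeds a streamer, while the handler drains the streamer and yields partial responses as text arrives. A self-contained sketch of that pattern, assuming transformers.TextIteratorStreamer (the hunk itself shows only the yield and the join, so this is an inference, not the commit's code):

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# "checkpoints" stands in for the local model directory used in model.py.
tokenizer = AutoTokenizer.from_pretrained("checkpoints")
model = AutoModelForCausalLM.from_pretrained("checkpoints")

inputs = tokenizer("Hello", return_tensors="pt")
# skip_prompt=True streams only newly generated text, not the prompt.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# Generation runs on a worker thread; the streamer fills as tokens decode.
thread = Thread(target=model.generate,
                kwargs=dict(**inputs, streamer=streamer, max_new_tokens=50))
thread.start()
try:
  for text in streamer:  # each iteration is a newly decoded chunk
    print(text, end="", flush=True)
finally:
  thread.join()  # ensure the worker finishes even if iteration stops early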

2 changes: 1 addition & 1 deletion models/model_upload/llms/llama-3_2-1b-instruct/config.yaml
@@ -7,7 +7,7 @@ model:
   model_type_id: "text-to-text"
 
 build_info:
-  python_version: "3.10"
+  python_version: "3.11"
 
 inference_compute_info:
   cpu_limit: "1"
