Skip to content

Commit

Permalink
Update config python_version to 3.11 and make model.py device-agnostic (CPU support)
Browse files Browse the repository at this point in the history
  • Loading branch information
luv-bansal committed Jan 21, 2025
1 parent 48f2c4b commit 1c0799d
Show file tree
Hide file tree
Showing 8 changed files with 8 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ model:
model_type_id: "text-to-text"

build_info:
python_version: "3.10"
python_version: "3.11"

inference_compute_info:
cpu_limit: "2"
Expand Down
2 changes: 1 addition & 1 deletion models/model_upload/llms/openai-gpt4/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ model:
model_type_id: "text-to-text"

build_info:
python_version: "3.10"
python_version: "3.11"

inference_compute_info:
cpu_limit: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ model:
model_type_id: "text-to-text"

build_info:
python_version: "3.10"
python_version: "3.11"

inference_compute_info:
cpu_limit: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ model:
model_type_id: "multimodal-to-text"

build_info:
python_version: "3.10"
python_version: "3.11"

inference_compute_info:
cpu_limit: "1"
Expand Down
3 changes: 1 addition & 2 deletions models/model_upload/ocr/got-ocr2.0/1/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@ def load_model(self):
checkpoint_path,
trust_remote_code=True,
use_safetensors=True,
device_map="cuda",
device_map=self.device,
low_cpu_mem_usage=True,
pad_token_id=self.tokenizer.eos_token_id)
self.model.eval().cuda()
logger.info("Done loading Model checkpoints!")

def predict(self, request: service_pb2.PostModelOutputsRequest
Expand Down
2 changes: 1 addition & 1 deletion models/model_upload/ocr/got-ocr2.0/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ model:
model_type_id: "image-to-text"

build_info:
python_version: "3.10"
python_version: "3.11"

inference_compute_info:
cpu_limit: "1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ model:
model_type_id: "audio-to-text"

build_info:
python_version: "3.10"
python_version: "3.11"

inference_compute_info:
cpu_limit: "1"
Expand Down
3 changes: 1 addition & 2 deletions models/model_upload/test-upload/mbart/1/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def load_model(self):
# if checkpoints section is in config.yaml file then checkpoints will be downloaded at this path during model upload time.
self.tokenizer = AutoTokenizer.from_pretrained(checkpoints)
self.model = AutoModelForSeq2SeqLM.from_pretrained(
checkpoints, torch_dtype="auto", device_map="auto")
checkpoints, torch_dtype="auto", device_map=self.device)

def predict(self, request: service_pb2.PostModelOutputsRequest
) -> Iterator[service_pb2.MultiOutputResponse]:
Expand All @@ -51,7 +51,6 @@ def predict(self, request: service_pb2.PostModelOutputsRequest
raw_texts = []
for t in texts:
inputs = self.tokenizer.encode(t, return_tensors="pt").to(self.device)
# inputs = self.tokenizer.encode("Translate to English: Je t'aime.", return_tensors="pt").to(self.device)
outputs = self.model.generate(inputs)
print(self.tokenizer.decode(outputs[0]))
raw_texts.append(self.tokenizer.decode(outputs[0]))
Expand Down

0 comments on commit 1c0799d

Please sign in to comment.