#2023: Comment out the duplicate model load

SeldonIO · Jan 20, 2025 · ffbcd1f
1 parent 44ebb1e
commit ffbcd1f
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/runtimes/huggingface/mlserver_huggingface/runtime.py b/runtimes/huggingface/mlserver_huggingface/runtime.py
@@ -25,15 +25,15 @@ def __init__(self, settings: ModelSettings):
     async def load(self) -> bool:
         # Loading & caching pipeline in asyncio loop to avoid blocking
         logger.info(f"Loading model for task '{self.hf_settings.task_name}'...")
-        await asyncio.get_running_loop().run_in_executor(
+        self._model = await asyncio.get_running_loop().run_in_executor(
             None,
             load_pipeline_from_settings,
             self.hf_settings,
             self.settings,
         )
 
         # Now we load the cached model which should not block asyncio
-        self._model = load_pipeline_from_settings(self.hf_settings, self.settings)
+        # self._model = load_pipeline_from_settings(self.hf_settings, self.settings)
         self._merge_metadata()
         return True