diff --git a/runtimes/huggingface/mlserver_huggingface/runtime.py b/runtimes/huggingface/mlserver_huggingface/runtime.py
index 2e1316ddb..97260649f 100644
--- a/runtimes/huggingface/mlserver_huggingface/runtime.py
+++ b/runtimes/huggingface/mlserver_huggingface/runtime.py
@@ -25,7 +25,7 @@ def __init__(self, settings: ModelSettings):
     async def load(self) -> bool:
         # Loading & caching pipeline in asyncio loop to avoid blocking
         logger.info(f"Loading model for task '{self.hf_settings.task_name}'...")
-        await asyncio.get_running_loop().run_in_executor(
+        self._model = await asyncio.get_running_loop().run_in_executor(
             None,
             load_pipeline_from_settings,
             self.hf_settings,
@@ -33,7 +33,7 @@ async def load(self) -> bool:
         )
 
         # Now we load the cached model which should not block asyncio
-        self._model = load_pipeline_from_settings(self.hf_settings, self.settings)
+        # Not needed anymore: run_in_executor above already returned the pipeline.
         self._merge_metadata()
         return True