Fix Dataprep Ingest Data Issue. (#1271)
* Fix Dataprep Ingest Data Issue.

Trace:
1. An update to `langchain_huggingface.HuggingFaceEndpointEmbeddings` produced embedding vectors of the wrong size.
2. The wrong-sized vectors were written to the Redis database, so the indices were not created correctly (a sketch of this failure mode follows the list).
3. Because of the above, the retriever cannot retrieve data from Redis through the index.
4. RAG therefore appears to "not work": an uploaded file cannot be retrieved from the database.

Solution:
Replace every use of `langchain_huggingface.HuggingFaceEndpointEmbeddings`
with `langchain_community.embeddings.HuggingFaceInferenceAPIEmbeddings`,
and update the related READMEs and scripts.
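
In essence, each integration now asks the TEI endpoint which model it serves (via its `/info` route, as the updated code below does) and builds the community embeddings client against that endpoint. A minimal sketch of the swap — the endpoint URL here is a placeholder:

```python
import os

import requests
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# Before (broken): vector size no longer matched the served model.
# from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
# embeddings = HuggingFaceEndpointEmbeddings(model=TEI_EMBEDDING_ENDPOINT)

# After: discover the served model id from the TEI endpoint, then embed through it.
TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:6006")  # placeholder
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")

model_id = requests.get(TEI_EMBEDDING_ENDPOINT + "/info").json()["model_id"]
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT
)

vector = embeddings.embed_query("hello world")
print(len(vector))  # now matches the dimension of the model TEI actually serves
```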

Related issues:
- opea-project/GenAIExamples#1473
- opea-project/GenAIExamples#1482

---------

Signed-off-by: letonghan <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
letonghan and pre-commit-ci[bot] authored Feb 10, 2025
1 parent 0df374b commit b777db7
Showing 19 changed files with 199 additions and 59 deletions.
5 changes: 3 additions & 2 deletions comps/dataprep/deployment/docker_compose/compose.yaml
@@ -28,7 +28,7 @@ services:
      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_ELASTICSEARCH"
      ES_CONNECTION_STRING: ${ES_CONNECTION_STRING}
      INDEX_NAME: ${INDEX_NAME}
-      TEI_ENDPOINT: ${TEI_ENDPOINT}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
    restart: unless-stopped
    depends_on:
@@ -49,6 +49,7 @@ services:
      MILVUS_HOST: ${MILVUS_HOST}
      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      LOGFLAG: ${LOGFLAG}
    restart: unless-stopped
    depends_on:
      tei-embedding-serving:
@@ -161,7 +162,7 @@ services:
      QDRANT_HOST: ${QDRANT_HOST}
      QDRANT_PORT: ${QDRANT_PORT}
      COLLECTION_NAME: ${COLLECTION_NAME}
-      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
    restart: unless-stopped
2 changes: 1 addition & 1 deletion comps/dataprep/src/README_elasticsearch.md
@@ -50,7 +50,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil
### 2.4 Run Docker with CLI (Option A)

```bash
-docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest
+docker run --name="dataprep-elasticsearch" -p 6011:6011 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ES_CONNECTION_STRING=$ES_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_ELASTICSEARCH" opea/dataprep:latest
```

### 2.5 Run with Docker Compose (Option B)
5 changes: 4 additions & 1 deletion comps/dataprep/src/README_milvus.md
@@ -25,6 +25,7 @@ export MILVUS_HOST=${your_milvus_host_ip}
export MILVUS_PORT=19530
export COLLECTION_NAME=${your_collection_name}
export TEI_EMBEDDING_ENDPOINT=${your_embedding_endpoint}
+export HUGGINGFACEHUB_API_TOKEN=${your_huggingface_api_token}
```

### 1.4 Start TEI Embedding Service
@@ -70,13 +71,15 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil

```bash
export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port"
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID=${your_embedding_model_id}
export MILVUS_HOST=${your_host_ip}
```

### 2.3 Run Docker with CLI (Option A)

```bash
-docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest
+docker run -d --name="dataprep-milvus-server" -p 6010:6010 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -e MILVUS_HOST=${MILVUS_HOST} -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_MILVUS" opea/dataprep:latest
```

### 2.4 Run with Docker Compose (Option B)
6 changes: 3 additions & 3 deletions comps/dataprep/src/README_opensearch.md
@@ -51,7 +51,7 @@ curl localhost:$your_port/embed \
After checking that it works, set up environment variables.

```bash
-export TEI_ENDPOINT="http://localhost:$your_port"
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port"
```

### 1.4 Start Document Preparation Microservice for OpenSearch with Python Script
@@ -75,7 +75,7 @@ Please refer to this [readme](../../third_parties/opensearch/src/README.md).

```bash
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export TEI_ENDPOINT="http://${your_ip}:6006"
+export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
export OPENSEARCH_URL="http://${your_ip}:9200"
export INDEX_NAME=${your_index_name}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
@@ -97,7 +97,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil
- option 1: Start single-process version (for processing up to 10 files)

```bash
-docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest
+docker run -d --name="dataprep-opensearch-server" -p 6007:6007 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e OPENSEARCH_URL=$OPENSEARCH_URL -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_OPENSEARCH" opea/dataprep:latest
```

### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future)
4 changes: 3 additions & 1 deletion comps/dataprep/src/README_pgvector.md
@@ -38,6 +38,8 @@ Please refer to this [readme](../../third_parties/pgvector/src/README.md).
```bash
export PG_CONNECTION_STRING=postgresql+psycopg2://testuser:testpwd@${your_ip}:5432/vectordb
export INDEX_NAME=${your_index_name}
+export TEI_EMBEDDING_ENDPOINT=${your_tei_embedding_endpoint}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
```

### 2.3 Build Docker Image
@@ -50,7 +52,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil
### 2.4 Run Docker with CLI (Option A)

```bash
-docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest
+docker run --name="dataprep-pgvector" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e PG_CONNECTION_STRING=$PG_CONNECTION_STRING -e INDEX_NAME=$INDEX_NAME -e EMBED_MODEL=${EMBED_MODEL} -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PGVECTOR" opea/dataprep:latest
```

### 2.5 Run with Docker Compose (Option B)
5 changes: 2 additions & 3 deletions comps/dataprep/src/README_redis.md
@@ -95,8 +95,7 @@ Please refer to this [readme](../../third_parties/redis/src/README.md).
### 2.2 Setup Environment Variables

```bash
-export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-export TEI_ENDPOINT="http://${your_ip}:6006"
+export TEI_EMBEDDING_ENDPOINT="http://${your_ip}:6006"
export REDIS_URL="redis://${your_ip}:6379"
export INDEX_NAME=${your_index_name}
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
@@ -112,7 +111,7 @@ docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --buil
### 2.4 Run Docker with CLI (Option A)

```bash
-docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_ENDPOINT=$TEI_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest
+docker run -d --name="dataprep-redis-server" -p 6007:5000 --runtime=runc --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e REDIS_URL=$REDIS_URL -e INDEX_NAME=$INDEX_NAME -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN opea/dataprep:latest
```

### 2.5 Run with Docker Compose (Option B - deprecated, will move to genAIExample in future)
6 changes: 4 additions & 2 deletions comps/dataprep/src/README_vdms.md
@@ -69,7 +69,8 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export VDMS_HOST=${host_ip}
export VDMS_PORT=55555
-export TEI_ENDPOINT=${your_tei_endpoint}
+export TEI_EMBEDDING_ENDPOINT=${your_tei_endpoint}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export COLLECTION_NAME=${your_collection_name}
export SEARCH_ENGINE="FaissFlat"
export DISTANCE_STRATEGY="L2"
@@ -89,7 +90,8 @@ Start single-process version (for 1-10 files processing)

```bash
docker run -d --name="dataprep-vdms-server" -p 6007:6007 --runtime=runc --ipc=host \
-  -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_ENDPOINT=$TEI_ENDPOINT \
+  -e http_proxy=$http_proxy -e https_proxy=$https_proxy \
+  -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
  -e COLLECTION_NAME=$COLLECTION_NAME -e VDMS_HOST=$VDMS_HOST -e VDMS_PORT=$VDMS_PORT \
  -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_VDMS" opea/dataprep:latest
```
31 changes: 24 additions & 7 deletions comps/dataprep/src/integrations/elasticsearch.py
@@ -9,10 +9,9 @@
from elasticsearch import Elasticsearch
from fastapi import Body, File, Form, HTTPException, UploadFile
from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
from langchain_core.documents import Document
from langchain_elasticsearch import ElasticsearchStore
-from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings

from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType
from comps.dataprep.src.utils import (
@@ -37,7 +36,9 @@
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")

# TEI Embedding endpoints
-TEI_ENDPOINT = os.getenv("TEI_ENDPOINT", "")
+TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "")
+# Huggingface API token for TEI embedding endpoint
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")

# Vector Index Configuration
INDEX_NAME = os.getenv("INDEX_NAME", "rag-elastic")
@@ -77,15 +78,31 @@ def create_index(self) -> None:
        if not self.es_client.indices.exists(index=INDEX_NAME):
            self.es_client.indices.create(index=INDEX_NAME)

-    def get_embedder(self) -> Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]:
+    def get_embedder(self) -> Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings]:
        """Obtain required Embedder."""
-        if TEI_ENDPOINT:
-            return HuggingFaceEndpointEmbeddings(model=TEI_ENDPOINT)
+        if TEI_EMBEDDING_ENDPOINT:
+            if not HUGGINGFACEHUB_API_TOKEN:
+                raise HTTPException(
+                    status_code=400,
+                    detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` and the `EMBED_MODEL` when using `TEI_EMBEDDING_ENDPOINT`.",
+                )
+            import requests
+
+            response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info")
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available."
+                )
+            model_id = response.json()["model_id"]
+            embedder = HuggingFaceInferenceAPIEmbeddings(
+                api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT
+            )
+            return embedder
        else:
            return HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

    def get_elastic_store(
-        self, embedder: Union[HuggingFaceEndpointEmbeddings, HuggingFaceBgeEmbeddings]
+        self, embedder: Union[HuggingFaceInferenceAPIEmbeddings, HuggingFaceBgeEmbeddings]
    ) -> ElasticsearchStore:
        """Get Elasticsearch vector store."""
        return ElasticsearchStore(index_name=INDEX_NAME, embedding=embedder, es_connection=self.es_client)
36 changes: 27 additions & 9 deletions comps/dataprep/src/integrations/milvus.py
@@ -10,7 +10,7 @@

from fastapi import Body, File, Form, HTTPException, UploadFile
from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings, OpenAIEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_milvus.vectorstores import Milvus
from langchain_text_splitters import HTMLHeaderTextSplitter
@@ -36,8 +36,11 @@
# Local Embedding model
LOCAL_EMBEDDING_MODEL = os.getenv("LOCAL_EMBEDDING_MODEL", "maidalun1020/bce-embedding-base_v1")
# TEI configuration
-TEI_EMBEDDING_MODEL = os.environ.get("TEI_EMBEDDING_MODEL", "/home/user/bge-large-zh-v1.5")
+EMBED_MODEL = os.environ.get("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
TEI_EMBEDDING_ENDPOINT = os.environ.get("TEI_EMBEDDING_ENDPOINT", "")
+# Huggingface API token for TEI embedding endpoint
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
+
# MILVUS configuration
MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost")
MILVUS_PORT = int(os.getenv("MILVUS_PORT", 19530))
@@ -75,7 +78,7 @@ def ingest_chunks_to_milvus(embeddings, file_name: str, chunks: List):
        except Exception as e:
            if logflag:
                logger.info(f"[ ingest chunks ] fail to ingest chunks into Milvus. error: {e}")
-            raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}.")
+            raise HTTPException(status_code=500, detail=f"Fail to store chunks of file {file_name}: {e}")

    if logflag:
        logger.info(f"[ ingest chunks ] Docs ingested file {file_name} to Milvus collection {COLLECTION_NAME}.")
@@ -189,7 +192,23 @@ def _initialize_embedder(self):
            # create embeddings using TEI endpoint service
            if logflag:
                logger.info(f"[ milvus embedding ] TEI_EMBEDDING_ENDPOINT:{TEI_EMBEDDING_ENDPOINT}")
-            embeddings = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
+            if not HUGGINGFACEHUB_API_TOKEN:
+                raise HTTPException(
+                    status_code=400,
+                    detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.",
+                )
+            import requests
+
+            response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info")
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available."
+                )
+            model_id = response.json()["model_id"]
+            # create embeddings using TEI endpoint service
+            embeddings = HuggingFaceInferenceAPIEmbeddings(
+                api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT
+            )
        else:
            # create embeddings using local embedding model
            if logflag:
@@ -274,7 +293,7 @@ async def ingest_files(
                    search_res = search_by_file(my_milvus.col, encode_file)
                except Exception as e:
                    raise HTTPException(
-                        status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}."
+                        status_code=500, detail=f"Failed when searching in Milvus db for file {file.filename}: {e}"
                    )
                if len(search_res) > 0:
                    if logflag:
@@ -319,7 +338,7 @@ async def ingest_files(
                    search_res = search_by_file(my_milvus.col, encoded_link + ".txt")
                except Exception as e:
                    raise HTTPException(
-                        status_code=500, detail=f"Failed when searching in Milvus db for link {link}."
+                        status_code=500, detail=f"Failed when searching in Milvus db for link {link}: {e}"
                    )
                if len(search_res) > 0:
                    if logflag:
@@ -375,7 +394,7 @@ async def get_files(self):
        try:
            all_data = search_all(my_milvus.col)
        except Exception as e:
-            raise HTTPException(status_code=500, detail="Failed when searching in Milvus db for all files.")
+            raise HTTPException(status_code=500, detail=f"Failed when searching in Milvus db for all files: {e}")

        # return [] if no data in db
        if len(all_data) == 0:
@@ -422,8 +441,7 @@ async def delete_files(self, file_path: str = Body(..., embed=True)):
            except Exception as e:
                if logflag:
                    logger.info(f"[ milvus delete ] {e}. Fail to delete {upload_folder}.")
-                raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}.")
-
+                raise HTTPException(status_code=500, detail=f"Fail to delete {upload_folder}: {e}")
            if logflag:
                logger.info("[ milvus delete ] successfully delete all files.")
3 changes: 1 addition & 2 deletions comps/dataprep/src/integrations/neo4j_langchain.py
@@ -34,10 +34,9 @@
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test")

-# LLM/Embedding endpoints
+# LLM endpoints
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081")
-TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT")
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
26 changes: 21 additions & 5 deletions comps/dataprep/src/integrations/opensearch.py
@@ -7,9 +7,8 @@

from fastapi import Body, File, Form, HTTPException, UploadFile
from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import OpenSearchVectorSearch
-from langchain_huggingface import HuggingFaceEndpointEmbeddings
from langchain_text_splitters import HTMLHeaderTextSplitter
from opensearchpy import OpenSearch

@@ -79,9 +78,26 @@ def __init__(self, name: str, description: str, config: dict = None):
        self.upload_folder = "./uploaded_files/"
        super().__init__(name, ServiceType.DATAPREP.name.lower(), description, config)
        # Initialize embeddings
-        tei_embedding_endpoint = os.getenv("TEI_ENDPOINT")
-        if tei_embedding_endpoint:
-            self.embeddings = HuggingFaceEndpointEmbeddings(model=tei_embedding_endpoint)
+        TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", "")
+        HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
+        EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
+        if TEI_EMBEDDING_ENDPOINT:
+            if not HUGGINGFACEHUB_API_TOKEN:
+                raise HTTPException(
+                    status_code=400,
+                    detail="You MUST offer the `HUGGINGFACEHUB_API_TOKEN` when using `TEI_EMBEDDING_ENDPOINT`.",
+                )
+            import requests
+
+            response = requests.get(TEI_EMBEDDING_ENDPOINT + "/info")
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=400, detail=f"TEI embedding endpoint {TEI_EMBEDDING_ENDPOINT} is not available."
+                )
+            model_id = response.json()["model_id"]
+            self.embeddings = HuggingFaceInferenceAPIEmbeddings(
+                api_key=HUGGINGFACEHUB_API_TOKEN, model_name=model_id, api_url=TEI_EMBEDDING_ENDPOINT
+            )
        else:
            self.embeddings = HuggingFaceBgeEmbeddings(model_name=Config.EMBED_MODEL)