diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md index d5796b8c97..9ff29bc378 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md @@ -49,7 +49,7 @@ Before starting the services with `docker compose`, you have to recheck the foll ```bash export host_ip= # export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh index e98f6e04ec..b65dbf4403 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -5,7 +5,11 @@ # export host_ip= export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi # export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md index b60253a147..4b0d15ab7b 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md @@ -49,7 +49,7 @@ Before starting the services with `docker compose`, you have to recheck the foll ```bash export host_ip= # export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 2624dbf531..10118931be 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -45,7 +45,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh index e98f6e04ec..fd6ab20591 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -5,7 +5,13 @@ # export host_ip= export host_ip=$(hostname -I | awk '{print $1}') -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi + +export HF_TOKEN=${HF_TOKEN} # export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md index 32b8c19ae2..50f2ffaf98 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md @@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready: Before starting the services with `docker compose`, you have to recheck the following environment variables. 
```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 @@ -173,7 +173,7 @@ In the current version v1.0, you need to set the avatar figure image/video and t cd GenAIExamples/AvatarChatbot/tests export IMAGE_REPO="opea" export IMAGE_TAG="latest" -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= test_avatarchatbot_on_xeon.sh ``` diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml index 12225ec416..b017a8c276 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml @@ -37,7 +37,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} healthcheck: test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"] interval: 10s diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md index 68ae44fb20..338a53573b 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md @@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready: Before starting the services with `docker compose`, you have to recheck the following environment variables. ```bash -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 @@ -183,7 +183,7 @@ In the current version v1.0, you need to set the avatar figure image/video and t cd GenAIExamples/AvatarChatbot/tests export IMAGE_REPO="opea" export IMAGE_TAG="latest" -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= test_avatarchatbot_on_gaudi.sh ``` diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml index 799510d0ab..fe6e588ed8 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml @@ -38,7 +38,7 @@ services: - SYS_NICE restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 container_name: tgi-gaudi-server ports: - "3006:80" @@ -48,7 +48,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md index 201819e656..895659f7c8 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md +++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md @@ -105,7 +105,7 @@ export https_proxy=${your_http_proxy} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export OLLAMA_HOST=${host_ip} export OLLAMA_MODEL="llama3.2" ``` @@ -116,7 +116,7 @@ export OLLAMA_MODEL="llama3.2" set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 set RERANK_MODEL_ID=BAAI/bge-reranker-base set INDEX_NAME=rag-redis -set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token% +set 
HF_TOKEN=%your_hf_api_token% set OLLAMA_HOST=host.docker.internal set OLLAMA_MODEL="llama3.2" ``` diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml index f765d3aa51..0906503ae0 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml @@ -24,7 +24,8 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server @@ -54,7 +55,8 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -70,7 +72,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh b/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh index 639327b3bf..1529edf32b 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh +++ b/ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh @@ -7,15 +7,11 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null -if [ -z "${your_hf_api_token}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token." +if [ -z "${HF_TOKEN}" ]; then + echo "Error: HF_TOKEN is not set. Please set HF_TOKEN." fi -if [ -z "${host_ip}" ]; then - echo "Error: host_ip is not set. Please set host_ip first." -fi - -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export host_ip=$(hostname -I | awk '{print $1}') export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export INDEX_NAME="rag-redis" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md index 764afba4d4..296632848e 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md @@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. 
If you are in a proxy environment, also set the proxy-related environment variables: @@ -228,7 +228,7 @@ For users in China who are unable to download models directly from Huggingface, ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml index 3c3e6f49a7..4d17e98c6e 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -24,7 +24,8 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-server @@ -54,7 +55,8 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped @@ -70,7 +72,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh index cc6779cf7a..f64836f75a 100755 --- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -7,6 +7,11 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index b0e9fe19b1..23fd6e116c 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. 
If you are in a proxy environment, also set the proxy-related environment variables: @@ -197,9 +197,9 @@ For users in China who are unable to download models directly from Huggingface, export HF_ENDPOINT="https://hf-mirror.com" model_name="meta-llama/Meta-Llama-3-8B-Instruct" # Start vLLM LLM Service - docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 + docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 # Start TGI LLM Service - docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048 + docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048 ``` 2. 
Offline @@ -214,9 +214,9 @@ For users in China who are unable to download models directly from Huggingface, export HF_TOKEN=${your_hf_token} export model_path="/path/to/model" # Start vLLM LLM Service - docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 + docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 # Start TGI LLM Service - docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048 + docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048 ``` ### Setup Environment Variables @@ -226,7 +226,7 @@ For users in China who are unable to download models directly from Huggingface, ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index b75312824e..0166a1eb6d 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -24,7 +24,8 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-gaudi-server @@ -54,7 +55,8 @@ services: REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped tei-reranking-service: image: ghcr.io/huggingface/tei-gaudi:1.5.0 @@ -88,7 +90,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} 
HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none LLM_MODEL_ID: ${LLM_MODEL_ID} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh index 27339c478f..4aaa8a89f3 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -6,6 +6,10 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 01ee5d1fa4..ed08386d2a 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -101,7 +101,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 28940c9ba4..b6c7209549 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -14,7 +14,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] @@ -37,7 +37,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 106f7d1ffc..4ed3c2f366 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -87,7 +87,7 @@ export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 4d5ed95683..9695261557 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -15,7 +15,8 @@ services: https_proxy: ${https_proxy} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true @@ -45,7 +46,8 @@ services: 
https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped codegen-gaudi-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index 3144ef9589..9e03cdb09a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -6,7 +6,12 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi +export host_ip=$(hostname -I | awk '{print $1}') export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_HOST_IP=${host_ip} diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/README.md b/CodeTrans/docker_compose/intel/cpu/xeon/README.md index b5aebe8690..4bbd838d3d 100755 --- a/CodeTrans/docker_compose/intel/cpu/xeon/README.md +++ b/CodeTrans/docker_compose/intel/cpu/xeon/README.md @@ -72,7 +72,7 @@ Change the `LLM_MODEL_ID` below for your needs. export host_ip="External_Public_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml index b818956fa5..cadb7c77db 100644 --- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml @@ -14,7 +14,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] @@ -37,7 +37,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped codetrans-xeon-backend-server: image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md index 00241d6acf..0abbf4b20b 100755 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md @@ -64,7 +64,7 @@ Change the `LLM_MODEL_ID` below for your needs. 
export host_ip="External_Public_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml index cbccde0605..53553325dc 100644 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 container_name: codetrans-tgi-service ports: - "8008:80" @@ -15,7 +15,8 @@ services: https_proxy: ${https_proxy} HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true @@ -45,7 +46,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} restart: unless-stopped codetrans-gaudi-backend-server: image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} diff --git a/CodeTrans/docker_compose/set_env.sh b/CodeTrans/docker_compose/set_env.sh index b44c763a2e..9c34466e08 100644 --- a/CodeTrans/docker_compose/set_env.sh +++ b/CodeTrans/docker_compose/set_env.sh @@ -6,6 +6,10 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" diff --git a/DBQnA/docker_compose/intel/cpu/xeon/README.md b/DBQnA/docker_compose/intel/cpu/xeon/README.md index 78d5b60419..71791dd104 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/README.md +++ b/DBQnA/docker_compose/intel/cpu/xeon/README.md @@ -36,7 +36,7 @@ Then run the command `docker images`, you will have the following Docker Images: We set default model as "mistralai/Mistral-7B-Instruct-v0.3", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models. -If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. 
### 2.1 Setup Environment Variables @@ -57,7 +57,7 @@ export https_proxy=${your_http_proxy} export TGI_PORT=8008 export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT} -export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export POSTGRES_USER=postgres export POSTGRES_PASSWORD=testpwd @@ -97,7 +97,7 @@ docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_ ```bash -docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model +docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model ``` - Start Text-to-SQL Service diff --git a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml index 9b2bcbfbaa..1af25294b4 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -15,8 +15,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} shm_size: 1g command: --model-id ${LLM_MODEL_ID} diff --git a/DBQnA/docker_compose/set_env.sh b/DBQnA/docker_compose/set_env.sh index 94ca2186a2..bb051c270f 100755 --- a/DBQnA/docker_compose/set_env.sh +++ b/DBQnA/docker_compose/set_env.sh @@ -6,6 +6,11 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. 
Please set it" + return -1 +fi + export TGI_PORT=8008 export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md index 512f1b160a..32b8e7bf64 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/README.md @@ -43,7 +43,7 @@ docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_pro ```bash export host_ip="YOUR IP ADDR" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" @@ -67,7 +67,7 @@ In that case, start Docker Containers with compose_without_rerank.yaml ```bash export host_ip="YOUR IP ADDR" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" cd GenAIExamples/DocIndexRetriever/intel/cpu/xoen/ docker compose -f compose_without_rerank.yaml up -d diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml index d4bfe0446f..f633615dcf 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml @@ -27,7 +27,8 @@ services: REDIS_HOST: ${REDIS_HOST} INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -42,7 +43,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:6006/health || exit 1"] @@ -62,7 +64,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} restart: unless-stopped @@ -80,7 +82,8 @@ services: https_proxy: ${https_proxy} REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" @@ -98,7 +101,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -122,7 +126,8 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh 
b/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh index e4f5c207ba..5429c51221 100644 --- a/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh +++ b/DocIndexRetriever/docker_compose/intel/cpu/xeon/set_env.sh @@ -5,3 +5,24 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null + +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi + +export host_ip=$(hostname -I | awk '{print $1}') +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:8008" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md index 433206dfe6..40dc817a2d 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/README.md @@ -43,7 +43,7 @@ docker build --no-cache -t opea/doc-index-retriever:latest --build-arg https_pro ```bash export host_ip="YOUR IP ADDR" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export RERANK_MODEL_ID="BAAI/bge-reranker-base" export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090" diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index eedbe66719..f7b7a02d64 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -26,9 +26,10 @@ services: REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:1.5.0 + image: ghcr.io/huggingface/tei-gaudi:1.5.2 entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" container_name: tei-embedding-gaudi-server ports: @@ -68,7 +69,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped retriever: @@ -101,7 +102,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HHF_TOKE: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -125,7 +127,8 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HHF_TOKE: 
${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh index e4f5c207ba..5429c51221 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/set_env.sh @@ -5,3 +5,24 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null + +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi + +export host_ip=$(hostname -I | awk '{print $1}') +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:8008" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index 9465c0c976..8265ab3361 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -83,7 +83,7 @@ Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" environment export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" ``` -When using gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +When using gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. ### Setup Environment Variable @@ -96,7 +96,7 @@ To set up environment variables for deploying Document Summarization services, f export host_ip="External_Public_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. 
If you are in a proxy environment, also set the proxy-related environment variables: diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index 2c4344cc23..c46f443945 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -12,7 +12,8 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} healthcheck: @@ -39,7 +40,8 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_MODEL_ID: ${LLM_MODEL_ID} diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index d150b3f28e..06e84ca6d7 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -75,7 +75,7 @@ Default model is "Intel/neural-chat-7b-v3-3". Change "LLM_MODEL_ID" environment export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" ``` -When using gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +When using gated models, you also need to provide [HuggingFace token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. ### Setup Environment Variable @@ -88,7 +88,7 @@ To set up environment variables for deploying Document Summarization services, f export host_ip="External_Public_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. If you are in a proxy environment, also set the proxy-related environment variables: diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index 9ae96e3d7d..77a91f24b6 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -13,7 +13,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -48,7 +48,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} LLM_ENDPOINT: ${LLM_ENDPOINT} diff --git a/DocSum/docker_compose/set_env.sh b/DocSum/docker_compose/set_env.sh index f116a99c3a..42dc74f7a7 100644 --- a/DocSum/docker_compose/set_env.sh +++ b/DocSum/docker_compose/set_env.sh @@ -9,6 +9,12 @@ popd > /dev/null export MAX_INPUT_TOKENS=1024 export MAX_TOTAL_TOKENS=2048 +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. 
Please set it" + return -1 +fi + +export host_ip=$(hostname -I | awk '{print $1}') export no_proxy="${no_proxy},${host_ip}" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index e129608aa1..ccbe0d35c8 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -17,7 +17,7 @@ export TAG=${IMAGE_TAG} export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index de208292a5..dc4734c0d0 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -17,7 +17,7 @@ export TAG=${IMAGE_TAG} export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export HF_TOKEN=${HF_TOKEN} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export ASR_SERVICE_HOST_IP=${host_ip} diff --git a/FaqGen/docker_compose/intel/cpu/xeon/README.md b/FaqGen/docker_compose/intel/cpu/xeon/README.md index a961a6aa98..c0dc39ddbb 100644 --- a/FaqGen/docker_compose/intel/cpu/xeon/README.md +++ b/FaqGen/docker_compose/intel/cpu/xeon/README.md @@ -64,7 +64,7 @@ Then run the command `docker images`, you will have the following Docker Images: We set default model as "meta-llama/Meta-Llama-3-8B-Instruct", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models. -If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. 
### Setup Environment Variables @@ -79,7 +79,7 @@ export LLM_ENDPOINT_PORT=8008 export LLM_SERVICE_PORT=9000 export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi" export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" diff --git a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml index a20c784786..e2962f4977 100644 --- a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -14,7 +14,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} healthcheck: @@ -38,7 +39,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME} LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/README.md b/FaqGen/docker_compose/intel/hpu/gaudi/README.md index 7364e92387..19a86a5f6e 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/README.md +++ b/FaqGen/docker_compose/intel/hpu/gaudi/README.md @@ -17,7 +17,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip=$(hostname -I | awk '{print $1}') - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" ``` 2. If you are in a proxy environment, also set the proxy-related environment variables: @@ -144,7 +144,7 @@ Then run the command `docker images`, you will have the following Docker Images: We set default model as "meta-llama/Meta-Llama-3-8B-Instruct", change "LLM_MODEL_ID" in following Environment Variables setting if you want to use other models. -If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HUGGINGFACEHUB_API_TOKEN" environment variable. +If use gated models, you also need to provide [huggingface token](https://huggingface.co/docs/hub/security-tokens) to "HF_TOKEN" environment variable. 
### Setup Environment Variables @@ -159,7 +159,7 @@ export LLM_ENDPOINT_PORT=8008 export LLM_SERVICE_PORT=9000 export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi" export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml index 90503069c1..4387e8aad3 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -13,7 +13,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -51,7 +52,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} FAQGen_COMPONENT_NAME: ${FAQGen_COMPONENT_NAME} LOGFLAG: ${LOGFLAG:-False} restart: unless-stopped diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml index 29171a20f2..a8a20a2692 100644 --- a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml @@ -35,11 +35,12 @@ services: NO_PROXY: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} ipc: host command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate tgi-gaudi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 container_name: tgi-gaudi-server ports: - "6005:80" diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh index 97c462c581..865147b2c5 100644 --- a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh @@ -10,6 +10,12 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. 
Please set it" + return -1 +fi + +export host_ip=$(hostname -I | awk '{print $1}') export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" export OPENAI_EMBEDDING_MODEL="text-embedding-3-small" export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct" diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml index 31f543c755..fcb29b55d8 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -36,7 +36,8 @@ services: DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT} INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} MULTIMODAL_DATAPREP: true DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" restart: unless-stopped diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh index 057f90990c..e2a143e5d9 100755 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -6,6 +6,11 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi + export host_ip=$(hostname -I | awk '{print $1}') export no_proxy=${your_no_proxy} diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 26b5610f5e..eeaa08449c 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -38,7 +38,8 @@ services: DATAPREP_MMR_PORT: ${DATAPREP_MMR_PORT} INDEX_NAME: ${INDEX_NAME} LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:${LVM_PORT}/v1/lvm" - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} MULTIMODAL_DATAPREP: true DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS" restart: unless-stopped diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh index cc35d58d08..a4bd92bf39 100755 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -6,6 +6,11 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. 
Please set it" + return -1 +fi + export host_ip=$(hostname -I | awk '{print $1}') export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index 5ab4816096..a4c700a017 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -143,7 +143,7 @@ export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" export TGI_LLM_ENDPOINT="http://${host_ip}:9009" export REDIS_URL="redis://${host_ip}:6379" export INDEX_NAME="rag-redis" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export HF_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export EMBEDDING_SERVICE_HOST_IP=${host_ip} export RETRIEVER_SERVICE_HOST_IP=${host_ip} diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml index ee7d23a640..98e751b534 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/compose.yaml @@ -65,7 +65,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped retriever: diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/README.md b/SearchQnA/docker_compose/intel/cpu/xeon/README.md index f80c2846f7..f60e17cea7 100644 --- a/SearchQnA/docker_compose/intel/cpu/xeon/README.md +++ b/SearchQnA/docker_compose/intel/cpu/xeon/README.md @@ -66,7 +66,7 @@ Before starting the services with `docker compose`, you have to recheck the foll export host_ip= # export host_ip=$(hostname -I | awk '{print $1}') export GOOGLE_CSE_ID= export GOOGLE_API_KEY= -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:3001 diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml index 61f5f2a2fc..5b7d6cc73d 100644 --- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -35,7 +35,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped web-retriever: @@ -87,7 +87,7 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped tgi-service: @@ -102,7 +102,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} host_ip: ${host_ip} healthcheck: test: ["CMD-SHELL", "curl -f http://$host_ip:3006/health || exit 1"] @@ -125,7 +125,7 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped searchqna-xeon-backend-server: diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md index 679a0ad381..5daf710c1d 
100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md @@ -65,7 +65,7 @@ Before starting the services with `docker compose`, you have to recheck the foll export host_ip= export GOOGLE_CSE_ID= export GOOGLE_API_KEY= -export HUGGINGFACEHUB_API_TOKEN= +export HF_TOKEN= export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 export TEI_EMBEDDING_ENDPOINT=http://$host_ip:3001 diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml index f79bb9758c..d2c24ec665 100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -43,7 +43,7 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped web-retriever: @@ -94,7 +94,8 @@ services: https_proxy: ${https_proxy} RERANK_TYPE: ${RERANK_TYPE} TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} LOGFLAG: ${LOGFLAG} restart: unless-stopped tgi-service: @@ -108,7 +109,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -142,7 +144,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 LOGFLAG: ${LOGFLAG} diff --git a/SearchQnA/docker_compose/set_env.sh b/SearchQnA/docker_compose/set_env.sh index 232dcf7281..8438206f3e 100644 --- a/SearchQnA/docker_compose/set_env.sh +++ b/SearchQnA/docker_compose/set_env.sh @@ -6,6 +6,10 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi export EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5 export TEI_EMBEDDING_ENDPOINT=http://${host_ip}:3001 diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md index 4a41cb5385..c7ba127fcf 100644 --- a/Translation/docker_compose/intel/cpu/xeon/README.md +++ b/Translation/docker_compose/intel/cpu/xeon/README.md @@ -91,7 +91,7 @@ Change the `LLM_MODEL_ID` below for your needs. 
export host_ip="External_Public_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index d876f99f2a..56624252d8 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -11,7 +11,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 host_ip: ${host_ip} @@ -39,7 +39,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped diff --git a/Translation/docker_compose/intel/hpu/gaudi/README.md b/Translation/docker_compose/intel/hpu/gaudi/README.md index 31ed7da040..6962915612 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/README.md +++ b/Translation/docker_compose/intel/hpu/gaudi/README.md @@ -83,7 +83,7 @@ Change the `LLM_MODEL_ID` below for your needs. export host_ip="External_Public_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy" - export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" + export HF_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} ``` diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml index be983b7b13..310af6602d 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml @@ -11,7 +11,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all @@ -47,7 +48,8 @@ services: https_proxy: ${https_proxy} LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 restart: unless-stopped diff --git a/Translation/docker_compose/set_env.sh b/Translation/docker_compose/set_env.sh index aa4b428f6e..614d10d5ea 100644 --- a/Translation/docker_compose/set_env.sh +++ b/Translation/docker_compose/set_env.sh @@ -6,10 +6,13 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. 
Please set it" + return -1 +fi export LLM_MODEL_ID="haoranxu/ALMA-13B" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/translation" diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh index 58874c7d66..f236867021 100644 --- a/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -6,6 +6,10 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" export LVM_ENDPOINT="http://${host_ip}:8399" diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml index fa17cf36d1..f94338a69e 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -17,7 +17,8 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} ENABLE_HPU_GRAPH: true LIMIT_HPU_GRAPH: true USE_FLASH_ATTENTION: true diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh index 028966b77c..4ef75f7066 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -6,6 +6,12 @@ pushd "../../../../../" > /dev/null source .set_env.sh popd > /dev/null +if [ -z "$HF_TOKEN" ]; then + echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it" + return -1 +fi + +export host_ip=$(hostname -I | awk '{print $1}') export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" export LVM_ENDPOINT="http://${host_ip}:8399" export LVM_SERVICE_PORT=9399