diff --git a/ChatQnA/README.md b/ChatQnA/README.md
index ffad72d85..70fac96f7 100644
--- a/ChatQnA/README.md
+++ b/ChatQnA/README.md
@@ -91,6 +91,14 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose_telemetry.yaml file needs to be merged with the default compose.yaml file.
+CPU example with the OpenTelemetry Tracing feature:
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
+```
+
 It will automatically download the docker image on `docker hub`:

 ```bash
@@ -232,6 +240,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose_telemetry.yaml file needs to be merged with the default compose.yaml file.
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
+```
+
 Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.

 ### Deploy ChatQnA on Xeon
@@ -243,6 +258,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose_telemetry.yaml file needs to be merged with the default compose.yaml file.
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
+```
+
 Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.

 ### Deploy ChatQnA on NVIDIA GPU
@@ -346,7 +368,7 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards

 ## Tracing Services with OpenTelemetry Tracing and Jaeger

-> NOTE: limited support. Only LLM inference serving with TGI on Gaudi is enabled for this feature.
+> NOTE: This feature is disabled by default. Please check the Deploy ChatQnA sections above for how to enable it with the compose_telemetry.yaml file.

 OPEA microservice and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.

diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 764afba4d..7fd630e93 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -44,6 +44,14 @@ To set up environment variables for deploying ChatQnA services, follow these steps:
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose_telemetry.yaml file needs to be merged with the default compose.yaml file.
+CPU example with the OpenTelemetry Tracing feature:
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
+```
+
 It will automatically download the docker image on `docker hub`:

 ```bash
@@ -263,12 +271,16 @@ If use vLLM as the LLM serving backend.
 docker compose -f compose.yaml up -d
 # Start ChatQnA without Rerank Pipeline
 docker compose -f compose_without_rerank.yaml up -d
+# Start ChatQnA with Rerank Pipeline and OpenTelemetry Tracing
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
 ```

 If use TGI as the LLM serving backend.
 ```bash
 docker compose -f compose_tgi.yaml up -d
+# Start ChatQnA with OpenTelemetry Tracing
+docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml up -d
 ```

 ### Validate Microservices

diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_telemetry.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_telemetry.yaml
new file mode 100644
index 000000000..4da33d6d5
--- /dev/null
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_telemetry.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-xeon-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi_telemetry.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi_telemetry.yaml
new file mode 100644
index 000000000..2ba137539
--- /dev/null
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi_telemetry.yaml
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tgi-service:
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-xeon-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
index cc6779cf7..1d287c864 100755
--- a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -14,3 +14,7 @@ export INDEX_NAME="rag-redis"
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.
 export LOGFLAG=""
+# Set OpenTelemetry Tracing Endpoint
+export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
index b0e9fe19b..724bba599 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -45,6 +45,12 @@ To set up environment variables for deploying ChatQnA services, follow these steps:
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose_telemetry.yaml file needs to be merged with the default compose.yaml file.
+
+```bash
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
+```
+
 It will automatically download the docker image on `docker hub`:

 ```bash
@@ -259,12 +265,16 @@ If use vLLM as the LLM serving backend.
 docker compose -f compose.yaml up -d
 # Start ChatQnA without Rerank Pipeline
 docker compose -f compose_without_rerank.yaml up -d
+# Start ChatQnA with Rerank Pipeline and OpenTelemetry Tracing
+docker compose -f compose.yaml -f compose_telemetry.yaml up -d
 ```

 If use TGI as the LLM serving backend.

 ```bash
 docker compose -f compose_tgi.yaml up -d
+# Start ChatQnA with OpenTelemetry Tracing
+docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml up -d
 ```

 If you want to enable guardrails microservice in the pipeline, please follow the below command instead:

diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_telemetry.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_telemetry.yaml
new file mode 100644
index 000000000..97c71720c
--- /dev/null
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_telemetry.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
index 8f860996c..7fb743e81 100644
--- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml
@@ -25,7 +25,6 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
@@ -38,7 +37,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
     container_name: retriever-redis-server
@@ -56,7 +55,7 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
       LOGFLAG: ${LOGFLAG}
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
@@ -80,7 +78,7 @@ services:
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
-    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
     image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
@@ -106,22 +104,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
-  jaeger:
-    image: jaegertracing/all-in-one:latest
-    container_name: jaeger
-    ports:
-      - "16686:16686"
-      - "4317:4317"
-      - "4318:4318"
-      - "9411:9411"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      COLLECTOR_ZIPKIN_HOST_PORT: 9411
-    restart: unless-stopped
+    command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
@@ -147,7 +130,6 @@ services:
       - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
       - LLM_MODEL=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
-      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
     ipc: host
     restart: always
   chatqna-gaudi-ui-server:
diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi_telemetry.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi_telemetry.yaml
new file mode 100644
index 000000000..0da707ebc
--- /dev/null
+++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi_telemetry.yaml
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tgi-service:
+    command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh
index 67b784590..ab78453a8 100644
--- a/ChatQnA/tests/test_compose_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_on_gaudi.sh
@@ -49,9 +49,12 @@ function start_services() {
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
     export host_ip=${ip_address}
+    export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+    export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

     # Start Docker Containers
-    docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    docker compose -f compose.yaml -f compose_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 160 ]]; do
         echo "n=$n"
@@ -172,7 +175,7 @@ function validate_frontend() {

 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    docker compose -f compose.yaml down
+    docker compose -f compose.yaml -f compose_telemetry.yaml down
 }

 function main() {
diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh
index b9ba317a0..591c75f10 100644
--- a/ChatQnA/tests/test_compose_on_xeon.sh
+++ b/ChatQnA/tests/test_compose_on_xeon.sh
@@ -49,9 +49,12 @@ function start_services() {
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
    export host_ip=${ip_address}
+   export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+   export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+   export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

    # Start Docker Containers
-   docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+   docker compose -f compose.yaml -f compose_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
    n=0
    until [[ "$n" -ge 100 ]]; do
        docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
@@ -172,7 +175,7 @@ function validate_frontend() {

 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon
-   docker compose -f compose.yaml down
+   docker compose -f compose.yaml -f compose_telemetry.yaml down
 }

 function main() {
diff --git a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh
index c4285c219..2c90d40d2 100644
--- a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh
+++ b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh
@@ -54,7 +54,7 @@ export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

    # Start Docker Containers
-   docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+   docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

    n=0
    until [[ "$n" -ge 500 ]]; do
@@ -218,7 +218,7 @@ function validate_frontend() {

 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/hpu/gaudi
-   docker compose -f compose_tgi.yaml down
+   docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml down
 }

 function main() {
diff --git a/ChatQnA/tests/test_compose_tgi_on_xeon.sh b/ChatQnA/tests/test_compose_tgi_on_xeon.sh
index 9d96dcc52..9013183a0 100644
--- a/ChatQnA/tests/test_compose_tgi_on_xeon.sh
+++ b/ChatQnA/tests/test_compose_tgi_on_xeon.sh
@@ -48,9 +48,12 @@ function start_services() {
    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
    export INDEX_NAME="rag-redis"
    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+   export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+   export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+   export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

    # Start Docker Containers
-   docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+   docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

    n=0
    until [[ "$n" -ge 100 ]]; do
@@ -216,7 +219,7 @@ function validate_frontend() {

 function stop_docker() {
    cd $WORKPATH/docker_compose/intel/cpu/xeon
-   docker compose -f compose_tgi.yaml down
+   docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml down
 }

 function main() {
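
The telemetry overlays above rely on Docker Compose's multi-file merge: the second `-f` file overrides the `command` of the TEI/TGI services (appending `--otlp-endpoint`) and adds the `jaeger` service. The merged result can be previewed without starting any containers; a minimal sketch, assuming the Xeon deployment and the variables exported by set_env.sh:

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
source set_env.sh
# Render the merged configuration; the TEI service commands should now
# carry --otlp-endpoint and a jaeger service should appear in the output.
docker compose -f compose.yaml -f compose_telemetry.yaml config | grep -E -A 1 'jaeger:|otlp-endpoint'
```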
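Once the stack is up and a few ChatQnA requests have gone through the pipeline, spans should start arriving at Jaeger. A quick smoke test, assuming the port mappings of the `jaeger` service above and the `JAEGER_IP` set by set_env.sh:

```bash
# The Jaeger UI is served at http://$JAEGER_IP:16686 (open it in a browser).
# List the services that have reported spans; expect a JSON document naming
# the traced components once traffic has flowed through the pipeline.
curl -s http://$JAEGER_IP:16686/api/services
```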