diff --git a/apps/voice_rag_qdrant/.env.template b/apps/voice_rag_qdrant/.env.template new file mode 100644 index 000000000..29a3624c8 --- /dev/null +++ b/apps/voice_rag_qdrant/.env.template @@ -0,0 +1,12 @@ +OPENAI_API_KEY= +DEEPGRAM_API_KEY= +AZURE_SPEECH_KEY= +AZURE_SPEECH_REGION= + +PINECONE_API_KEY= +PINECONE_ENVIRONMENT= +PINECONE_INDEX_NAME= + +QDRANT_HOST= +QDRANT_PORT= +QDRANT_COLLECTION_NAME= \ No newline at end of file diff --git a/apps/voice_rag_qdrant/Dockerfile b/apps/voice_rag_qdrant/Dockerfile new file mode 100644 index 000000000..7a7ad834a --- /dev/null +++ b/apps/voice_rag_qdrant/Dockerfile @@ -0,0 +1,54 @@ +# Use the official micromamba image as a base +FROM docker.io/mambaorg/micromamba:1.5-jammy + +# Create a new user '$MAMBA_USER' and set the working directory +COPY --chown=$MAMBA_USER:$MAMBA_USER environment.docker.yml /tmp/environment.yml + +# Install the specified packages using micromamba +RUN micromamba install -y -n base -f /tmp/environment.yml && \ + micromamba clean --all --yes + +USER root +WORKDIR /usr/local/src + +ARG VOCODE_USER=vocode +ARG VOCODE_UID=8476 +ARG VOCODE_GID=8476 + +RUN groupadd --gid $VOCODE_GID $VOCODE_USER && \ + useradd --uid $VOCODE_UID --gid $VOCODE_GID --shell /bin/bash --create-home $VOCODE_USER + +# COPY --chown=$VOCODE_USER:$VOCODE_USER ../../../ /vocode-python +# WORKDIR /usr/local/src/vocode +# RUN poetry install -E all + +# Copy the rest of your application files into the Docker image +COPY --chown=$VOCODE_USER:$VOCODE_USER . /vocode +WORKDIR /vocode + +#USER vocode +USER root + +ENV DOCKER_ENV="docker" + +# # Expose the port your FastAPI app will run on +EXPOSE 19002 + +# Set build arguments +ARG BUILD_DATE +ARG VCS_REF +ARG VERSION + +# Set labels +LABEL org.label-schema.build-date=$BUILD_DATE \ + org.label-schema.name="vocode" \ + org.label-schema.description="Vocode Docker Image" \ + org.label-schema.url="https://vocode.dev/" \ + org.label-schema.vcs-url="https://github.com/vocodedev" \ + org.label-schema.maintainer="vocode@arpagon.co" \ + org.label-schema.vcs-ref=$VCS_REF \ + org.label-schema.vendor="Vocode" \ + org.label-schema.version=$VERSION + +# Start the FastAPI app using Uvicorn +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "19002"] \ No newline at end of file diff --git a/apps/voice_rag_qdrant/README.md b/apps/voice_rag_qdrant/README.md new file mode 100644 index 000000000..75208faca --- /dev/null +++ b/apps/voice_rag_qdrant/README.md @@ -0,0 +1,37 @@ +# voice_rag + +## Docker + +1. Set up the configuration for your agent in `main.py`. +2. Set up an .env file using the template + +``` +cp .env.template .env +``` + +Fill in your API keys into .env + +3. Build the Docker image + +```bash +docker build --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') \ + --build-arg VCS_REF=$(git rev-parse --short HEAD) \ + --build-arg VERSION=0.1.0 \ + -t vocode/vocode-voice-rag:0.1.0 . +``` + +4. Run the image and forward the port. + +```bash +docker run --env-file=.env -p 3000:3000 -t vocode/vocode-voice-rag +``` + +Now you have a client backend hosted at localhost:3000 to pass into the Vocode React SDK. You'll likely need to tunnel port 3000 to ngrok / host your server in order to use it in the React SDK. + +## Non-docker setup + +`main.py` just sets up a FastAPI server, so you can just run it with uvicorn: + +``` +uvicorn main:app +``` diff --git a/apps/voice_rag_qdrant/environment.docker.yml b/apps/voice_rag_qdrant/environment.docker.yml new file mode 100644 index 000000000..1a0995602 --- /dev/null +++ b/apps/voice_rag_qdrant/environment.docker.yml @@ -0,0 +1,21 @@ +name: vocode-rag +channels: + - conda-forge + - pytorch +dependencies: + - python=3.10 + - openssl=1.1.1w + - portaudio + - ffmpeg + - git + - pip + - pip: + # Installing vocode from the git repository + - git+https://github.com/ArtisanLabs/vocode-python/@461-VectorDB-OpenSource-Documentation#egg=vocode + - azure-cognitiveservices-speech==1.31.0 + - python-dotenv + - ipython + - deepgram-sdk + - uvicorn + - pinecone-client + - poetry \ No newline at end of file diff --git a/apps/voice_rag_qdrant/main.py b/apps/voice_rag_qdrant/main.py new file mode 100644 index 000000000..4febff2cf --- /dev/null +++ b/apps/voice_rag_qdrant/main.py @@ -0,0 +1,86 @@ +import os +import logging +from fastapi import FastAPI + +from vocode.streaming.models.agent import ChatGPTAgentConfig +from vocode.streaming.models.synthesizer import AzureSynthesizerConfig +from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer + +from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent +from vocode.streaming.client_backend.conversation import ConversationRouter +from vocode.streaming.models.message import BaseMessage +from vocode.streaming.vector_db.factory import VectorDBFactory +from vocode.streaming.vector_db.pinecone import PineconeConfig +from vocode.streaming.vector_db.qdrant import QdrantConfig +from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber + + +from vocode.streaming.models.transcriber import ( + DeepgramTranscriberConfig, + TimeEndpointingConfig +) + +from dotenv import load_dotenv + +load_dotenv() + +app = FastAPI(docs_url=None) + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +vector_db_config = QdrantConfig( + index=os.getenv('QDRANT_COLLECTION_NAME') +) + +INITIAL_MESSAGE="Hello!" +PROMPT_PREAMBLE=''' +I want you to act as an IT Architect. +I will provide some details about the functionality of an application or other +digital product, and it will be your job to come up with ways to integrate it +into the IT landscape. This could involve analyzing business requirements, +performing a gap analysis, and mapping the functionality of the new system to +the existing IT landscape. The next steps are to create a solution design. + +You are an expert in these technologies: +- Langchain +- Supabase +- Next.js +- Fastapi +- Vocode. +''' + +TIME_ENDPOINTING_CONFIG = TimeEndpointingConfig() +TIME_ENDPOINTING_CONFIG.time_cutoff_seconds = 2 + +AZURE_SYNTHESIZER_THUNK = lambda output_audio_config: AzureSynthesizer( + AzureSynthesizerConfig.from_output_audio_config(output_audio_config, ), + logger=logger +) + +DEEPGRAM_TRANSCRIBER_THUNK = lambda input_audio_config: DeepgramTranscriber( + DeepgramTranscriberConfig.from_input_audio_config( + input_audio_config=input_audio_config, + endpointing_config=TIME_ENDPOINTING_CONFIG, + min_interrupt_confidence=0.9, + ), + logger=logger +) + +conversation_router = ConversationRouter( + agent_thunk=lambda: ChatGPTAgent( + ChatGPTAgentConfig( + initial_message=BaseMessage(text=INITIAL_MESSAGE), + prompt_preamble=PROMPT_PREAMBLE, + vector_db_config=vector_db_config, + logger=logger, + ), + logger=logger + ), + synthesizer_thunk=AZURE_SYNTHESIZER_THUNK, + transcriber_thunk=DEEPGRAM_TRANSCRIBER_THUNK, + logger=logger, +) + +app.include_router(conversation_router.get_router()) diff --git a/poetry.lock b/poetry.lock index 85c8605ae..6bcebc2a9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -1271,7 +1271,7 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 name = "grpcio" version = "1.57.0" description = "HTTP/2-based RPC framework" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, @@ -1340,6 +1340,65 @@ googleapis-common-protos = ">=1.5.5" grpcio = ">=1.57.0" protobuf = ">=4.21.6" +[[package]] +name = "grpcio-tools" +version = "1.57.0" +description = "Protobuf code generator for gRPC" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpcio-tools-1.57.0.tar.gz", hash = "sha256:2f16130d869ce27ecd623194547b649dd657333ec7e8644cc571c645781a9b85"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:4fb8a8468031f858381a576078924af364a08833d8f8f3237018252c4573a802"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:35bf0dad8a3562043345236c26d0053a856fb06c04d7da652f2ded914e508ae7"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:ec9aab2fb6783c7fc54bc28f58eb75f1ca77594e6b0fd5e5e7a8114a95169fe0"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf5fc0a1c23f8ea34b408b72fb0e90eec0f404ad4dba98e8f6da3c9ce34e2ed"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26e69d08a515554e0cfe1ec4d31568836f4b17f0ff82294f957f629388629eb9"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c39a3656576b6fdaaf28abe0467f7a7231df4230c1bee132322dbc3209419e7f"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f64f8ab22d27d4a5693310748d35a696061c3b5c7b8c4fb4ab3b4bc1068b6b56"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-win32.whl", hash = "sha256:d2a134756f4db34759a5cc7f7e43f7eb87540b68d1cca62925593c6fb93924f7"}, + {file = "grpcio_tools-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a3d60fb8d46ede26c1907c146561b3a9caa20a7aff961bc661ef8226f85a2e9"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:aac98ecad8f7bd4301855669d42a5d97ef7bb34bec2b1e74c7a0641d47e313cf"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cdd020cb68b51462983b7c2dfbc3eb6ede032b8bf438d4554df0c3f08ce35c76"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:f54081b08419a39221cd646363b5708857c696b3ad4784f1dcf310891e33a5f7"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed85a0291fff45b67f2557fe7f117d3bc7af8b54b8619d27bf374b5c8b7e3ca2"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e868cd6feb3ef07d4b35be104fe1fd0657db05259ff8f8ec5e08f4f89ca1191d"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:dfb6f6120587b8e228a3cae5ee4985b5bdc18501bad05c49df61965dfc9d70a9"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a7ad7f328e28fc97c356d0f10fb10d8b5151bb65aa7cf14bf8084513f0b7306"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-win32.whl", hash = "sha256:9867f2817b1a0c93c523f89ac6c9d8625548af4620a7ce438bf5a76e23327284"}, + {file = "grpcio_tools-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:1f9e917a9f18087f6c14b4d4508fb94fca5c2f96852363a89232fb9b2124ac1f"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:9f2aefa8a37bd2c4db1a3f1aca11377e2766214520fb70e67071f4ff8d8b0fa5"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:850cbda0ec5d24c39e7215ede410276040692ca45d105fbbeada407fa03f0ac0"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6fa52972c9647876ea35f6dc2b51002a74ed900ec7894586cbb2fe76f64f99de"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0eea89d7542719594e50e2283f51a072978b953e8b3e9fd7c59a2c762d4c1"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3da5240211252fc70a6451fe00c143e2ab2f7bfc2445695ad2ed056b8e48d96"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a0256f8786ac9e4db618a1aa492bb3472569a0946fd3ee862ffe23196323da55"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c026bdf5c1366ce88b7bbe2d8207374d675afd3fd911f60752103de3da4a41d2"}, + {file = "grpcio_tools-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9053c2f655589545be08b9d6a673e92970173a4bf11a4b9f18cd6e9af626b587"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:81ec4dbb696e095057b2528d11a8da04be6bbe2b967fa07d4ea9ba6354338cbf"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:495e2946406963e0b9f063f76d5af0f2a19517dac2b367b5b044432ac9194296"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:7b46fc6aa8eb7edd18cafcd21fd98703cb6c09e46b507de335fca7f0161dfccb"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb81ff861692111fa81bd85f64584e624cb4013bd66fbce8a209b8893f5ce398"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a42dc220eb5305f470855c9284f4c8e85ae59d6d742cd07946b0cbe5e9ca186"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90d10d9038ba46a595a223a34f136c9230e3d6d7abc2433dbf0e1c31939d3a8b"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5bc3e6d338aefb052e19cedabe00452be46d0c10a4ed29ee77abb00402e438fe"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-win32.whl", hash = "sha256:34b36217b17b5bea674a414229913e1fd80ede328be51e1b531fcc62abd393b0"}, + {file = "grpcio_tools-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbde4004a0688400036342ff73e3706e8940483e2871547b1354d59e93a38277"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:784574709b9690dc28696617ea69352e2132352fdfc9bc89afa8e39f99ae538e"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:85ac4e62eb44428cde025fd9ab7554002315fc7880f791c553fc5a0015cc9931"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:dc771d4db5701f280957bbcee91745e0686d00ed1c6aa7e05ba30a58b02d70a1"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ac06703c412f8167a9062eaf6099409967e33bf98fa5b02be4b4689b6bdf39"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02d78c034109f46032c7217260066d49d41e6bcaf588fa28fa40fe2f83445347"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2db25f15ed44327f2e02d0c4fe741ac966f9500e407047d8a7c7fccf2df65616"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b417c97936d94874a3ce7ed8deab910f2233e3612134507cfee4af8735c38a6"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-win32.whl", hash = "sha256:f717cce5093e6b6049d9ea6d12fdf3658efdb1a80772f7737db1f8510b876df6"}, + {file = "grpcio_tools-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:1c0e8a1a32973a5d59fbcc19232f925e5c48116e9411f788033a31c5ca5130b4"}, +] + +[package.dependencies] +grpcio = ">=1.57.0" +protobuf = ">=4.21.6,<5.0dev" +setuptools = "*" + [[package]] name = "gtts" version = "2.3.2" @@ -1370,6 +1429,32 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "httpcore" version = "0.17.3" @@ -1404,6 +1489,7 @@ files = [ [package.dependencies] certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} httpcore = ">=0.15.0,<0.18.0" idna = "*" sniffio = "*" @@ -1414,6 +1500,17 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "identify" version = "2.5.26" @@ -1660,6 +1757,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2238,6 +2345,25 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "portalocker" +version = "2.8.2" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.8" +files = [ + {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, + {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + [[package]] name = "pre-commit" version = "3.3.3" @@ -2291,7 +2417,7 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] name = "protobuf" version = "4.24.1" description = "" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "protobuf-4.24.1-cp310-abi3-win32.whl", hash = "sha256:d414199ca605eeb498adc4d2ba82aedc0379dca4a7c364ff9bc9a179aa28e71b"}, @@ -2571,6 +2697,29 @@ files = [ {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, ] +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -2583,6 +2732,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -2590,8 +2740,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2608,6 +2766,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2615,11 +2774,35 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "qdrant-client" +version = "1.7.1" +description = "Client library for the Qdrant vector search engine" +optional = false +python-versions = ">=3.8" +files = [ + {file = "qdrant_client-1.7.1-py3-none-any.whl", hash = "sha256:b6b52007d7dce339007f6fa676a611fbc61609d374aa00d8596b62ea45d831d7"}, + {file = "qdrant_client-1.7.1.tar.gz", hash = "sha256:7e3660b540e5bddda555a1638dd7905df9963fefe2061712fc5f48bfd78d734d"}, +] + +[package.dependencies] +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = ">=0.14.0", extras = ["http2"]} +numpy = {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""} +portalocker = ">=2.7.0,<3.0.0" +pydantic = ">=1.10.8" +urllib3 = ">=1.26.14,<3" + +[package.extras] +fastembed = ["fastembed (==0.1.1)"] + [[package]] name = "redis" version = "4.6.0" @@ -3691,4 +3874,4 @@ transcribers = ["google-cloud-speech"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.12" -content-hash = "280732b3a6ac72e26b018edf6f19dd19d01644b5725c2ecb0d7c713d84251c79" +content-hash = "833574ce9f82a64e1070d693307c23eababeae46973e26e690a2e85ddc0204b5" diff --git a/pyproject.toml b/pyproject.toml index 4b0d71378..b2e563249 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ langchain = "^0.0.198" google-cloud-aiplatform = {version = "^1.26.0", optional = true} miniaudio = "^1.59" boto3 = "^1.28.28" +qdrant-client = "^1.7.1" [tool.poetry.group.lint.dependencies] diff --git a/vocode/streaming/models/vector_db.py b/vocode/streaming/models/vector_db.py index 719c9ef02..0b33547c7 100644 --- a/vocode/streaming/models/vector_db.py +++ b/vocode/streaming/models/vector_db.py @@ -8,6 +8,7 @@ class VectorDBType(str, Enum): BASE = "vector_db_base" PINECONE = "vector_db_pinecone" + QDRANT = "vector_db_qdrant" class VectorDBConfig(TypedModel, type=VectorDBType.BASE.value): @@ -19,3 +20,7 @@ class PineconeConfig(VectorDBConfig, type=VectorDBType.PINECONE.value): api_key: Optional[str] api_environment: Optional[str] top_k: int = 3 + +class QdrantConfig(VectorDBConfig, type=VectorDBType.QDRANT.value): + index: str + top_k: Optional[int] = 3 \ No newline at end of file diff --git a/vocode/streaming/vector_db/qdrant.py b/vocode/streaming/vector_db/qdrant.py new file mode 100644 index 000000000..0def49438 --- /dev/null +++ b/vocode/streaming/vector_db/qdrant.py @@ -0,0 +1,85 @@ +import logging +from typing import Iterable, List, Optional, Tuple +import uuid +import os +from langchain.docstore.document import Document +from vocode import getenv +from vocode.streaming.models.vector_db import QdrantConfig +from vocode.streaming.vector_db.base_vector_db import VectorDB +from qdrant_client import QdrantClient +from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +class QdrantDB(VectorDB): + def __init__(self, config: QdrantConfig, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.config = config + self.collection_name = self.config.index + self.top_k = self.config.top_k or 1 + self.client = QdrantClient(os.getenv('QDRANT_HOST','localhost'), port=os.getenv('QDRANT_PORT','6333')) + self._text_key = "text" + + async def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + ) -> List[str]: + if ids is None: + ids = [str(uuid.uuid4()) for _ in texts] + + points = [] + for i, text in enumerate(texts): + embedding = await self.create_openai_embedding(text) + metadata = metadatas[i] if metadatas else {} + metadata[self._text_key] = text + points.append(PointStruct(id=ids[i], vector=embedding, payload=metadata)) + + try: + self.client.upsert( + collection_name=self.collection_name, + wait=True, + points=points + ) + except Exception as e: + logger.error(f"Error upserting points: {e}") + + return ids + + async def similarity_search_with_score( + self, + query: str, + filter: Optional[dict] = None, + ) -> List[Tuple[Document, float]]: + query_vector = await self.create_openai_embedding(query) + qdrant_filter = None + if filter: + qdrant_filter = Filter(must=[FieldCondition(key=k, match=MatchValue(value=v)) for k, v in filter.items()]) + + try: + search_result = self.client.search( + collection_name=self.collection_name, + query_vector=query_vector, + query_filter=qdrant_filter, + with_payload=True, + limit=self.config.top_k + ) + except Exception as e: + logger.error(f"Error during search: {e}") + return [] + + docs = [] + for res in search_result: + payload = res.payload + metadata = payload.get("metadata", {}) + content = payload.get("content", "") + score = res.score + + docs.append((Document(page_content=content.replace(r"\n","\n"), metadata=metadata), score)) + + return docs +