Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dynamic provider selection for telephony services #726

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions tests/fakedata/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
from vocode.streaming.output_device.audio_chunk import ChunkState
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.synthesizer.base_synthesizer import BaseSynthesizer
from vocode.streaming.telephony.constants import DEFAULT_CHUNK_SIZE, DEFAULT_SAMPLING_RATE
from vocode.streaming.telephony.constants import TWILIO_CHUNK_SIZE, TWILIO_SAMPLING_RATE
from vocode.streaming.transcriber.base_transcriber import BaseTranscriber
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramEndpointingConfig
from vocode.streaming.utils.events_manager import EventsManager

DEFAULT_DEEPGRAM_TRANSCRIBER_CONFIG = DeepgramTranscriberConfig(
chunk_size=DEFAULT_CHUNK_SIZE,
sampling_rate=DEFAULT_SAMPLING_RATE,
chunk_size=TWILIO_CHUNK_SIZE,
sampling_rate=TWILIO_SAMPLING_RATE,
audio_encoding=AudioEncoding.MULAW,
endpointing_config=DeepgramEndpointingConfig(),
model="2-phonecall",
Expand All @@ -29,7 +29,7 @@

DEFAULT_SYNTHESIZER_CONFIG = PlayHtSynthesizerConfig(
voice_id="test_voice_id",
sampling_rate=DEFAULT_SAMPLING_RATE,
sampling_rate=TWILIO_SAMPLING_RATE,
audio_encoding=AudioEncoding.MULAW,
)

Expand Down Expand Up @@ -124,7 +124,7 @@ def create_fake_streaming_conversation(
synthesizer = synthesizer or create_fake_synthesizer(mocker, DEFAULT_SYNTHESIZER_CONFIG)
return StreamingConversation(
output_device=DummyOutputDevice(
sampling_rate=DEFAULT_SAMPLING_RATE, audio_encoding=AudioEncoding.MULAW
sampling_rate=TWILIO_SAMPLING_RATE, audio_encoding=AudioEncoding.MULAW
),
transcriber=transcriber,
agent=agent,
Expand Down
18 changes: 14 additions & 4 deletions vocode/streaming/models/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

from vocode.streaming.models.client_backend import OutputAudioConfig
from vocode.streaming.output_device.abstract_output_device import AbstractOutputDevice
from vocode.streaming.telephony.constants import DEFAULT_AUDIO_ENCODING, DEFAULT_SAMPLING_RATE
from vocode.streaming.telephony.constants import (
TWILIO_AUDIO_ENCODING,
TWILIO_SAMPLING_RATE,
VONAGE_AUDIO_ENCODING,
VONAGE_SAMPLING_RATE,
)

from .audio import AudioEncoding, SamplingRate
from .model import BaseModel, TypedModel
Expand Down Expand Up @@ -54,11 +59,16 @@ def from_output_device(cls, output_device: AbstractOutputDevice, **kwargs):
**kwargs
)

# TODO(EPD-186): switch to from_twilio_output_device and from_vonage_output_device
@classmethod
def from_telephone_output_device(cls, **kwargs):
def from_twilio_output_device(cls, **kwargs):
return cls(
sampling_rate=DEFAULT_SAMPLING_RATE, audio_encoding=DEFAULT_AUDIO_ENCODING, **kwargs
sampling_rate=TWILIO_SAMPLING_RATE, audio_encoding=TWILIO_AUDIO_ENCODING, **kwargs
)

@classmethod
def from_vonage_output_device(cls, **kwargs):
    """Build an instance of this class preset for Vonage audio output.

    The sampling rate and audio encoding are pinned to the Vonage
    telephony constants. Every other field is forwarded untouched via
    ``kwargs``.

    Args:
        **kwargs: Additional keyword arguments handed to the class
            constructor. Supplying ``sampling_rate`` or ``audio_encoding``
            here raises ``TypeError`` because they are already set.

    Returns:
        A new ``cls`` instance configured with the Vonage audio settings.
    """
    vonage_audio_settings = {
        "sampling_rate": VONAGE_SAMPLING_RATE,
        "audio_encoding": VONAGE_AUDIO_ENCODING,
    }
    return cls(**vonage_audio_settings, **kwargs)

@classmethod
Expand Down
16 changes: 8 additions & 8 deletions vocode/streaming/models/telephony.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
TranscriberConfig,
)
from vocode.streaming.telephony.constants import (
DEFAULT_AUDIO_ENCODING,
DEFAULT_CHUNK_SIZE,
DEFAULT_SAMPLING_RATE,
TWILIO_AUDIO_ENCODING,
TWILIO_CHUNK_SIZE,
TWILIO_SAMPLING_RATE,
VONAGE_AUDIO_ENCODING,
VONAGE_CHUNK_SIZE,
VONAGE_SAMPLING_RATE,
Expand Down Expand Up @@ -121,9 +121,9 @@ class TwilioCallConfig(BaseCallConfig, type=CallConfigType.TWILIO.value): # typ
@staticmethod
def default_transcriber_config():
return DeepgramTranscriberConfig(
sampling_rate=DEFAULT_SAMPLING_RATE,
audio_encoding=DEFAULT_AUDIO_ENCODING,
chunk_size=DEFAULT_CHUNK_SIZE,
sampling_rate=TWILIO_SAMPLING_RATE,
audio_encoding=TWILIO_AUDIO_ENCODING,
chunk_size=TWILIO_CHUNK_SIZE,
model="phonecall",
tier="nova",
endpointing_config=PunctuationEndpointingConfig(),
Expand All @@ -132,8 +132,8 @@ def default_transcriber_config():
@staticmethod
def default_synthesizer_config():
return AzureSynthesizerConfig(
sampling_rate=DEFAULT_SAMPLING_RATE,
audio_encoding=DEFAULT_AUDIO_ENCODING,
sampling_rate=TWILIO_SAMPLING_RATE,
audio_encoding=TWILIO_AUDIO_ENCODING,
)


Expand Down
32 changes: 24 additions & 8 deletions vocode/streaming/models/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@
from vocode.streaming.models.client_backend import InputAudioConfig
from vocode.streaming.models.model import BaseModel
from vocode.streaming.telephony.constants import (
DEFAULT_AUDIO_ENCODING,
DEFAULT_CHUNK_SIZE,
DEFAULT_SAMPLING_RATE,
TWILIO_AUDIO_ENCODING,
TWILIO_CHUNK_SIZE,
TWILIO_SAMPLING_RATE,
VONAGE_AUDIO_ENCODING,
VONAGE_CHUNK_SIZE,
VONAGE_SAMPLING_RATE,
)

from .audio import AudioEncoding
Expand Down Expand Up @@ -81,17 +84,30 @@ def from_input_device(
**kwargs,
)

# TODO(EPD-186): switch to from_twilio_input_device and from_vonage_input_device
@classmethod
def from_telephone_input_device(
def from_twilio_input_device(
cls,
endpointing_config: Optional[EndpointingConfig] = None,
**kwargs,
):
return cls(
sampling_rate=DEFAULT_SAMPLING_RATE,
audio_encoding=DEFAULT_AUDIO_ENCODING,
chunk_size=DEFAULT_CHUNK_SIZE,
sampling_rate=TWILIO_SAMPLING_RATE,
audio_encoding=TWILIO_AUDIO_ENCODING,
chunk_size=TWILIO_CHUNK_SIZE,
endpointing_config=endpointing_config,
**kwargs,
)

@classmethod
def from_vonage_input_device(
    cls,
    endpointing_config: Optional[EndpointingConfig] = None,
    **kwargs,
):
    """Build an instance of this class preset for Vonage audio input.

    The sampling rate, audio encoding and chunk size are pinned to the
    Vonage telephony constants. The endpointing configuration and any
    remaining fields are passed through to the constructor.

    Args:
        endpointing_config: Optional endpointing configuration forwarded
            as-is; defaults to ``None``.
        **kwargs: Additional keyword arguments handed to the class
            constructor. Supplying ``sampling_rate``, ``audio_encoding``
            or ``chunk_size`` here raises ``TypeError`` because they are
            already set.

    Returns:
        A new ``cls`` instance configured with the Vonage audio settings.
    """
    vonage_audio_settings = {
        "sampling_rate": VONAGE_SAMPLING_RATE,
        "audio_encoding": VONAGE_AUDIO_ENCODING,
        "chunk_size": VONAGE_CHUNK_SIZE,
    }
    return cls(
        **vonage_audio_settings,
        endpointing_config=endpointing_config,
        **kwargs,
    )
Expand Down
4 changes: 2 additions & 2 deletions vocode/streaming/output_device/twilio_output_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from vocode.streaming.output_device.abstract_output_device import AbstractOutputDevice
from vocode.streaming.output_device.audio_chunk import AudioChunk, ChunkState
from vocode.streaming.telephony.constants import DEFAULT_AUDIO_ENCODING, DEFAULT_SAMPLING_RATE
from vocode.streaming.telephony.constants import TWILIO_AUDIO_ENCODING, TWILIO_SAMPLING_RATE
from vocode.streaming.utils.create_task import asyncio_create_task
from vocode.streaming.utils.dtmf_utils import DTMFToneGenerator, KeypadEntry
from vocode.streaming.utils.worker import InterruptibleEvent
Expand All @@ -28,7 +28,7 @@ class ChunkFinishedMarkMessage(BaseModel):

class TwilioOutputDevice(AbstractOutputDevice):
def __init__(self, ws: Optional[WebSocket] = None, stream_sid: Optional[str] = None):
super().__init__(sampling_rate=DEFAULT_SAMPLING_RATE, audio_encoding=DEFAULT_AUDIO_ENCODING)
super().__init__(sampling_rate=TWILIO_SAMPLING_RATE, audio_encoding=TWILIO_AUDIO_ENCODING)
self.ws = ws
self.stream_sid = stream_sid
self.active = True
Expand Down
7 changes: 3 additions & 4 deletions vocode/streaming/telephony/constants.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from vocode.streaming.models.audio import AudioEncoding, SamplingRate

# TODO(EPD-186): namespace as Twilio
DEFAULT_SAMPLING_RATE: int = SamplingRate.RATE_8000.value
DEFAULT_AUDIO_ENCODING = AudioEncoding.MULAW
DEFAULT_CHUNK_SIZE = 20 * 160
TWILIO_SAMPLING_RATE: int = SamplingRate.RATE_8000.value
TWILIO_AUDIO_ENCODING = AudioEncoding.MULAW
TWILIO_CHUNK_SIZE = 20 * 160
MULAW_SILENCE_BYTE = b"\xff"

VONAGE_SAMPLING_RATE: int = SamplingRate.RATE_16000.value
Expand Down