Skip to content

Commit

Permalink
v1.0.6 adding custom voice support, in response to #13
Browse files Browse the repository at this point in the history
  • Loading branch information
jeroenterheerdt committed Dec 8, 2020
1 parent 6267feb commit 5befc89
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 40 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,12 @@ with open("file2.wav", "wb") as f:
f.write(data)
```

You can also use a custom voice by specifying `isCustom=True` and providing a `customEndpoint` (the URL of your deployed custom voice endpoint):
```python
from pycsspeechtts import TTSTranslator
t = TTSTranslator("YOUR API KEY","westeurope", isCustom=True, customEndpoint=MyEndpoint)
data = t.speak(language='en-gb',gender='Male',voiceType="ArchieNeural",text="This is a test for custom voice")
```

See test.py for more samples.
Refer to https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support to find the valid values for language, gender, voice type, and output format.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
with open("README.md", "r") as fh:
long_description = fh.read()
setup(name='pycsspeechtts',
version='1.0.5',
version='1.0.6',
description='Python 3 interface to Microsoft Cognitive Services Text To Speech',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
56 changes: 36 additions & 20 deletions src/pycsspeechtts/pycsspeechtts.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,20 @@ class TTSTranslator(object):
Interface class for the Microsoft Cognitive Services Text-to-speech translator
"""

def __init__(self, apiKey, region="eastus"):
def __init__(self, apiKey, region="eastus", isCustom=False, customEndpoint=None):
self._apiKey = apiKey
self._geoLocation = region
self._isCustom = isCustom
self._customEndpoint=customEndpoint

headers = {"Ocp-Apim-Subscription-Key": self._apiKey}
response = requests.post(AccessTokenUrlTemplate.format(
self._geoLocation), headers=headers)
response.raise_for_status()
if not self._isCustom:
response = requests.post(AccessTokenUrlTemplate.format(
self._geoLocation), headers=headers)
response.raise_for_status()
self._accesstoken = str(response.text)
_LOGGER.debug("Connection Initialized OK")
self._accesstoken = str(response.text)


def speak(self, language="en-us", gender="Female", voiceType="JessaNeural",
output="riff-24khz-16bit-mono-pcm", rate="+0.00%", volume="+0.00%",
Expand All @@ -44,23 +48,35 @@ def name_lang(language):
voice.set(
'name', 'Microsoft Server Speech Text to Speech Voice ('+name_lang(language)+', '+voiceType+')')

prosody = ElementTree.SubElement(voice, 'prosody')
prosody.set('rate', rate)
prosody.set('volume', volume)
prosody.set('pitch', pitch)
prosody.set('contour', contour)
prosody.text = text

headers = {"Content-Type": "application/ssml+xml",
"X-Microsoft-OutputFormat": output,
"Authorization": "Bearer " + self._accesstoken,
"X-Search-AppId": "07D3234E49CE426DAA29772419F436CA",
"X-Search-ClientID": "1ECFAE91408841A480F00935DC390960",
"User-Agent": "PYCSSpeechTTS"
}
endpoint = None
if self._isCustom:
# this is a custom voice
endpoint = self._customEndpoint
headers = {"Content-Type": "application/ssml+xml",
"X-Microsoft-OutputFormat": output,
"Ocp-Apim-Subscription-Key": self._apiKey,
"User-Agent": "PYCSSpeechTTS"
}
voice.text = text
else:
# not a custom voice, generate the endpoint
endpoint = SpeechUrlTemplate.format(self._geoLocation)
headers = {"Content-Type": "application/ssml+xml",
"X-Microsoft-OutputFormat": output,
"Authorization": "Bearer " + self._accesstoken,
"X-Search-AppId": "07D3234E49CE426DAA29772419F436CA",
"X-Search-ClientID": "1ECFAE91408841A480F00935DC390960",
"User-Agent": "PYCSSpeechTTS"
}
prosody = ElementTree.SubElement(voice, 'prosody')
prosody.set('rate', rate)
prosody.set('volume', volume)
prosody.set('pitch', pitch)
prosody.set('contour', contour)
prosody.text = text

response = requests.post(
SpeechUrlTemplate.format(self._geoLocation), ElementTree.tostring(body), headers=headers)
endpoint, ElementTree.tostring(body), headers=headers)
if response.status_code == requests.codes.ok:
_LOGGER.debug("Text synthesis OK")
return response.content
Expand Down
45 changes: 26 additions & 19 deletions src/pycsspeechtts/test.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,33 @@
from pycsspeechtts import TTSTranslator
t = TTSTranslator("YOUR_API_KEY")

# Speaking with default language of english US and default Female voice
data = t.speak(text='This is a test')
# Change speed with -50%
data = t.speak(text="This is a test", rate="-50%")
# Change pitch to high
data = t.speak(text="This is a test", pitch="high")
# Change volume to +20%
data = t.speak(text="This is a test", volume="+20%")
# See https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support for the accepted values of the parameters below
data = t.speak('en-GB', 'Male', 'George, Apollo',
'riff-16khz-16bit-mono-pcm', text='I am Max')
data = t.speak('cs-CZ', 'Male', 'Jakub', text='Pojďme vyzkoušet klasickou českou testovací větu. Příliš žluťoučký kůň úpěl ďábelské ódy.')
# Using contour to change pitch from normal at 0% of speech and +100% at 100% of speech
data = t.speak(text="The Wall Street Journal - which says it's spoken to people close to the ongoing investigation - says the information it has paints a picture of a catastrophic failure that quickly overwhelmed the flight crew",
contour="(0%,+0%) (100%,+100%)")
useCustom = True
api_key = "YOUR_API_KEY"
custom_endpoint = "custom_endpoint"

if not useCustom:
t = TTSTranslator(api_key, region="westus")
# Speaking with default language of english US and default Female voice
data = t.speak(text='This is a test')
# Change speed with -50%
data = t.speak(text="This is a test", rate="-50%")
# Change pitch to high
data = t.speak(text="This is a test", pitch="high")
# Change volume to +20%
data = t.speak(text="This is a test", volume="+20%")
# See https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support for the accepted values of the parameters below
data = t.speak('en-GB', 'Male', 'George, Apollo',
'riff-16khz-16bit-mono-pcm', text='I am Max')
data = t.speak('cs-CZ', 'Male', 'Jakub', text='Pojďme vyzkoušet klasickou českou testovací větu. Příliš žluťoučký kůň úpěl ďábelské ódy.')
# Using contour to change pitch from normal at 0% of speech and +100% at 100% of speech
data = t.speak(text="The Wall Street Journal - which says it's spoken to people close to the ongoing investigation - says the information it has paints a picture of a catastrophic failure that quickly overwhelmed the flight crew",
contour="(0%,+0%) (100%,+100%)")
else:
# Test custom voice
t = TTSTranslator(api_key, region="westus", isCustom=True, customEndpoint=custom_endpoint)
data = t.speak(language='en-gb',gender='Male',voiceType="ArchieNeural",text="This is a test for custom voice")

if data == None:
print("An error occured")
print("An error occurred")
else:
with open("file.wav", "wb") as f:
f.write(data)
print("Succes! Open file.wav to hear the results")
print("Success! Open file.wav to hear the results")

0 comments on commit 5befc89

Please sign in to comment.