From a0cd49d79f853b8564bb423ebbf952eff12fd8bd Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Wed, 14 Nov 2018 13:48:59 -0800 Subject: [PATCH] Fix check_cloud.py script. Change-Id: I1fd05ea23323498596c52b5fa47c44d6b5aa6546 --- checkpoints/check_cloud.py | 49 +++++++------------------------------- src/aiy/cloudspeech.py | 23 +++++++++++++++--- 2 files changed, 28 insertions(+), 44 deletions(-) diff --git a/checkpoints/check_cloud.py b/checkpoints/check_cloud.py index 72c14022..2add6dee 100755 --- a/checkpoints/check_cloud.py +++ b/checkpoints/check_cloud.py @@ -12,18 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Check that the Cloud Speech API can be used.""" import json import os -import os.path -import sys import traceback -sys.path.append(os.path.realpath(os.path.join(__file__, '..', '..')) + '/src/') - -import aiy._apis._speech # noqa +from aiy.cloudspeech import CloudSpeechClient OLD_CREDENTIALS_FILE = os.path.expanduser('~/credentials.json') NEW_CREDENTIALS_FILE = os.path.expanduser('~/cloud_speech.json') @@ -33,17 +28,6 @@ else: CREDENTIALS_PATH = NEW_CREDENTIALS_FILE -ROOT_PATH = os.path.realpath(os.path.join(__file__, '..', '..')) -PYTHON3 = ROOT_PATH + '/env/bin/python3' -SPEECH_PY = ROOT_PATH + '/src/aiy/_apis/_speech.py' -SPEECH_PY_ENV = { - 'VIRTUAL_ENV': ROOT_PATH + '/env', - 'PATH': ROOT_PATH + '/env/bin:' + os.getenv('PATH'), -} -TEST_AUDIO = ROOT_PATH + '/checkpoints/test_hello.raw' -RECOGNIZED_TEXT = 'hello' - - def check_credentials_valid(): """Check the credentials are JSON service credentials.""" try: @@ -53,28 +37,12 @@ def check_credentials_valid(): return 'type' in obj and obj['type'] == 'service_account' - def check_speech_reco(): - """Try to test the speech recognition code from AIY APIs.""" - print('Testing the Google Cloud Speech API...') - req = aiy._apis._speech.CloudSpeechRequest(CREDENTIALS_PATH) - with open(TEST_AUDIO, 'rb') as f: - while True: - chunk = f.read(64000) - if not chunk: - break - req.add_data(chunk) - req.end_audio() - output = req.do_request() - - if RECOGNIZED_TEXT in output: - return True - - print('Speech recognition failed or output not as expected:') - print(output) - print('Expected:', RECOGNIZED_TEXT) - return False - + path = os.path.join(os.path.dirname(__file__), 'test_hello.raw') + with open(path, 'rb') as f: + client = CloudSpeechClient() + result = client.recognize_bytes(f.read()) + return result.strip() == 'hello' def main(): """Run all checks and print status.""" @@ -96,13 +64,12 @@ def main(): print('Failed to test the Cloud Speech API. Please see error above.') return - print("Everything's set up to use the Google Cloud.") - + print("Everything is set up to use the Google Cloud.") if __name__ == '__main__': try: main() - input('Press Enter to close...') except Exception: traceback.print_exc() + finally: input('Press Enter to close...') diff --git a/src/aiy/cloudspeech.py b/src/aiy/cloudspeech.py index 0c360bd6..b2bda695 100644 --- a/src/aiy/cloudspeech.py +++ b/src/aiy/cloudspeech.py @@ -40,15 +40,32 @@ def __init__(self, service_accout_file=None): credentials = service_account.Credentials.from_service_account_file(service_accout_file) self._client = speech.SpeechClient(credentials=credentials) - def recognize(self, language_code='en-US', hint_phrases=None): - config = speech.types.RecognitionConfig( + def _make_config(self, language_code, hint_phrases): + return speech.types.RecognitionConfig( encoding=speech.types.RecognitionConfig.LINEAR16, sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ, language_code=language_code, speech_contexts=[speech.types.SpeechContext(phrases=hint_phrases)]) + def recognize_bytes(self, data, language_code='en-US', hint_phrases=None): + """Data must be encoded according to the AUDIO_FORMAT.""" + streaming_config=speech.types.StreamingRecognitionConfig( + config=self._make_config(language_code, hint_phrases), + single_utterance=True) + responses = self._client.streaming_recognize( + config=streaming_config, + requests=[speech.types.StreamingRecognizeRequest(audio_content=data)]) + + for response in responses: + for result in response.results: + if result.is_final: + return result.alternatives[0].transcript + + return None + + def recognize(self, language_code='en-US', hint_phrases=None): streaming_config=speech.types.StreamingRecognitionConfig( - config=config, + config=self._make_config(language_code, hint_phrases), single_utterance=True) with Recorder() as recorder: