From a0cd49d79f853b8564bb423ebbf952eff12fd8bd Mon Sep 17 00:00:00 2001
From: Dmitry Kovalev <dkovalev@google.com>
Date: Wed, 14 Nov 2018 13:48:59 -0800
Subject: [PATCH] Fix check_cloud.py script.

Change-Id: I1fd05ea23323498596c52b5fa47c44d6b5aa6546
---
 checkpoints/check_cloud.py | 49 +++++++-------------------------------
 src/aiy/cloudspeech.py     | 23 +++++++++++++++---
 2 files changed, 28 insertions(+), 44 deletions(-)

diff --git a/checkpoints/check_cloud.py b/checkpoints/check_cloud.py
index 72c14022..2add6dee 100755
--- a/checkpoints/check_cloud.py
+++ b/checkpoints/check_cloud.py
@@ -12,18 +12,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Check that the Cloud Speech API can be used."""
 
 import json
 import os
-import os.path
-import sys
 import traceback
 
-sys.path.append(os.path.realpath(os.path.join(__file__, '..', '..')) + '/src/')
-
-import aiy._apis._speech  # noqa
+from aiy.cloudspeech import CloudSpeechClient
 
 OLD_CREDENTIALS_FILE = os.path.expanduser('~/credentials.json')
 NEW_CREDENTIALS_FILE = os.path.expanduser('~/cloud_speech.json')
@@ -33,17 +28,6 @@
 else:
     CREDENTIALS_PATH = NEW_CREDENTIALS_FILE
 
-ROOT_PATH = os.path.realpath(os.path.join(__file__, '..', '..'))
-PYTHON3 = ROOT_PATH + '/env/bin/python3'
-SPEECH_PY = ROOT_PATH + '/src/aiy/_apis/_speech.py'
-SPEECH_PY_ENV = {
-    'VIRTUAL_ENV': ROOT_PATH + '/env',
-    'PATH': ROOT_PATH + '/env/bin:' + os.getenv('PATH'),
-}
-TEST_AUDIO = ROOT_PATH + '/checkpoints/test_hello.raw'
-RECOGNIZED_TEXT = 'hello'
-
-
 def check_credentials_valid():
     """Check the credentials are JSON service credentials."""
     try:
@@ -53,28 +37,12 @@ def check_credentials_valid():
 
     return 'type' in obj and obj['type'] == 'service_account'
 
-
 def check_speech_reco():
-    """Try to test the speech recognition code from AIY APIs."""
-    print('Testing the Google Cloud Speech API...')
-    req = aiy._apis._speech.CloudSpeechRequest(CREDENTIALS_PATH)
-    with open(TEST_AUDIO, 'rb') as f:
-        while True:
-            chunk = f.read(64000)
-            if not chunk:
-                break
-            req.add_data(chunk)
-    req.end_audio()
-    output = req.do_request()
-
-    if RECOGNIZED_TEXT in output:
-        return True
-
-    print('Speech recognition failed or output not as expected:')
-    print(output)
-    print('Expected:', RECOGNIZED_TEXT)
-    return False
-
+    """Check that the bundled test recording is transcribed as 'hello'."""
+    path = os.path.join(os.path.dirname(__file__), 'test_hello.raw')
+    with open(path, 'rb') as f:
+        result = CloudSpeechClient(CREDENTIALS_PATH).recognize_bytes(f.read())
+    return result is not None and result.strip() == 'hello'  # None: no transcript
 
 def main():
     """Run all checks and print status."""
@@ -96,13 +64,12 @@ def main():
         print('Failed to test the Cloud Speech API. Please see error above.')
         return
 
-    print("Everything's set up to use the Google Cloud.")
-
+    print("Everything is set up to use the Google Cloud.")
 
 if __name__ == '__main__':
     try:
         main()
-        input('Press Enter to close...')
     except Exception:
         traceback.print_exc()
+    finally:
         input('Press Enter to close...')
diff --git a/src/aiy/cloudspeech.py b/src/aiy/cloudspeech.py
index 0c360bd6..b2bda695 100644
--- a/src/aiy/cloudspeech.py
+++ b/src/aiy/cloudspeech.py
@@ -40,15 +40,32 @@ def __init__(self, service_accout_file=None):
         credentials = service_account.Credentials.from_service_account_file(service_accout_file)
         self._client = speech.SpeechClient(credentials=credentials)
 
-    def recognize(self, language_code='en-US', hint_phrases=None):
-        config = speech.types.RecognitionConfig(
+    def _make_config(self, language_code, hint_phrases):
+        return speech.types.RecognitionConfig(
             encoding=speech.types.RecognitionConfig.LINEAR16,
             sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
             language_code=language_code,
             speech_contexts=[speech.types.SpeechContext(phrases=hint_phrases)])
 
+    def recognize_bytes(self, data, language_code='en-US', hint_phrases=None):
+        """Return the first final transcript for data, or None if there is none.
+
+        Data must be encoded according to the AUDIO_FORMAT.
+        """
+        streaming_config = speech.types.StreamingRecognitionConfig(
+            config=self._make_config(language_code, hint_phrases),
+            single_utterance=True)
+        request = speech.types.StreamingRecognizeRequest(audio_content=data)
+        for response in self._client.streaming_recognize(
+                config=streaming_config, requests=[request]):
+            for result in response.results:
+                if result.is_final:
+                    return result.alternatives[0].transcript
+        return None
+
+    def recognize(self, language_code='en-US', hint_phrases=None):
         streaming_config=speech.types.StreamingRecognitionConfig(
-            config=config,
+            config=self._make_config(language_code, hint_phrases),
             single_utterance=True)
 
         with Recorder() as recorder: