forked from mgrofsky/GoogleAI-Pi
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
81 lines (58 loc) · 2.19 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import picamera
import time
import os
from google.cloud import vision
from google.cloud import texttospeech
# Needs permission for Cloud Vision API and Cloud Text-to-Speech API
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/dir/YourServiceAccount.json"
client_vision = vision.ImageAnnotatorClient()
client_tts = texttospeech.TextToSpeechClient()
def takephoto():
camera = picamera.PiCamera()
camera.resolution = (1024, 768)
camera.start_preview()
# Show me a quick preview before snapping the photo
time.sleep(1)
# Take the photo
camera.capture('image.jpg')
def main():
takephoto()
with open('image.jpg', 'rb') as image_file:
content = image_file.read()
image = vision.types.Image(content=content)
response = client_vision.label_detection(image=image)
response = client_vision.label(image=image)
labels = response.label_annotations
print('Labels:')
synthesis_input = ''
# Make a simple comma delimited string type sentence.
for label in labels:
print(label.description)
synthesis_input = label.description + ', ' + synthesis_input
synthesis_in = texttospeech.SynthesisInput(text=synthesis_input)
# Let's make this a premium Wavenet voice in SSML
voice = texttospeech.VoiceSelectionParams(
language_code="en-US",
name="en-US-Wavenet-A",
ssml_gender=texttospeech.SsmlVoiceGender.MALE
)
# Select the type of audio file you want returned
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
)
# Perform the text-to-speech request on the text input with the selected
# voice parameters and audio file type
response = client_tts.synthesize_speech(
input=synthesis_in, voice=voice, audio_config=audio_config
)
# The response's audio_content is binary.
with open("output.mp3", "wb") as out:
# Write the response to the output file.
out.write(response.audio_content)
print('Audio content written to file "output.mp3"')
file = "output.mp3"
# apt install mpg123
# Save the audio file to the dir
os.system("mpg123 " + file)
if __name__ == '__main__':
main()