# Import necessary libraries and modules
import base64
import os
import threading

import cv2
import keyboard
import openai
import requests
from dotenv import load_dotenv
from gtts import gTTS

# Load environment variables from the .env file
load_dotenv()

# Set the OpenAI API key from the environment variable
openai.api_key = os.getenv("OPENAI_API_KEY")
# Function to get a description for a single image using the OpenAI GPT-4 Vision model
def image_description_1_image(image):
    """
    Generate a textual description for a single image using the OpenAI GPT-4 Vision model.

    Args:
        image (str): Base64-encoded image.

    Returns:
        str: Generated textual description.
    """
    # Set headers for the API request
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai.api_key}"
    }
    # Define the payload for the API request
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe the image. Keep it brief. Don't start with 'The image shows'. Just give the description."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image}"
                        }
                    },
                ]
            }
        ],
        "max_tokens": 350
    }
    # Make the API request and return the generated description
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    return response.json()['choices'][0]['message']['content']
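

# Example usage (a minimal sketch; "frame.jpg" is a hypothetical local file):
#
#     with open("frame.jpg", "rb") as f:
#         b64 = base64.b64encode(f.read()).decode("utf-8")
#     print(image_description_1_image(b64))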
# Function to get a description for two consecutive images with contextual analysis
def image_description_2_images(image1, image2, image_description_prev):
    """
    Generate a textual description for two consecutive images with contextual analysis
    using the OpenAI GPT-4 Vision model.

    Args:
        image1 (str): Base64-encoded previous frame image.
        image2 (str): Base64-encoded current frame image.
        image_description_prev (str): Description of the previous frame image.

    Returns:
        str: Generated textual description for the current frame.
    """
    # Set headers for the API request
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai.api_key}"
    }
    # Define the payload for the API request
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""The images are two consecutive frames of a continuous live video.
                        The first image is the previous frame, and the second image is the current frame.
                        The description of the previous frame is {image_description_prev}.
                        Compare the two images, and also compare the descriptions of both frames.
                        Then describe the current frame and anything new that comes up in the description.
                        Don't repeat anything that is already there in the previous frame or in {image_description_prev}.
                        Always make a connection between the two frames and between the current description and {image_description_prev},
                        as these are images from a continuous live video feed. Don't mention anything about the comparison in your final answer.
                        Just describe the current frame after doing the above analysis. Don't start with 'in this frame'. Keep it brief."""
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image1}"}
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image2}"}
                    },
                ]
            }
        ],
        "max_tokens": 300
    }
    # Make the API request and return the generated description
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    return response.json()['choices'][0]['message']['content']
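

# Example usage (a minimal sketch; b64_prev and b64_curr are hypothetical
# base64-encoded JPEG frames):
#
#     prev_desc = image_description_1_image(b64_prev)
#     curr_desc = image_description_2_images(b64_prev, b64_curr, prev_desc)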
# Function to convert text to speech using gTTS (Google Text-to-Speech)
def T_T_speech(text, Language='en'):
    """
    Convert text to speech with gTTS and play the resulting audio file.

    Args:
        text (str): Text to read aloud.
        Language (str): gTTS language code (default 'en').
    """
    # slow=False selects the faster speaking rate
    myobj = gTTS(text=text, lang=Language, slow=False)
    # Save the converted audio to an mp3 file
    myobj.save("read_aloud.mp3")
    # Play the file via the OS file association (works on Windows; on
    # macOS/Linux use a player such as 'afplay' or 'mpg123' instead)
    os.system("read_aloud.mp3")
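

# Example usage (a minimal sketch): writes read_aloud.mp3 and asks the OS to play it.
#
#     T_T_speech("Hello, world", Language='en')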
# Set up video capture from an IP camera MJPEG stream
# (use cv2.VideoCapture(0) instead for the default local camera)
cap = cv2.VideoCapture("https://11.47.255.44:8080/video?type=some.mjpeg")
# Function to continuously describe live video frames
def live_video_description(cycle, cap=cap):
    """
    Continuously describe live video frames, speaking each description aloud.

    Args:
        cycle (int): Number of frames to wait before describing the next frame.
        cap (cv2.VideoCapture): Video capture object.
    """
    if not cap.isOpened():
        return
    image_description_prev = ""
    n = 0  # frames seen since the last description
    i = 0  # number of descriptions generated so far
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if keyboard.is_pressed("q"):
            break
        if n == cycle:
            n = 0
            # Encode the frame as a base64 JPEG for the API request
            _, buffer = cv2.imencode('.jpg', frame)
            base64_frame = base64.b64encode(buffer).decode('utf-8')
            # Call the appropriate function depending on whether this is the first described frame
            if i == 0:
                image_description_crnt = image_description_1_image(base64_frame)
            else:
                image_description_crnt = image_description_2_images(base64_frame_prev, base64_frame, image_description_prev)
            # Print and speak the generated description
            print("\n The current frame shows", image_description_crnt, "\n")
            T_T_speech(text=image_description_crnt)
            # Accumulate the running description and keep the frame for the next iteration
            # (note: the accumulated text grows without bound over a long session)
            image_description_prev = " ".join([image_description_prev, image_description_crnt])
            base64_frame_prev = base64_frame
            i += 1
        n += 1
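

# Example usage (a minimal sketch): describe roughly every 30th frame from
# the default local camera instead of the network stream.
#
#     local_cap = cv2.VideoCapture(0)
#     live_video_description(30, cap=local_cap)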
# Function to display the live video feed
def display_video(cap=cap):
    """
    Display the live video feed.

    Args:
        cap (cv2.VideoCapture): Video capture object.
    """
    if not cap.isOpened():
        return
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
# Main function to start the video display and description threads
def main():
    display_thread = threading.Thread(target=display_video)
    description_thread = threading.Thread(target=live_video_description, args=(5,))
    display_thread.start()
    description_thread.start()
    # Wait for both threads to finish before cleaning up the display windows
    display_thread.join()
    description_thread.join()
    cv2.destroyAllWindows()

# Entry point of the script
if __name__ == "__main__":
    main()
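
# Running notes (assumptions, not verified on every platform):
# - OPENAI_API_KEY must be set in a .env file or in the environment.
# - The 'keyboard' module may require elevated privileges on Linux.
# - Both threads read frames from the same VideoCapture, so they contend for
#   frames; a more robust design would capture frames once and share them.
# - cv2.imshow from a non-main thread can fail on some platforms (e.g. macOS).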