-
Notifications
You must be signed in to change notification settings - Fork 3
/
app.py
289 lines (192 loc) · 9.97 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
import os
import cv2
import PIL
import numpy as np
import google.generativeai as genai
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space
from mediapipe.python.solutions import hands, drawing_utils
from dotenv import load_dotenv
from warnings import filterwarnings
filterwarnings(action='ignore')
class calculator:
def streamlit_config(self):
# page configuration
st.set_page_config(page_title='Calculator', layout="wide")
# page header transparent color and Removes top padding
page_background_color = """
<style>
[data-testid="stHeader"]
{
background: rgba(0,0,0,0);
}
.block-container {
padding-top: 0rem;
}
</style>
"""
st.markdown(page_background_color, unsafe_allow_html=True)
# title and position
st.markdown(f'<h1 style="text-align: center;">Virtual Calculator</h1>',
unsafe_allow_html=True)
add_vertical_space(1)
def __init__(self):
# Load the Env File for Secrect API Key
load_dotenv()
# Initialize a Webcam to Capture Video and Set Width, Height and Brightness
self.cap = cv2.VideoCapture(0)
self.cap.set(propId=cv2.CAP_PROP_FRAME_WIDTH, value=950)
self.cap.set(propId=cv2.CAP_PROP_FRAME_HEIGHT, value=550)
self.cap.set(propId=cv2.CAP_PROP_BRIGHTNESS, value=130)
# Initialize Canvas Image
self.imgCanvas = np.zeros(shape=(550,950,3), dtype=np.uint8)
# Initializes a MediaPipe Hand object
self.mphands = hands.Hands(max_num_hands=1, min_detection_confidence=0.75)
# Set Drwaing Origin to Zero
self.p1, self.p2 = 0, 0
# Set Previous Time is Zero for FPS
self.p_time = 0
# Create Fingers Open/Close Position List
self.fingers = []
def process_frame(self):
# Reading the Video Capture to return the Success and Image Frame
success, img = self.cap.read()
# Resize the Image
img = cv2.resize(src=img, dsize=(950,550))
# Flip the Image Horizontally for a Later Selfie_View Display
self.img = cv2.flip(src=img, flipCode=1)
# BGR Image Convert to RGB Image
self.imgRGB = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
def process_hands(self):
# Processes an RGB Image and Returns the Hand Landmarks and Handedness of each Detected Hand
result = self.mphands.process(image=self.imgRGB)
# Draws the landmarks and the connections on the image
self.landmark_list = []
if result.multi_hand_landmarks:
for hand_lms in result.multi_hand_landmarks:
drawing_utils.draw_landmarks(image=self.img, landmark_list=hand_lms,
connections=hands.HAND_CONNECTIONS)
# Extract ID and Origin for Each Landmarks
for id, lm in enumerate(hand_lms.landmark):
h, w, c = self.img.shape
x, y = lm.x, lm.y
cx, cy = int(x * w), int(y * h)
self.landmark_list.append([id, cx, cy])
def identify_fingers(self):
# Identify Each Fingers Open/Close Position
self.fingers = []
# Verify the Hands Detection in Web Camera
if self.landmark_list != []:
for id in [4,8,12,16,20]:
# Index Finger, Middle Finger, Ring Finger and Pinky Finger
if id != 4:
if self.landmark_list[id][2] < self.landmark_list[id-2][2]:
self.fingers.append(1)
else:
self.fingers.append(0)
# thumb Finger
else:
if self.landmark_list[id][1] < self.landmark_list[id-2][1]:
self.fingers.append(1)
else:
self.fingers.append(0)
# Identify Finger Open Position
for i in range(0, 5):
if self.fingers[i] == 1:
cx, cy = self.landmark_list[(i+1)*4][1], self.landmark_list[(i+1)*4][2]
cv2.circle(img=self.img, center=(cx,cy), radius=5, color=(255,0,255), thickness=1)
def handle_drawing_mode(self):
# Both Thumb and Index Fingers Up in Drwaing Mode
if sum(self.fingers) == 2 and self.fingers[0]==self.fingers[1]==1:
cx, cy = self.landmark_list[8][1], self.landmark_list[8][2]
if self.p1 == 0 and self.p2 == 0:
self.p1, self.p2 = cx, cy
cv2.line(img=self.imgCanvas, pt1=(self.p1,self.p2), pt2=(cx,cy), color=(255,0,255), thickness=5)
self.p1,self.p2 = cx,cy
# Thumb, Index & Middle Fingers UP ---> Disable the Points Connection
elif sum(self.fingers) == 3 and self.fingers[0]==self.fingers[1]==self.fingers[2]==1:
self.p1, self.p2 = 0, 0
# Both Thumb and Middle Fingers Up ---> Erase the Drawing Lines
elif sum(self.fingers) == 2 and self.fingers[0]==self.fingers[2]==1:
cx, cy = self.landmark_list[12][1], self.landmark_list[12][2]
if self.p1 == 0 and self.p2 == 0:
self.p1, self.p2 = cx, cy
cv2.line(img=self.imgCanvas, pt1=(self.p1,self.p2), pt2=(cx,cy), color=(0,0,0), thickness=15)
self.p1,self.p2 = cx,cy
# Both Thumb and Pinky Fingers Up ---> Erase the Whole Thing (Reset)
elif sum(self.fingers) == 2 and self.fingers[0]==self.fingers[4]==1:
self.imgCanvas = np.zeros(shape=(550,950,3), dtype=np.uint8)
def blend_canvas_with_feed(self):
# Blend the Live Camera Feed and Canvas Images ---> Canvas Image Top on it the Original Transparency Image
img = cv2.addWeighted(src1=self.img, alpha=0.7, src2=self.imgCanvas, beta=1, gamma=0)
# Canvas_BGR Image Convert to Gray Scale Image ---> Maintain Intensity of Color Image
imgGray = cv2.cvtColor(self.imgCanvas, cv2.COLOR_BGR2GRAY)
# Gray Image Convert to Binary_Inverse Image ---> Gray Shades into only Two Colors (Black/White) based Threshold
_, imgInv = cv2.threshold(src=imgGray, thresh=50, maxval=255, type=cv2.THRESH_BINARY_INV)
# Binary_Inverse Image Convert into BGR Image ---> Single Channel Value apply All 3 Channel [0,0,0] or [255,255,255]
# Bleding need same Channel for Both Images
imgInv = cv2.cvtColor(imgInv, cv2.COLOR_GRAY2BGR)
# Blending both Images ---> Binary_Inverse Image Black/White Top on Original Image
img = cv2.bitwise_and(src1=img, src2=imgInv)
# Canvas Color added on the Top on Original Image
self.img = cv2.bitwise_or(src1=img, src2=self.imgCanvas)
def analyze_image_with_genai(self):
# Canvas_BGR Image Convert to RGB Image
imgCanvas = cv2.cvtColor(self.imgCanvas, cv2.COLOR_BGR2RGB)
# Numpy Array Convert to PIL Image
imgCanvas = PIL.Image.fromarray(imgCanvas)
# Configures the genai Library
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
# Initializes a Flash Generative Model
model = genai.GenerativeModel(model_name = 'gemini-1.5-flash')
# Input Prompt
prompt = "Analyze the image and provide the following:\n" \
"* The mathematical equation represented in the image.\n" \
"* The solution to the equation.\n" \
"* A short and sweet explanation of the steps taken to arrive at the solution."
# Sends Request to Model to Generate Content using a Text Prompt and Image
response = model.generate_content([prompt, imgCanvas])
# Extract the Text Content of the Model’s Response.
return response.text
def main(self):
col1, _, col3 = st.columns([0.8, 0.02, 0.18])
with col1:
# Stream the webcam video
stframe = st.empty()
with col3:
# Placeholder for result output
st.markdown(f'<h5 style="text-position:center;color:green;">OUTPUT:</h5>', unsafe_allow_html=True)
result_placeholder = st.empty()
while True:
if not self.cap.isOpened():
add_vertical_space(5)
st.markdown(body=f'<h4 style="text-position:center; color:orange;">Error: Could not open webcam. \
Please ensure your webcam is connected and try again</h4>',
unsafe_allow_html=True)
break
self.process_frame()
self.process_hands()
self.identify_fingers()
self.handle_drawing_mode()
self.blend_canvas_with_feed()
# Display the Output Frame in the Streamlit App
self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
stframe.image(self.img, channels="RGB")
# After Done Process with AI
if sum(self.fingers) == 2 and self.fingers[1]==self.fingers[2]==1:
result = self.analyze_image_with_genai()
result_placeholder.write(f"Result: {result}")
# Release the camera and close windows
self.cap.release()
cv2.destroyAllWindows()
try:
# Creating an instance of the class
calc = calculator()
# Streamlit Configuration Setup
calc.streamlit_config()
# Calling the main method
calc.main()
except Exception as e:
add_vertical_space(5)
# Displays the Error Message
st.markdown(f'<h5 style="text-position:center;color:orange;">{e}</h5>', unsafe_allow_html=True)