'Detect digit on a live video camera using OpenCV and TensorFlow
I tried the code provided below to detect digit in the video camera and put a contour around it then classify it using the H5 model but it's giving me bad results, just the camera is open and I can see neither detection nor classification. I'm not sure what I need to change or work on.
I use python2.7 OpenCV 4.2.0 and TensorFlow 1.5.0
The code I'm working with:
from statistics import mode
import cv2, time
from keras.models import load_model
from keras.datasets import mnist
import tensorflow as tf
import numpy as np
import vision_definitions
from PIL import Image
import numpy as np
import sys, os
from utils.inference import detect_digits
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.preprocessor import preprocess_input
# parameters for loading data and images
detection_model_path = '../trained_models/detection_models/model.sav'
class_model_path = '../trained_models/class_models/Num.h5'
# hyper-parameters for bounding boxes shape
frame_window = 10
class_offsets = (20, 40)
# loading models
digit_detection = load_detection_model(detection_model_path)
class_classifier = load_model(class_model_path)
# getting input model shapes for inference
class_target_size = class_classifier.input_shape[1:3]
class_window = []
class_window1 = []
# starting video streaming
cameraIndex = 0
resolution = vision_definitions.kVGA
colorSpace = vision_definitions.kRGBColorSpace
resolution = 2
colorSpace = 3
cv2.namedWindow('window_frame')
video_capture = cv2.VideoCapture(0)
if video_capture.isOpened():
frame = video_capture.read()
else:
rval = False
while True:
rval, frame = video_capture.read()
gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
digits = detect_digits(digit_detection, gray_image)
frame = cv2.resize(frame, (640, 480))
key = cv2.waitKey(1)
b,g,r = cv2.split(frame) # get b,g,r
rgb_img = cv2.merge([r,g,b]) # switch it to rgb
for digit_coordinates in digits:
x1, x2, y1, y2 = apply_offsets(digit_coordinates, class_offsets)
gray_digit = gray_image[y1:y2, x1:x2]
try:
gray_digit = cv2.resize(gray_digit, (class_target_size))
except:
continue
gray_digit = preprocess_input(gray_digit, True)
gray_digit = np.expand_dims(gray_digit, 0)
gray_digit = np.expand_dims(gray_digit, -1)
class_prediction = class_classifier.predict(gray_digit)
class_probability = np.max(class_prediction)
class_label_arg = np.argmax(class_prediction)
color = color.astype(int)
color = color.tolist()
draw_bounding_box(digit_coordinates, rgb_image, color)
draw_text(digit_coordinates, rgb_image, class_mode,
color, 0, -45, 1, 1)
frame = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
cv2.imshow('window_frame', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
Solution 1:[1]
I spend sometimes since there is no CV2.imshow() on Windows except C++ but there it is ...
[ Sample ]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np
import tensorflow as tf
import os
from os.path import exists
import time
def f1( picture ):
return np.asarray( picture )
fig = plt.figure()
image = plt.imread( "C:\\Users\\Jirayu Kaewprateep\\Pictures\\Cats\\samples\\03.png" )
im = plt.imshow( image )
global video_capture_0
video_capture_0 = cv2.VideoCapture(0)
video_capture_1 = cv2.VideoCapture(1)
def animate(i ):
ret0, frame0 = video_capture_0.read()
if (ret0):
picture = np.concatenate( ( np.reshape(frame0[:,:,2:3], ( 480, 640, 1 )),
np.reshape(frame0[:,:,1:2], ( 480, 640, 1 )),
np.reshape(frame0[:,:,0:1], ( 480, 640, 1 ))),
axis=2 )
im.set_array( f1( picture ) )
return im,
while True:
# Capture frame-by-frame
ret0, frame0 = video_capture_0.read()
ani = animation.FuncAnimation(fig, animate, interval=50, blit=True)
plt.show()
# When everything is done, release the capture
video_capture_0.release()
cv2.destroyAllWindows()
[ Model ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 29, 39, 3 )),
# tf.keras.layers.Reshape(( 29, 39 * 3 )),
# tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True, return_state=False)),
tf.keras.layers.RandomFlip("horizontal_and_vertical"),
tf.keras.layers.RandomRotation(0.2),
tf.keras.layers.RandomZoom(.5, .2),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.Dense(64),
tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
tf.keras.layers.Dense(64),
])
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64))
model.add(tf.keras.layers.Dense(2))
model.summary()
[ Output ]:
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | Martijn Pieters |