'Detect digit on a live video camera using OpenCV and TensorFlow

I tried the code provided below to detect digit in the video camera and put a contour around it then classify it using the H5 model but it's giving me bad results, just the camera is open and I can see neither detection nor classification. I'm not sure what I need to change or work on.

I use python2.7 OpenCV 4.2.0 and TensorFlow 1.5.0

The code I'm working with:

from statistics import mode
import cv2, time
from keras.models import load_model
from keras.datasets import mnist
import tensorflow as tf
import numpy as np
import vision_definitions
from PIL import Image
import numpy as np
import sys, os

from utils.inference import detect_digits
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.preprocessor import preprocess_input

# parameters for loading data and images
detection_model_path = '../trained_models/detection_models/model.sav'
class_model_path = '../trained_models/class_models/Num.h5'

# hyper-parameters for bounding boxes shape
frame_window = 10
class_offsets = (20, 40)

# loading models
digit_detection = load_detection_model(detection_model_path)
class_classifier = load_model(class_model_path)

# getting input model shapes for inference
class_target_size = class_classifier.input_shape[1:3]

class_window = []
class_window1 = []

# starting video streaming
cameraIndex = 0
resolution = vision_definitions.kVGA
colorSpace = vision_definitions.kRGBColorSpace
resolution = 2
colorSpace = 3
cv2.namedWindow('window_frame')
video_capture = cv2.VideoCapture(0)
if video_capture.isOpened():
 frame = video_capture.read()
else:
 rval = False
while True:
    rval, frame = video_capture.read()
    gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    digits = detect_digits(digit_detection, gray_image)
    frame = cv2.resize(frame, (640, 480))
    key = cv2.waitKey(1)
    b,g,r = cv2.split(frame) # get b,g,r
    rgb_img = cv2.merge([r,g,b]) # switch it to rgb    

    for digit_coordinates in digits:
        x1, x2, y1, y2 = apply_offsets(digit_coordinates, class_offsets)
        gray_digit = gray_image[y1:y2, x1:x2]
        try:
            gray_digit = cv2.resize(gray_digit, (class_target_size))
        except:
            continue


        gray_digit = preprocess_input(gray_digit, True)
        gray_digit = np.expand_dims(gray_digit, 0)
        gray_digit = np.expand_dims(gray_digit, -1)
        class_prediction = class_classifier.predict(gray_digit)
        class_probability = np.max(class_prediction)
        class_label_arg = np.argmax(class_prediction)

       
        color = color.astype(int)
        color = color.tolist()

        draw_bounding_box(digit_coordinates, rgb_image, color)
        draw_text(digit_coordinates, rgb_image, class_mode,
                  color, 0, -45, 1, 1)

    frame = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    cv2.imshow('window_frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

Solution 1:^[1]

I spend sometimes since there is no CV2.imshow() on Windows except C++ but there it is ...

[ Sample ]:

import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation

import numpy as np

import tensorflow as tf

import os
from os.path import exists

import time

def f1( picture ):
    return np.asarray( picture )

fig = plt.figure()
image = plt.imread( "C:\\Users\\Jirayu Kaewprateep\\Pictures\\Cats\\samples\\03.png" )
im = plt.imshow( image )

global video_capture_0
video_capture_0 = cv2.VideoCapture(0)
video_capture_1 = cv2.VideoCapture(1)

def animate(i ):
    ret0, frame0 = video_capture_0.read()
    if (ret0):
        picture = np.concatenate( ( np.reshape(frame0[:,:,2:3], ( 480, 640, 1 )), 
                                    np.reshape(frame0[:,:,1:2], ( 480, 640, 1 )), 
                                    np.reshape(frame0[:,:,0:1], ( 480, 640, 1 ))),
                                    axis=2 )
        
        im.set_array( f1( picture ) )
    return im,
    
while True:
    # Capture frame-by-frame
    ret0, frame0 = video_capture_0.read()
    ani = animation.FuncAnimation(fig, animate, interval=50, blit=True)
    plt.show()

# When everything is done, release the capture
video_capture_0.release()
cv2.destroyAllWindows()

[ Model ]:

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=( 29, 39, 3 )),
    # tf.keras.layers.Reshape(( 29, 39 * 3 )),
    # tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True, return_state=False)),
    
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(.5, .2),
    
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Dense(64),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Dense(64),
])
        
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64))
model.add(tf.keras.layers.Dense(2))
model.summary()

[ Output ]: