Cannot set tensor: Dimension mismatch. Got 3 but expected 4 for input 0
This is probably going to be a stupid question, but I am new to deep learning and TensorFlow. I have converted my deep learning model to TF Lite, and when I try to run inference with the TF Lite model (I am dealing with audio files here), I get this error: Cannot set tensor: Dimension mismatch.
Any help will be greatly appreciated. Thanks in advance!
Code:
'''
import soundfile as sf
import numpy as np
import tflite_runtime.interpreter as tflite
import time
##########################
# the values are fixed, if you need other values, you have to retrain.
# The sampling rate of 16 kHz is also fixed.
block_len = 512
block_shift = 128
# load models
interpreter_1 = tflite.Interpreter(model_path='./models_DTLN_model/models_DTLN_model_1.tflite')
interpreter_1.allocate_tensors()
interpreter_2 = tflite.Interpreter(model_path='./models_DTLN_model/models_DTLN_model_2.tflite')
interpreter_2.allocate_tensors()
# Get input and output tensors.
input_details_1 = interpreter_1.get_input_details()
output_details_1 = interpreter_1.get_output_details()
input_details_2 = interpreter_2.get_input_details()
output_details_2 = interpreter_2.get_output_details()
# create states for the lstms
states_1 = np.zeros(input_details_1[1]['shape']).astype('float32')
states_2 = np.zeros(input_details_2[1]['shape']).astype('float32')
# load audio file at 16k fs (please change)
audio,fs = sf.read('./models_DTLN_model/input/fileid_3.wav')
# check for sampling rate
if fs != 16000:
    raise ValueError('This model only supports 16k sampling rate.')
# preallocate output audio
out_file = np.zeros((len(audio)))
# create buffer
in_buffer = np.zeros((block_len)).astype('float32')
out_buffer = np.zeros((block_len)).astype('float32')
# calculate number of blocks
num_blocks = (audio.shape[0] - (block_len-block_shift)) // block_shift
time_array = []
# iterate over the number of blocks
for idx in range(num_blocks):
    start_time = time.time()
    # shift values and write to buffer
    in_buffer[:-block_shift] = in_buffer[block_shift:]
    in_buffer[-block_shift:] = audio[idx*block_shift:(idx*block_shift)+block_shift]
    # calculate fft of input block
    in_block_fft = np.fft.rfft(in_buffer)
    in_mag = np.abs(in_block_fft)
    in_phase = np.angle(in_block_fft)
    # reshape magnitude to input dimensions
    in_mag = np.reshape(in_mag, (1,1,-1)).astype('float32')
    # set tensors to the first model
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag)
    # run calculation
    interpreter_1.invoke()
    # get the output of the first block
    out_mask = interpreter_1.get_tensor(output_details_1[0]['index'])
    states_1 = interpreter_1.get_tensor(output_details_1[1]['index'])
    # calculate the ifft
    estimated_complex = in_mag * out_mask * np.exp(1j * in_phase)
    estimated_block = np.fft.irfft(estimated_complex)
    # reshape the time domain block
    estimated_block = np.reshape(estimated_block, (1,1,-1)).astype('float32')
    # set tensors to the second block
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block)
    # run calculation
    interpreter_2.invoke()
    # get output tensors
    out_block = interpreter_2.get_tensor(output_details_2[0]['index'])
    states_2 = interpreter_2.get_tensor(output_details_2[1]['index'])
    # shift values and write to buffer
    out_buffer[:-block_shift] = out_buffer[block_shift:]
    out_buffer[-block_shift:] = np.zeros((block_shift))
    out_buffer += np.squeeze(out_block)
    # write block to output file
    out_file[idx*block_shift:(idx*block_shift)+block_shift] = out_buffer[:block_shift]
    time_array.append(time.time()-start_time)
# write to .wav file
sf.write('out.wav', out_file, fs)
print('Processing Time [ms]:')
print(np.mean(np.stack(time_array))*1000)
print('Processing finished.')
'''
Error: I got the following dimension mismatch error:
'''
Traceback (most recent call last):
  File "real_time_processing_tf_lite.py", line 69, in <module>
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag)
  File "/usr/local/lib/python3.7/site-packages/tflite_runtime/interpreter.py", line 698, in set_tensor
    self._interpreter.SetTensor(tensor_index, value)
ValueError: Cannot set tensor: Dimension mismatch. Got 3 but expected 4 for input 0.
'''
This is what the shape is supposed to be:
print(interpreter_1.get_input_details())
>>[{'name': 'serving_default_input_3:0', 'index': 0, 'shape': array([ 1, 2, 128, 2], dtype=int32), 'shape_signature': array([ 1, 2, 128, 2], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}, {'name': 'serving_default_input_2:0', 'index': 1, 'shape': array([ 1, 1, 257], dtype=int32), 'shape_signature': array([ 1, 1, 257], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
And when I add another dimension to in_mag, I get this error:
in_mag = np.reshape(in_mag, (-1,1,1,-1)).astype('float32')
>>ValueError: can only specify one unknown dimension
And for other values I get this error:
>>cannot reshape array of size 257 into shape (1,128,newaxis)
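For reference, here is a minimal diagnostic sketch (reusing the interpreter objects created in the script above) that prints the name, index, and expected shape of each model input, which makes it easier to see which index expects a (1, 1, 257) tensor and which expects a (1, 2, 128, 2) tensor:
'''
# Diagnostic only: list what each input of the first TF Lite model expects.
# interpreter_1 is the tflite_runtime.Interpreter built in the script above.
for details in interpreter_1.get_input_details():
    print(details['name'], details['index'], details['shape'], details['dtype'])

# Optional: look inputs up by name instead of relying on the list order.
index_by_name = {d['name']: d['index'] for d in interpreter_1.get_input_details()}
'''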
Solution 1:[1]
You're reshaping to a tensor with 3 dimensions here:
in_mag = np.reshape(in_mag, (1,1,-1)).astype('float32')
You need to add another dimension. I'm not familiar with what you're trying to achieve, so this is only a suggestion:
in_mag = np.reshape(in_mag, (1,1,1,-1)).astype('float32')
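Only one dimension may be left as -1 in a NumPy reshape, so as a minimal sketch (assuming in_mag is still the 257-point rFFT magnitude from the script above), the extra axis could be added like this:
'''
import numpy as np

# Stand-in for the 257-point magnitude spectrum computed in the script above.
in_mag = np.abs(np.fft.rfft(np.zeros(512))).astype('float32')

# Only one -1 is allowed per reshape, so spell the other dimensions out ...
in_mag_4d = np.reshape(in_mag, (1, 1, 1, -1)).astype('float32')
# ... or add a leading axis without touching the existing ones.
in_mag_4d = np.expand_dims(np.reshape(in_mag, (1, 1, -1)), axis=0)

print(in_mag_4d.shape)  # (1, 1, 1, 257)
'''
Note that a reshape never changes the number of elements, so a 257-value array can only be reshaped into a shape whose dimensions multiply out to 257; that is why attempts such as (1, 128, -1) fail with the "cannot reshape array of size 257" error shown in the question.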
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | kelkka |