How can I resolve "InvalidArgumentError: Graph execution error: jpeg::Uncompress failed. Invalid JPEG data or crop window"?

Beginner here.

I recently converted my images to grayscale using OpenCV and then used those images for training. During training I get the following error:

InvalidArgumentError: Graph execution error:

jpeg::Uncompress failed. Invalid JPEG data or crop window.
     [[{{node decode_image/DecodeImage}}]]
     [[IteratorGetNext]] [Op:__inference_test_function_1748]

Can someone please explain to me what it means?

Here's my code:

import tensorflow as tf
import matplotlib.pyplot as plt
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Sequential

image_size = (224, 224)
batch_size = 32

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "IMAGES_G/",
    validation_split=0.2,
    subset="training",
    color_mode="grayscale",
    seed=123,
    image_size=image_size,
    batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "IMAGES_G",
    validation_split=0.2,
    subset="validation",
    color_mode="grayscale",
    seed=123,
    image_size=image_size,
    batch_size=batch_size,
)

class_names = ["med1", "med2", "med3","med4","med5","med6","med7","med8","med9","med10"]
plt.figure(figsize=(10,10))
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")

AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

normalization_layer = layers.Rescaling(1./255)

import numpy as np

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
print(np.min(first_image), np.max(first_image))


model = Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Dropout(0.2))

model.add(layers.Flatten())
model.add(layers.Dense(64, activation='softmax'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(10))

model.summary()

model.compile(
    optimizer="adam",
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits = True),
    metrics=['accuracy']
)

model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50
)

And if you have any ideas on how to improve this code, I'd really appreciate your comments.



Solution 1:[1]

The error means that the JPEG decoder in your input pipeline (the decode_image/DecodeImage node in the traceback) hit a file it could not parse, i.e. a file with an image extension whose contents are not valid JPEG data. So there is most likely a corrupted image somewhere in your training data. You can use the code below to check which images are corrupted and delete them.

from struct import unpack
from tqdm import tqdm
import os
import os.path as osp

marker_mapping = {
    0xffd8: "Start of Image",
    0xffe0: "Application Default Header",
    0xffdb: "Quantization Table",
    0xffc0: "Start of Frame",
    0xffc4: "Define Huffman Table",
    0xffda: "Start of Scan",
    0xffd9: "End of Image"
}
    
class JPEG:
    def __init__(self, image_file):
        with open(image_file, 'rb') as f:
            self.img_data = f.read()
    
    def decode(self):
        # Walk the JPEG segment markers; unpack() raises if the file is
        # truncated or otherwise malformed.
        data = self.img_data
        while True:
            marker, = unpack(">H", data[0:2])
            # print(marker_mapping.get(marker))
            if marker == 0xffd8:
                # Start of Image: skip the two marker bytes
                data = data[2:]
            elif marker == 0xffd9:
                # End of Image: the file parsed cleanly
                return
            elif marker == 0xffda:
                # Start of Scan: jump to the last two bytes, which should be EOI
                data = data[-2:]
            else:
                # Any other segment: skip its payload using the length field
                lenchunk, = unpack(">H", data[2:4])
                data = data[2+lenchunk:]
            if len(data) == 0:
                break
    
bads = []

# Collect every image file under the class sub-folders of the dataset root
# (the same "IMAGES_G" directory the question builds its datasets from).
root_img = "IMAGES_G"
images = [
    osp.relpath(osp.join(dirpath, fname), root_img)
    for dirpath, _, fnames in os.walk(root_img)
    for fname in fnames
]

for img in tqdm(images):
    image = osp.join(root_img, img)
    image = JPEG(image)
    try:
        image.decode()
    except Exception:
        bads.append(img)

for name in bads:
    os.remove(osp.join(root_img, name))
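
As a cross-check, you can also validate every file with TensorFlow's own decoder, which creates the same DecodeImage op that appears in your traceback. This is a minimal sketch, assuming the IMAGES_G directory layout from your question:

import os
import tensorflow as tf

root_img = "IMAGES_G"  # same folder the datasets are built from

for dirpath, _, fnames in os.walk(root_img):
    for fname in fnames:
        path = os.path.join(dirpath, fname)
        try:
            # Run TensorFlow's decoder on the raw bytes; corrupt files raise
            # the same InvalidArgumentError you see during training.
            tf.io.decode_image(tf.io.read_file(path))
        except tf.errors.InvalidArgumentError:
            print("Corrupt or unreadable image:", path)

Anything this prints is exactly what would crash model.fit, so delete or re-export those files and train again.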

Let us know if the issue still persists. Thanks!

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1: Tfer3