Using tf.keras.utils.image_dataset_from_directory with a label list
I have a list of labels, one for each file in the directory, for example: [1,2,3]
# Build the training dataset from `train_path`, passing an explicit list of
# integer labels (`train_labels`) rather than relying on inferred labels.
# This is the call that raises the ValueError quoted below: the length of
# `labels` has to match the number of files found in the target directory.
train_ds = tf.keras.utils.image_dataset_from_directory(
train_path,
label_mode='int',
labels = train_labels,
# validation_split=0.2,
# subset="training",
shuffle=False,
seed=123,
image_size=(img_height, img_width),
batch_size=batch_size)
I get error:
ValueError: Expected the lengths of `labels` to match the number of files in the target directory. len(labels) is 51033 while we found 0 files in ../input/jpeg-happywhale-128x128/train_images-128-128/train_images-128-128.
I tried pointing to the parent directory instead, but in that case I get only 1 class.
Solution 1:[1]
Your data folder probably does not have the right structure. Try something like this:
import os

import numpy
from PIL import Image
import tensorflow as tf

samples = 10
class_dirs = ['/content/data/class1/', '/content/data/class2/']

# The images are written into one subdirectory per class, so make sure those
# folders exist first; otherwise `im.save` fails on a fresh machine.
for class_dir in class_dirs:
    os.makedirs(class_dir, exist_ok=True)

# Cycle through the class folders so that even indices land in class1 and odd
# indices in class2, producing `samples` random 100x100 RGB images per class.
for idx, c in enumerate(class_dirs * samples):
    imarray = numpy.random.rand(100, 100, 3) * 255
    im = Image.fromarray(imarray.astype('uint8')).convert('RGB')
    im.save('{}result_image{}.png'.format(c, idx))

# One label per image file. The list is laid out as all class1 files followed
# by all class2 files; with shuffle=False the first batch printed below is
# therefore expected to carry label 0 only.
train_labels = [0] * samples + [1] * samples

train_ds = tf.keras.utils.image_dataset_from_directory(
    '/content/data',
    label_mode='int',
    labels=train_labels,
    shuffle=False,
    seed=123,
    image_size=(100, 100),
    batch_size=4)

# Inspect the first batch: image tensor shape and the labels attached to it.
for x, y in train_ds.take(1):
    print(x.shape, y)
Found 20 files belonging to 2 classes.
(4, 100, 100, 3) tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
Your folder structure should look like this:
data
├── class2
│   ├── result_image5.png
│   ├── result_image9.png
│   ├── result_image15.png
│   ├── result_image13.png
│   ├── result_image1.png
│   ├── result_image3.png
│   ├── result_image11.png
│   ├── result_image19.png
│   ├── result_image7.png
│   └── result_image17.png
└── class1
    ├── result_image14.png
    ├── result_image8.png
    ├── result_image12.png
    ├── result_image18.png
    ├── result_image16.png
    ├── result_image6.png
    ├── result_image2.png
    ├── result_image10.png
    ├── result_image4.png
    └── result_image0.png
Solution 2:[2]
According to the documentation for image_dataset_from_directory, the `labels` argument can be 'inferred' (labels are generated from the directory structure) or None (no labels); when labels are inferred, the subdirectory names determine the class names. In this example I use the cats and dogs images, where cats are labeled '0' and dogs get the next label.
[ Sample ]:
import os
import tensorflow as tf

# ---------------------------------------------------------------------------
# Variables
# ---------------------------------------------------------------------------
PATH = 'F:\\datasets\\downloads\\sample\\cats_dogs\\training'
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

BATCH_SIZE = 1  # 32
IMG_SIZE = (32, 32)

# No `labels` argument is passed, so labels are inferred from the class
# subdirectories found under each directory.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    validation_dir,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE)

# Read the class names before `prefetch` is applied below; they size the
# model's output layer.
class_names = train_dataset.class_names

print('Number of training batches: %d' % tf.data.experimental.cardinality(train_dataset).numpy())
print('Number of validation batches: %d' % tf.data.experimental.cardinality(validation_dataset).numpy())

# ---------------------------------------------------------------------------
# DataSet
# ---------------------------------------------------------------------------
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)

# ---------------------------------------------------------------------------
# Model Initialize
# ---------------------------------------------------------------------------
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(32, 32, 3)),
    # Treat each image row (32 pixels x 3 channels) as one timestep for the LSTMs.
    tf.keras.layers.Reshape((32, 32 * 3)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True, return_state=False)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(256),
    tf.keras.layers.Dropout(.2),
    tf.keras.layers.Dense(256),
])
model.add(tf.keras.layers.Flatten())
# One raw score (logit) per class found on disk, instead of a hard-coded 10.
model.add(tf.keras.layers.Dense(len(class_names)))
model.summary()

# ---------------------------------------------------------------------------
# Optimizer
# ---------------------------------------------------------------------------
optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
    name='Nadam'
)  # 0.00001

# ---------------------------------------------------------------------------
# Loss Fn
# ---------------------------------------------------------------------------
# The final Dense layer has no activation, so the model outputs logits and
# the loss must be told so (from_logits=True).
lossfn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction=tf.keras.losses.Reduction.AUTO,
    name='sparse_categorical_crossentropy')

# ---------------------------------------------------------------------------
# Model Compile
# ---------------------------------------------------------------------------
# The datasets yield integer class ids, so use the sparse accuracy metric;
# CategoricalAccuracy expects one-hot targets and gives meaningless values here.
model.compile(
    optimizer=optimizer,
    loss=lossfn,
    metrics=['accuracy', tf.keras.metrics.SparseCategoricalAccuracy()])

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
history = model.fit(train_dataset, epochs=15000, validation_data=validation_dataset)

# Keep the console window open until the user responds.
input("Press Any Key!")
[ Output ]:
Epoch 1233/15000
1/14 [=>............................] - ETA: 0s - loss: 1.2278e-05 - accuracy: 1.0000 - categorical_accuracy: 0.0000e+0
3/14 [=====>........................] - ETA: 0s - loss: 0.7675 - accuracy: 1.0000 - categorical_accuracy: 0.3333
14/14 [==============================] - 1s 40ms/step - loss: 1.3322 - accuracy: 0.7857 - categorical_accuracy: 0.5000 - val_loss: 1.1513 - val_accuracy: 0.7857 - val_categorical_accuracy: 0.5000
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | AloneTogether |
Solution 2 | Martijn Pieters |