Subclass API model does not work in tf.GradientTape() (No gradient defined for operation 'IteratorGetNext' (op type: IteratorGetNext))

I made a TensorFlow model using the subclassing API and am trying to fit it with a gradient tape, but I get an error when I execute this code:

with tf.GradientTape() as tape:
    logits_r,logits_cw = model(img,training=True)
    loss1 = balanced_entropy(logits_r,hr)
    loss2 = balanced_entropy(logits_cw,hb)
    w1,w2 = cross_two_tasks_weight(hr,hb)
    loss = w1*loss1+w2*loss2

No gradient defined for operation 'IteratorGetNext' (op type: IteratorGetNext)

How can I handle this error?

This is the model part of my code:

import tensorflow as tf
import cv2
import pdb
import numpy as np
import os
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

#print('Is gpu available: ',tf.test.is_gpu_available());

#%%
def conv2d(dim,kernel_size=3,stride=1,rate=1,pad='same',act='relu'):
    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.Conv2D(dim,kernel_size,
            strides=stride,padding=pad,dilation_rate=rate))
    if act=='leaky':
        result.add(tf.keras.layers.LeakyReLU())
    elif act=='relu':
        result.add(tf.keras.layers.ReLU())

    return result


def dilation_conv2d(dim,kernel_size=3,stride=1,pad='same',act='relu',
    dilation_rate=1,use_bias=False):
    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.Conv2D(dim,kernel_size,
            strides=stride,padding=pad,use_bias=use_bias,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            dilation_rate=dilation_rate))
    if act=='leaky':
        result.add(tf.keras.layers.LeakyReLU())
    elif act=='relu':
        result.add(tf.keras.layers.ReLU())
    result.add(tf.keras.layers.BatchNormalization())    
    return result

def conv2d_2(dim,kernel_size=3,stride=1,rate=1,pad='same',act='relu',
    dilation_rate=1,use_bias=False):
    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.Conv2D(dim,kernel_size,
            strides=stride,padding=pad,use_bias=use_bias,
            kernel_initializer=tf.keras.initializers.HeNormal(),
            dilation_rate=dilation_rate))
    if act=='leaky':
        result.add(tf.keras.layers.LeakyReLU())
    elif act=='relu':
        result.add(tf.keras.layers.ReLU())
    result.add(tf.keras.layers.BatchNormalization())    
    return result    

def max_pool2d(size=2,stride=2,pad='valid'):
    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.MaxPool2D(pool_size=size,
            strides=stride,padding=pad))
    return result

def upconv2d(dim,size=4,stride=2,pad='same',act='relu'):
    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.Conv2DTranspose(dim,size,
            strides=stride,padding=pad))
    if act=='relu':
        result.add(tf.keras.layers.ReLU())
    return result


class deepfloorplanModel(Model):
    def __init__(self,config=None):
        super(deepfloorplanModel,self).__init__()
        self._resnet50init()
        # room boundary prediction (rbp)
        self.rbpcv1 = conv2d_2(256)
        self.rbpcv2 = conv2d_2(256)
        self.rbpcv_final = tf.keras.layers.Conv2D(3, kernel_size=(1, 1), padding="same")

        # room type prediction (rtp)
        self.rtpcv1 = conv2d_2(256)
        self.rtpcv2 = conv2d_2(256)
        self.rtpcv_final = tf.keras.layers.Conv2D(9, kernel_size=(1, 1), padding="same")
        
        # attention map
        self.atts1 = conv2d(dim=256)
        self.atts2 = conv2d(dim=256)
        self.atts3 = conv2d(dim=1,kernel_size=1,act='sigmoid') 
        # reduce the tensor depth
        self.xs1 = conv2d(dim=256)
        self.xs2 = conv2d(dim=1,kernel_size=1,act='linear')
        
        # context conv2d
        dak = [9,17,33,65] # kernel_shape=[h,v,inc,outc]
        # horizontal
        self.hs = self.constant_kernel((128,1,1,1))
        self.hf = tf.keras.layers.Conv2D(1,[128,1],
            strides=1,padding='same',trainable=False,
            use_bias=False,weights=[self.hs]) 
        # vertical
        self.vs = self.constant_kernel((1,128,1,1)) 
        self.vf = tf.keras.layers.Conv2D(1,[1,128],
            strides=1,padding='same',trainable=False,
            use_bias=False,weights=[self.vs]) 

        # diagonal
        self.ds = self.constant_kernel((128,128,1,1),diag=True) 
        self.df = tf.keras.layers.Conv2D(1,128,
            strides=1,padding='same',trainable=False,
            use_bias=False,weights=[self.ds]) 
        # diagonal flip
        self.dfs = self.constant_kernel((128,128,1,1),diag=True,flip=True)
        self.dff = tf.keras.layers.Conv2D(1,128,
            strides=1,padding='same',trainable=False,
            use_bias=False,weights=[self.dfs])           

        # expand dim
        self.ed=conv2d(dim=256,kernel_size=1,act='linear') 
        # learn rich feature
        self.lrf=conv2d(dim=256)
        
        #dilated layer
        self.dilated_1 = dilation_conv2d(256,kernel_size=1,use_bias=True)
        self.dilated_2 = dilation_conv2d(256,kernel_size=1,dilation_rate=1)
        self.dilated_3 = dilation_conv2d(256,kernel_size=3,dilation_rate=6)
        self.dilated_4 = dilation_conv2d(256,kernel_size=3,dilation_rate=12)
        self.dilated_5 = dilation_conv2d(256,kernel_size=3,dilation_rate=18)
        self.dilation_out = dilation_conv2d(256,kernel_size=1)
        self.dilation_sub = dilation_conv2d(48,kernel_size=1)

    def _resnet50init(self):
        self.base_model = ResNet50(weights='imagenet',include_top=False,
            input_shape=(512,512,3))
        for layer in self.base_model.layers:
            layer.trainable = False    
        self.resnet50_a = Model(inputs=self.base_model.input, 
            outputs=self.base_model.get_layer('conv4_block6_2_relu').output)              
        self.resnet50_b = Model(inputs=self.base_model.input,
            outputs=self.base_model.get_layer('conv2_block3_2_relu').output)    


    def constant_kernel(self,shape,val=1,diag=False,flip=False):
        k = 0
        if not diag:
            k = val*np.ones(shape)
        else:
            w = np.eye(shape[0],shape[1])
            if flip:
                w = w.reshape((shape[0],shape[1],1))
                w = np.flip(w,1)
            k = w.reshape(shape)
        return k

    def DilatedSpatialPyramidPooling(self, dspp_input):
        dims = dspp_input.shape
        x = tf.keras.layers.AveragePooling2D(pool_size=(dims[-3], dims[-2]))(dspp_input)
        x = self.dilated_1(x)
        out_pool = tf.keras.layers.UpSampling2D(size=(dims[-3] // x.shape[1], dims[-2] // x.shape[2]), interpolation="bilinear",)(x)

        out_1 = self.dilated_2(dspp_input)
        out_6 = self.dilated_3(dspp_input)
        out_12 = self.dilated_4(dspp_input)
        out_18 = self.dilated_5(dspp_input)

        x = tf.keras.layers.Concatenate(axis=-1)([out_pool, out_1, out_6, out_12, out_18])
        output = self.dilation_out(x)
        return output            
    # t1 : rbp /t2: rtp
    def non_local_context(self,t1,t2,stride=4):
        N,H,W,C = t1.shape.as_list()
        hs = H // stride if (H // stride) > 1 else (stride-1)
        vs = W // stride if (W // stride) > 1 else (stride-1)
        hs = hs if (hs%2!=0) else hs+1
        vs = hs if (vs%2!=0) else vs+1
        a = t1 
        x = t2
        a = self.atts1(a)
        a = self.atts2(a)
        a = self.atts3(a)
        a = tf.keras.activations.sigmoid(a)      
        x = self.xs1(x)
        x = self.xs2(x)
        x = a*x
        
        h = self.hf(x)
        v = self.vf(x)
        d = self.df(x)
        f = self.dff(x)
        c1 = a*(h+v+d+f)
        c1 = self.ed(c1)
        
        features = tf.concat([t2,c1],axis=3)
        out = self.lrf(features)
        return out
    def call(self, inputs):
        feature_a = self.resnet50_a.predict(inputs)
        feature_a = self.DilatedSpatialPyramidPooling(feature_a)
        feature_a = tf.keras.layers.UpSampling2D(size=(512 // 4 // feature_a.shape[1], 512 // 4 // feature_a.shape[2]),
            interpolation="bilinear")(feature_a)

        feature_b = self.resnet50_b.predict(inputs)
        feature_b = self.dilation_sub(feature_b)

        common_feature = tf.keras.layers.Concatenate(axis=-1)([feature_a, feature_b])
        # room_boundary_Upsampling
        rbp_feature = self.rbpcv1(common_feature)
        rbp_feature = self.rbpcv2(rbp_feature)
        #128 x 128 x 256 feature
        attention_bp = rbp_feature
        rbp_feature = tf.keras.layers.UpSampling2D(size=(512 // rbp_feature.shape[1], 512 // rbp_feature.shape[2]),interpolation="bilinear",)(rbp_feature)
        rbp_feature = self.rbpcv_final(rbp_feature)
        #room_type_Upsampling
        rtp_feature = self.rtpcv1(common_feature)
        rtp_feature = self.rtpcv2(rtp_feature)
        rtp_feature = self.non_local_context(attention_bp,rtp_feature)
        rtp_feature = tf.keras.layers.UpSampling2D(size=(512 // rtp_feature.shape[1], 512 // rtp_feature.shape[2]),interpolation="bilinear",)(rtp_feature)
        rtp_feature = self.rtpcv_final(rtp_feature)


        return rtp_feature,rbp_feature 

And this is the full error I got:

---------------------------------------------------------------------------
LookupError                               Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\ops\gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph)
    605           try:
--> 606             grad_fn = ops.get_gradient_function(op)
    607           except LookupError:

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\framework\ops.py in get_gradient_function(op)
   2731     op_type = op.type
-> 2732   return gradient_registry.lookup(op_type)
   2733 

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\framework\registry.py in lookup(self, name)
     98     else:
---> 99       raise LookupError(
    100           "%s registry has no entry for: %s" % (self._name, name))

LookupError: gradient registry has no entry for: IteratorGetNext

During handling of the above exception, another exception occurred:

LookupError                               Traceback (most recent call last)
c:\Users\HAN\OneDrive - UOS\바탕 화면\TF2DeepFloorplan-main - test_net\train.py in <module>
      111 with tf.GradientTape() as tape:
----> 112     logits_r,logits_cw = model(img,training=True)
      113     loss1 = balanced_entropy(logits_r,hr)
      114     loss2 = balanced_entropy(logits_cw,hb)
      115     w1,w2 = cross_two_tasks_weight(hr,hb)

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
   1035         with autocast_variable.enable_auto_cast_variables(
   1036             self._compute_dtype_object):
-> 1037           outputs = call_fn(inputs, *args, **kwargs)
   1038 
   1039         if self._activity_regularizer:

c:\Users\HAN\OneDrive - UOS\바탕 화면\TF2DeepFloorplan-main - test_net\my_deepfloorplan_code.py in call(self, inputs)
    200         return out
    201     def call(self, inputs):
--> 202         feature_a = self.resnet50_a.predict(inputs)
    203         feature_a = self.DilatedSpatialPyramidPooling(feature_a)
    204         feature_a = tf.keras.layers.UpSampling2D(size=(512 // 4 // feature_a.shape[1], 512 // 4 // feature_a.shape[2]),

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\keras\engine\training.py in predict(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)
   1749           for step in data_handler.steps():
   1750             callbacks.on_predict_batch_begin(step)
-> 1751             tmp_batch_outputs = self.predict_function(iterator)
   1752             if data_handler.should_sync:
   1753               context.async_wait()

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
    883 
    884       with OptionalXlaContext(self._jit_compile):
--> 885         result = self._call(*args, **kwds)
    886 
    887       new_tracing_count = self.experimental_get_tracing_count()

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
    922       # In this case we have not created variables on the first call. So we can
    923       # run the first trace but we should fail if variables are created.
--> 924       results = self._stateful_fn(*args, **kwds)
    925       if self._created_variables and not ALLOW_DYNAMIC_VARIABLE_CREATION:
    926         raise ValueError("Creating variables on a non-first call to a function"

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\function.py in __call__(self, *args, **kwargs)
   3037       (graph_function,
   3038        filtered_flat_args) = self._maybe_define_function(args, kwargs)
-> 3039     return graph_function._call_flat(
   3040         filtered_flat_args, captured_inputs=graph_function.captured_inputs)  # pylint: disable=protected-access
   3041 

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1967         possible_gradient_type,
   1968         executing_eagerly)
-> 1969     forward_function, args_with_tangents = forward_backward.forward()
   1970     if executing_eagerly:
   1971       flat_outputs = forward_function.call(

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\function.py in forward(self)
   1492   def forward(self):
   1493     """Builds or retrieves a forward function for this call."""
-> 1494     forward_function = self._functions.forward(
   1495         self._inference_args, self._input_tangents)
   1496     return forward_function, self._inference_args + self._input_tangents

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\function.py in forward(self, inference_args, input_tangents)
   1224       (self._forward, self._forward_graph, self._backward,
   1225        self._forwardprop_output_indices, self._num_forwardprop_outputs) = (
-> 1226            self._forward_and_backward_functions(inference_args, input_tangents))
   1227     return self._forward
   1228 

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\function.py in _forward_and_backward_functions(self, inference_args, input_tangents)
   1375     """
   1376     outputs = self._func_graph.outputs[:self._num_inference_outputs]
-> 1377     return self._build_functions_for_outputs(
   1378         outputs, inference_args, input_tangents)
   1379 

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\eager\function.py in _build_functions_for_outputs(self, outputs, inference_args, input_tangents)
    942         gradients_wrt_outputs.append(gradient_placeholder)
    943       with ops.device(None):
--> 944         gradients_wrt_inputs = gradients_util._GradientsHelper(  # pylint: disable=protected-access
    945             trainable_outputs,
    946             self._func_graph.inputs,

C:\ProgramData\Anaconda3\envs\tf_envs_0\lib\site-packages\tensorflow\python\ops\gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph)
    632               grad_fn = func_call.python_grad_func
    633             else:
--> 634               raise LookupError(
    635                   "No gradient defined for operation '%s' (op type: %s)" %
    636                   (op.name, op.type))

LookupError: No gradient defined for operation 'IteratorGetNext' (op type: IteratorGetNext)


Solution 1:[1]

A subclassed model can be trained with model.fit, but you need to compile it before fitting, and then you don't drive the GradientTape yourself. I believe there is also a way to state the fit procedure explicitly, using a gradient tape inside the training step. You should look for an example of an explicit compile and fit; a sketch follows below.
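For example, here is a minimal, self-contained sketch of that pattern (a toy two-head model with placeholder losses and random data, not the asker's deepfloorplanModel): override train_step() so that model.fit() runs the GradientTape logic for you, then compile and fit.

import tensorflow as tf

class ToyTwoHeadModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.backbone = tf.keras.layers.Dense(16, activation='relu')
        self.head_r = tf.keras.layers.Dense(3)   # stands in for the room-type logits
        self.head_cw = tf.keras.layers.Dense(3)  # stands in for the boundary logits
        self.loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    def call(self, inputs, training=False):
        x = self.backbone(inputs)
        return self.head_r(x), self.head_cw(x)

    def train_step(self, data):
        # fit() has already pulled the batch from the input pipeline here,
        # so the tape below never has to differentiate through an iterator.
        img, (hr, hb) = data
        with tf.GradientTape() as tape:
            logits_r, logits_cw = self(img, training=True)
            loss = self.loss_fn(hr, logits_r) + self.loss_fn(hb, logits_cw)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return {'loss': loss}

# Compile before fit, as described above; fit() then drives train_step().
model = ToyTwoHeadModel()
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3))

x = tf.random.normal((32, 8))
hr = tf.random.uniform((32,), maxval=3, dtype=tf.int32)
hb = tf.random.uniform((32,), maxval=3, dtype=tf.int32)
ds = tf.data.Dataset.from_tensor_slices((x, (hr, hb))).batch(8)
model.fit(ds, epochs=2)

Note also that the traceback above fails inside self.resnet50_a.predict(inputs), which is invoked from call(); predict() spins up its own input iterator, which appears to be where the IteratorGetNext op comes from. Inside call(), the usual pattern is to invoke the sub-model directly, e.g. self.resnet50_a(inputs).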

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source

Solution 1: Yui Chun Leung