How to avoid "keras_learning_phase" being added to the model when revising it?

I'm writing a small program to convert a model from .h5/.hdf5 to .pb.

(While doing so, I change its input layer from its original shape to [None, None, c].)

(c is the same as the original model's input channel count.)

The reason for doing so is that I can then run inference with the trained model on inputs of any spatial size.
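
(Just to illustrate the goal: once the input placeholder is [None, None, c], the same frozen graph can be fed images of different spatial sizes at inference time. A rough sketch, where the path and tensor names below are only examples:)

import numpy as np
import tensorflow as tf

# example path / tensor names, only to illustrate the goal
with tf.gfile.GFile('./pb/my_model_dynamic.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')

inp = graph.get_tensor_by_name('input_1:0')
out = graph.get_tensor_by_name('output_1:0')

with tf.Session(graph=graph) as sess:
    # the same frozen graph accepts different spatial sizes (c = 3 here)
    for h, w in [(480, 480), (256, 320)]:
        img = np.zeros((1, h, w, 3), dtype=np.float32)
        print(sess.run(out, feed_dict={inp: img}).shape)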

So far, I can get through the whole process with two separate scripts:

code_1: convert .h5/.hdf5 to .pb

from keras.models import load_model
import tensorflow as tf
import os
import json
from keras import backend as K
from tensorflow.python.framework import graph_io
import Unet
from keras.layers import Input

def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1+ K.pow(10.0, -9)))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0+ K.pow(10.0, -9)))
    return focal_loss_fixed

def IOU_calc():
    def IOU_calc_fixed(y_true, y_pred):
        smooth = 0.001
        y_true_f = K.flatten(y_true)
        y_pred_f = K.flatten(y_pred)

        intersection = K.sum(y_true_f * y_pred_f)

        return 2*(intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return IOU_calc_fixed

def IOU_calc_loss():
    return lambda y_true, y_pred: -IOU_calc()(y_true, y_pred)

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        # add for new test
        frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
        return frozen_graph

def h5_to_pb(h5_model, output_dir, model_name, out_prefix = "output_"):
    out_nodes = []
    for i in range(len(h5_model.outputs)):
        out_nodes.append(out_prefix + str(i + 1))
        tf.identity(h5_model.outputs[i], name=out_prefix + str(i + 1))
    # added so that dropout is ignored at inference time(?)
    K.set_learning_phase(0)

    sess = K.get_session()
    from tensorflow.python.framework import graph_util,graph_io
    init_graph = sess.graph.as_graph_def()
    main_graph = graph_util.convert_variables_to_constants(sess, init_graph, out_nodes)
    graph_io.write_graph(main_graph, output_dir, name = model_name, as_text = False)

if __name__ == '__main__':

    config_path = r'unet_h5_to_pb_config.json'
    with open(config_path) as config_buffer:
        a = config_buffer.read()
        a = a.replace('\\', '\\\\')
        config = json.loads(a)
    """-------------------------Set Paths-----------------------------------"""
    h5_model_path = config["h5_md_path"]

    if "\\" in h5_model_path:
        h5_name = h5_model_path.split("\\")[-1]
        pb_model_name = h5_name.split(".")[0] + ".pb"

        if len(config["pb_save_path"]) == 0:
            output_path = os.path.dirname(h5_model_path)  # directory of the .h5 file
        else:
            output_path = config["pb_save_path"]
            if os.path.exists(output_path) is False:
                os.mkdir(output_path)
        out_md_path = os.path.join(output_path, pb_model_name)
    else:
        h5_name = h5_model_path.split("/")[-1]
        pb_model_name = h5_name.split(".")[0] + ".pb"

        if len(config["pb_save_path"]) == 0:
            output_path = os.path.dirname(h5_model_path)  # directory of the .h5 file
        else:
            output_path = config["pb_save_path"]
            if os.path.exists(output_path) is False:
                os.mkdir(output_path)
        out_md_path = os.path.join(output_path, pb_model_name)

    """---------------------------load h5 model-----------------------------"""
    K.set_learning_phase(0)
    try:
        net_model = load_model(h5_model_path,
                               custom_objects={'IOU_calc': IOU_calc()})
        print("***Handling model with structure***")
    except ValueError as E:
        if "Cannot create group in read only mode" in str(E):
            print("***Handling model without structure***")
            input_shape = tuple(config["input_shape"])
            model_class = config["model_class"]
            net_model = Unet.small_unet(input_shape, model_class)
            net_model.load_weights(h5_model_path)
        else:
            raise

    print('***input is :', net_model.input.name)
    print('***output is:', net_model.output.name)
    """----------------------------save pb file-----------------------------"""
    if config["name_output_node"]:
        h5_to_pb(net_model, output_dir=output_path, model_name=pb_model_name)
    else:
        sess = K.get_session()
        frozen_graph = freeze_session(K.get_session(),
                                      output_names=[net_model.output.op.name])
        graph_io.write_graph(frozen_graph,
                             output_path, pb_model_name, as_text=False)
    print(f"===== FINISH converting model to pb file {out_md_path} =====")

code_2: change .pb input layer shape

# ref: http://digital-thinking.de/tensorflow-replace-tensors-of-a-savedmodel-or-frozengraph/

import tensorflow as tf
from keras.layers import Input


def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    for n in graph_def.node:
        if "input_" in n.name:
            input_name = n.name + ":0"
            input_channel = n.attr["shape"].shape.dim[3].size

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')

    return graph, input_name, input_channel


# ref:https://stackoverflow.com/a/56324805/10427809
def delete_ops_from_graph(graph_def, output_model_filepath):

    # Delete nodes
    nodes = []
    input_count = 0
    for node in graph_def.node:
        if "input" in node.name and "dropout" not in node.name:
            if input_count == 0:
                nodes.append(node)
                input_count += 1
            else:
                print('Drop', node.name)
        else:
            nodes.append(node)

    mod_graph_def = tf.GraphDef()
    mod_graph_def.node.extend(nodes)

    # Delete references to deleted nodes
    for node in mod_graph_def.node:
        inp_names = []
        for inp in node.input:
            if 'Neg' in inp:
                pass
            else:
                inp_names.append(inp)

        del node.input[:]
        node.input.extend(inp_names)
    with open(output_model_filepath, 'wb') as f:
        f.write(mod_graph_def.SerializeToString())
    print(f"*** save new model at{output_model_filepath}")

if __name__ == "__main__":

    tf_md_path = r'./pb/my_model_weights_480.pb'

    out_md_path = tf_md_path.replace(".pb", "_dynamic.pb")
    
    sess = tf.Session()
    graph_model, input_name, C = load_graph(tf_md_path)
    graph_model_def = graph_model.as_graph_def()
    resize_val = (None, None, C)
    rp_input = Input(resize_val)
    tf.import_graph_def(graph_model_def, name='',
                        input_map={input_name: rp_input})
    
    graph = tf.get_default_graph()
    graph_def = graph.as_graph_def()
    print(graph_def.node)
    delete_ops_from_graph(graph_def, out_md_path)
    print("=== Finish update model with dynamic input size ===")

But when I try to combine the two scripts together (like this):

from keras.models import load_model
import tensorflow as tf
import os
import json
from keras import backend as K
from tensorflow.python.framework import graph_io
import Unet
from keras.layers import Input

def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1+ K.pow(10.0, -9)))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0+ K.pow(10.0, -9)))
    return focal_loss_fixed

def IOU_calc():
    def IOU_calc_fixed(y_true, y_pred):
        smooth = 0.001
        y_true_f = K.flatten(y_true)
        y_pred_f = K.flatten(y_pred)

        intersection = K.sum(y_true_f * y_pred_f)

        return 2*(intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return IOU_calc_fixed

def IOU_calc_loss():
    return lambda y_true, y_pred: -IOU_calc()(y_true, y_pred)

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        print(output_names)
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        # add for new test
        frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
        return frozen_graph

def h5_to_pb(h5_model, output_dir, model_name, out_prefix = "output_"):
    out_nodes = []
    for i in range(len(h5_model.outputs)):
        out_nodes.append(out_prefix + str(i + 1))
        tf.identity(h5_model.outputs[i], name=out_prefix + str(i + 1))
    # added so that dropout is ignored at inference time(?)
    K.set_learning_phase(0)

    sess = K.get_session()
    from tensorflow.python.framework import graph_util,graph_io
    init_graph = sess.graph.as_graph_def()
    main_graph = graph_util.convert_variables_to_constants(sess, init_graph, out_nodes)
    graph_io.write_graph(main_graph, output_dir, name = model_name, as_text = False)

def load_graph(frozen_graph_filename):
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    for n in graph_def.node:
        if "input_" in n.name:
            input_name = n.name + ":0"

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
                
    return graph, input_name


def delete_ops_from_graph(graph_def, output_model_filepath):

    # Delete nodes
    nodes = []
    input_count = 0
    for node in graph_def.node:
        if "input" in node.name and "dropout" not in node.name:
            if "keras" not in node.name:
                if input_count == 0:
                    nodes.append(node)
                    input_count += 1
                else:
                    print('Drop', node.name)
            else:
                nodes.append(node)
        else:
            nodes.append(node)

    mod_graph_def = tf.GraphDef()
    mod_graph_def.node.extend(nodes)

    # Delete references to deleted nodes
    for node in mod_graph_def.node:
        inp_names = []
        for inp in node.input:
            if 'Neg' in inp:
                pass
            else:
                inp_names.append(inp)

        del node.input[:]
        node.input.extend(inp_names)

    with open(output_model_filepath, 'wb') as f:
        f.write(mod_graph_def.SerializeToString())


if __name__ == '__main__':

    config_path = r'unet_h5_to_pb_config.json'
    with open(config_path) as config_buffer:
        a = config_buffer.read()
        a = a.replace('\\', '\\\\')
        config = json.loads(a)
    """-------------------------Set Paths-----------------------------------"""
    h5_model_path = config["h5_md_path"]

    if "\\" in h5_model_path:
        h5_name = h5_model_path.split("\\")[-1]
        pb_model_name = h5_name.split(".")[0] + ".pb"

        if len(config["pb_save_path"]) == 0:
            output_path = os.path.dirname(h5_model_path)  # directory of the .h5 file
        else:
            output_path = config["pb_save_path"]
            if os.path.exists(output_path) is False:
                os.mkdir(output_path)
        out_md_path = os.path.join(output_path, pb_model_name)
    else:
        h5_name = h5_model_path.split("/")[-1]
        pb_model_name = h5_name.split(".")[0] + ".pb"

        if len(config["pb_save_path"]) == 0:
            output_path = os.path.dirname(h5_model_path)  # directory of the .h5 file
        else:
            output_path = config["pb_save_path"]
            if os.path.exists(output_path) is False:
                os.mkdir(output_path)
        out_md_path = os.path.join(output_path, pb_model_name)

    """---------------------------load h5 model-----------------------------"""
    K.set_learning_phase(0)
    try:
        net_model = load_model(h5_model_path,
                               custom_objects={'IOU_calc': IOU_calc()})
        print("***Handling model with structure***")
    except ValueError as E:
        if "Cannot create group in read only mode" in str(E):
            print("***Handling model without structure***")
            input_shape = tuple(config["input_shape"])
            model_class = config["model_class"]
            net_model = Unet.small_unet(input_shape, model_class)
            net_model.load_weights(h5_model_path)
        else:
            raise

    print('***input is :', net_model.input.name)
    print('***output is:', net_model.output.name)
    """----------------------------save pb file-----------------------------"""
    if config["name_output_node"]:
        h5_to_pb(net_model, output_dir=output_path, model_name=pb_model_name)
    else:
        sess = K.get_session()
        frozen_graph = freeze_session(K.get_session(),
                                      output_names=[net_model.output.op.name])
        graph_io.write_graph(frozen_graph,
                             output_path, pb_model_name, as_text=False)
    print(f"===== FINISH converting model to pb file {out_md_path} =====")

    """----------------------------revise input-----------------------------"""
    K.clear_session()
    if config["dynamic_input"]:
        tf_md_path = os.path.join(output_path, pb_model_name)
        out_pb_path = tf_md_path.replace(".pb", "_dynamic.pb")
        resize_val = (None, None, config["model_inp_ch"])
        
        graph_model, input_name = load_graph(tf_md_path)
        graph_model_def = graph_model.as_graph_def()

        rp_input = Input(resize_val)
        
        tf.import_graph_def(graph_model_def, name='',
                            input_map={input_name: rp_input})

        graph = tf.get_default_graph()
        graph_def = graph.as_graph_def()
        #print(graph_def.node)
        delete_ops_from_graph(graph_def, out_pb_path)
        print("=== Finish update model with dynamic input size ===")

the part that changes the .pb input layer ends up with two extra nodes added before the input,

namely "keras_learning_phase/input:0" and "keras_learning_phase:0", both with bool as their input type.

Since this doesn't happen when I run code_1 and code_2 separately,

I think it might be caused by the Keras backend state or by K.set_learning_phase(0)(?)

I've added "K.clear_session()" after code_1's part before entering code_2's part,

but the result is still the same.

So I'm wondering is there still something I can do to avoid this happening?

(Thanks in advance for any help or suggestion!)



Solution 1:[1]

There are two solutions for this, but both must be executed inside the graph itself.

The easy one is to call K.set_learning_phase(0) inside the graph that you import from your default graph.
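
One way to read that in the context of your combined script (a sketch only, not tested, reusing the names from your code; the point is to set the phase again inside the fresh graph, right after K.clear_session() and before Input() is used):

K.clear_session()
K.set_learning_phase(0)  # re-apply inside the fresh default graph, before Input() is used

graph_model, input_name = load_graph(tf_md_path)
graph_model_def = graph_model.as_graph_def()

rp_input = Input((None, None, config["model_inp_ch"]))
tf.import_graph_def(graph_model_def, name='',
                    input_map={input_name: rp_input})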

If that does not work out, you can try this trick: because you are converting the model's weights to constants anyway, you can strip these keras_learning_phase nodes from the graph by removing their connections to other nodes. You can follow this solution.

However, here is a quick demonstration for you:

def remove_keras_learning(od_graph_def):
    for node in od_graph_def.node:
        inp_names = []
        for inp in node.input:
            if 'keras_learning_phase' not in inp:
                inp_names.append(inp)

        del node.input[:]
        node.input.extend(inp_names)
                
    return od_graph_def
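
For example, you could apply it to the frozen GraphDef right before writing it out (a sketch reusing the names from your script):

frozen_graph = freeze_session(K.get_session(),
                              output_names=[net_model.output.op.name])
frozen_graph = remove_keras_learning(frozen_graph)  # drop the learning-phase inputs
graph_io.write_graph(frozen_graph, output_path, pb_model_name, as_text=False)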


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution 1: dtlam26