'tensorflow nn_model for DNA sequences: Matrix size-incompatible: In[0]: [2,1], In[1]: [784,300]

Hope anyone can help a beginner here. I'm building a proof-of concept tensorflow classifier for DNA sequences. However, the NN model does not let through train and test vectors saying the matrix size is incompatible. Where could be the mistake in my implementation?

import numpy as np
import pandas as pd

df=pd.DataFrame(columns=['sequences', 'label'])
a='ttttccagaattctcttagttt gtgatgtctttattgcttctattt'
b='ctcctgcttgctttttttcttg ggtttctgatattctttaaaggat'
c='tcctgcttgctttttttcttgg gtttctgatattctttaaaggatt'
df.sequences=[a,b,c]
df.label=[1,1,1]

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
integer_encoder = LabelEncoder()
one_hot_encoder = OneHotEncoder(categories='auto')
input_features = []
sequences=df.sequences
for sequence in sequences:
     integer_encoded = integer_encoder.fit_transform(list(sequence))
     integer_encoded = np.array(integer_encoded).reshape(-1, 1)
     one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)
     input_features.append(one_hot_encoded.toarray())

np.set_printoptions(threshold=40)
input_features = np.stack(input_features)
print("Example sequence\n-----------------------")
print('DNA Sequence #1:\n',sequences[0][:10],'...',sequences[0][-10:])
print('One hot encoding of Sequence #1:\n',input_features[0].T)

labels=df.label
one_hot_encoder = OneHotEncoder(categories='auto')
labels = np.array(labels).reshape(-1, 1)
input_labels = one_hot_encoder.fit_transform(labels).toarray()
print('Labels:\n',labels.T)
print('One-hot encoded labels:\n',input_labels.T)

from sklearn.model_selection import train_test_split
train_features, test_features, train_labels, test_labels = train_test_split(input_features, input_labels, test_size=0.25, random_state=42)


def get_batch(x_data, y_data, batch_size):
     idxs = np.random.randint(0, len(y_data), batch_size)
     return x_data[idxs,:,:], y_data[idxs]

epochs = 10
batch_size = 100

x_train = train_features
x_test = train_labels
x_train = x_train / 255.0
x_test = x_test / 255.0
x_test = tf.Variable(x_test)

W1 = tf.Variable(tf.random.normal([784, 300], stddev=0.03), name='W1')
b1 = tf.Variable(tf.random.normal([300]), name='b1')
W2 = tf.Variable(tf.random.normal([300, 10], stddev=0.03), name='W2')
b2 = tf.Variable(tf.random.normal([10]), name='b2')

def nn_model(x_input, W1, b1, W2, b2): 
     x_input = tf.reshape(x_input, (x_input.shape[0], -1))
     x = tf.add(tf.matmul(tf.cast(x_input, tf.float32), W1), b1) #ERROR FROM HERE?
     x = tf.nn.relu(x)
     logits = tf.add(tf.matmul(x, W2), b2)
     return logits

def loss_fn(logits, labels):
     cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels,logits=logits))
     return cross_entropy

optimizer = tf.keras.optimizers.Adam()
total_batch = int(len(y_train) / batch_size)

for epoch in range(epochs):
    avg_loss = 0
    for i in range(total_batch):
        batch_x, batch_y = get_batch(x_train, y_train, batch_size=batch_size)
        # create tensors
        batch_x = tf.Variable(batch_x)
        batch_y = tf.Variable(batch_y)
        # create a one hot vector
        batch_y = tf.one_hot(batch_y, 10)
        with tf.GradientTape() as tape:
            logits = nn_model(batch_x, W1, b1, W2, b2)
            loss = loss_fn(logits, batch_y)
        gradients = tape.gradient(loss, [W1, b1, W2, b2])
        optimizer.apply_gradients(zip(gradients, [W1, b1, W2, b2]))
        avg_loss += loss / total_batch
    test_logits = nn_model(x_test, W1, b1, W2, b2) ##ERROR LOG APPEARS HERE
    max_idxs = tf.argmax(test_logits, axis=1)
    test_acc = np.sum(max_idxs.numpy() == y_test) / len(y_test)
    print(f"Epoch: {epoch + 1}, loss={avg_loss:.3f}, test set accuracy = {test_acc*100:.3f}%")
print("\nTraining complete!")

Traceback (most recent call last): File "", line 16, in File "", line 4, in nn_model File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler raise e.with_traceback(filtered_tb) from None File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py", line 7107, in raise_from_not_ok_status raise core._status_to_exception(e) from None # pylint: disable=protected-access tensorflow.python.framework.errors_impl.InvalidArgumentError: Matrix size-incompatible: In[0]: [2,1], In[1]: [784,300] [Op:MatMul]

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

'tensorflow nn_model for DNA sequences: Matrix size-incompatible: In[0]: [2,1], In[1]: [784,300]

Sources

Related Questions