Hi All,
I have written the code below for a generic multi-class feedforward network. Each time I execute the fit function, the loss plot comes out different.
Could someone look into the code and let me know how to sort this out?
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss
from tqdm import tqdm_notebook


class FFMulti_NNetwork:
    def __init__(self, n_inputs, n_outputs, hidden_sizes=[2]):
        self.nx = n_inputs
        self.ny = n_outputs
        self.nh = len(hidden_sizes)
        self.sizes = [self.nx] + hidden_sizes + [self.ny]
        self.W = {}
        self.B = {}
        for i in range(self.nh + 1):
            # weights are re-drawn from a standard normal every time the network is constructed
            self.W[i + 1] = np.random.randn(self.sizes[i], self.sizes[i + 1])
            self.B[i + 1] = np.zeros((1, self.sizes[i + 1]))

    def sigmoid(self, X):
        return 1.0 / (1.0 + np.exp(-X))

    def softmax(self, X):
        # row-wise softmax over the output scores
        exps = np.exp(X)
        return exps / np.sum(exps, axis=1).reshape(-1, 1)

    def forward_pass(self, X):
        self.A = {}
        self.H = {}
        self.H[0] = X
        for i in range(self.nh):
            self.A[i + 1] = np.matmul(self.H[i], self.W[i + 1]) + self.B[i + 1]
            self.H[i + 1] = self.sigmoid(self.A[i + 1])
        self.A[self.nh + 1] = np.matmul(self.H[self.nh], self.W[self.nh + 1]) + self.B[self.nh + 1]
        self.H[self.nh + 1] = self.softmax(self.A[self.nh + 1])
        return self.H[self.nh + 1]

    def grad_sigmoid(self, X):
        # X here is already sigmoid(A), so the derivative is X * (1 - X)
        return X * (1 - X)

    def grad(self, X, Y):
        self.forward_pass(X)
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}
        L = self.nh + 1
        # softmax + cross-entropy: gradient w.r.t. the output pre-activation is (prediction - target)
        self.dA[L] = (self.H[L] - Y)
        for k in range(L, 0, -1):
            self.dW[k] = np.matmul(self.H[k - 1].T, self.dA[k])
            self.dB[k] = np.sum(self.dA[k], axis=0).reshape(1, -1)
            self.dH[k - 1] = np.matmul(self.dA[k], self.W[k].T)
            self.dA[k - 1] = np.multiply(self.dH[k - 1], self.grad_sigmoid(self.H[k - 1]))

    def fit(self, X, Y, epochs=1, learning_rate=1, display_loss=False):
        if display_loss:
            loss = {}
        for e in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            # full-batch gradient descent, averaging the gradients over the m samples
            self.grad(X, Y)
            m = X.shape[0]
            for i in range(self.nh + 1):
                self.W[i + 1] -= learning_rate * (self.dW[i + 1] / m)
                self.B[i + 1] -= learning_rate * (self.dB[i + 1] / m)
            if display_loss:
                Y_pred = self.predict(X)
                loss[e] = log_loss(np.argmax(Y, axis=1), Y_pred)
        if display_loss:
            plt.plot(list(loss.values()))
            plt.xlabel('Epochs')
            plt.ylabel('CE')
            plt.show()

    def predict(self, X):
        Y_pred = self.forward_pass(X)
        return np.array(Y_pred).squeeze()
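For context, here is a minimal sketch of the kind of call that shows the behaviour. The make_blobs toy data, the one-hot encoding via np.eye, and the hyperparameters are placeholders for illustration, not the actual setup:

import numpy as np
from sklearn.datasets import make_blobs

# toy 3-class problem with 2 input features (placeholder data)
X, y = make_blobs(n_samples=300, centers=3, n_features=2, random_state=0)
Y_onehot = np.eye(3)[y]  # fit() expects one-hot targets; the loss logging converts them back with np.argmax

net = FFMulti_NNetwork(n_inputs=2, n_outputs=3, hidden_sizes=[4])
net.fit(X, Y_onehot, epochs=500, learning_rate=0.5, display_loss=True)

# Re-running the two lines above gives a visibly different loss curve on each run.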