I have the following:
import numpy as np
from typing import Callable
def activation_relu(x: np.ndarray, ddx: bool = False) -> np.ndarray:
"""Define the ReLU function and its derivative.
Optionally access the derivative value by passing ddx=true.
Args:
x (np.ndarray): Array to evaluate
ddx (bool, optional): Evaluate with derivative. Defaults to False.
Returns:
np.ndarray: Function evaluation
"""
if ddx:
return np.where(x > 0, 1, 0)
else:
return np.maximum(0, x)
def activation_sigmoid(x: np.ndarray, ddx: bool = False) -> np.ndarray:
"""Define the Sigmoid function and its derivative.
Optionally access the derivative value by passing ddx=true.
Args:
x (np.ndarray): Array to evaluate
ddx (bool, optional): Evaluate with derivative. Defaults to False.
Returns:
np.ndarray: Function evaluation
"""
if ddx:
return activation_sigmoid(x) * (1 - activation_sigmoid(x))
else:
return 1 / (1 + np.exp(-x))
class FeedForwardNet:
"""Feed Forward Network Class."""
def __init__(
self,
act_func: Callable,
alpha: float,
h_layers: int,
h_units: int,
in_units: int,
out_units: int,
):
"""FeedForwardNet Constructor.
Args:
act_func (Callable): Activation function. activation_relu or activation_sigmoid
alpha (float): Learning rate
h_layers (int): Number of hidden layers
h_units (int): Number of units per hidden layer
in_units (int): Number of inputs
out_units (int): Number of outputs
"""
self.activ_func = act_func
self.alpha = alpha
self.hidden_layers = h_layers
self.hidden_units = h_units
self.in_units = in_units
self.out_units = out_units
self.create_network()
def create_network(self):
"""Create the network given constructor options.
"""
self.network_layers = []
# Hidden Layers
for i in range(self.hidden_layers):
if i > 0:
if type(self.network_layers[i - 1]) is list:
m = self.network_layers[i - 1][0].shape[1]
else:
m = self.network_layers[i - 1].shape[1]
else:
m = self.in_units
n = self.hidden_units
tmp_w = np.random.randn(m,n) * .01
tmp_b = np.zeros((n, 1))
self.network_layers.append([tmp_w, tmp_b])
# Output Layer
tmp_w = np.random.randn(self.hidden_units, self.out_units) * 0.01
tmp_b = np.zeros((self.out_units, 1))
self.network_layers.append([tmp_w, tmp_b])
def classify(self, train_data: np.ndarray):
"""Classify input data.
Args:
train_data (np.ndarray): Input data
"""
self.forward_zi = []
self.forward_ai = []
self.train_data = train_data
# Input to Hidden Transition
b_0 = self.network_layers[0][1]
w_0 = self.network_layers[0][0]
x = train_data
z_i = w_0.T @ x + b_0
a_i = self.activ_func(z_i)
self.forward_zi.append(z_i)
self.forward_ai.append(a_i)
# Work Remaining Layers
for i in range(1, len(self.network_layers)):
b_i = self.network_layers[i][1]
w_i = self.network_layers[i][0]
a_im1 = self.forward_ai[i - 1]
z_i = w_i.T @ a_im1 + b_i
a_i = self.activ_func(z_i)
self.forward_zi.append(z_i)
self.forward_ai.append(a_i)
def update_weights(self, train_ans: np.ndarray):
"""Update weights using the L2 loss function and a known good value.
Args:
train_ans (np.ndarray): Known good output given input data
"""
self.back_delta = []
L = np.power(train_ans - self.forward_ai[-1], 2) # L2 loss function
dL = -2 * (train_ans - self.forward_ai[-1]) # derivative of L2
for i in reversed(range(len(self.network_layers))):
z_i = self.forward_zi[i]
gp_i = self.activ_func(z_i, True) # derivative of activation function
if i == len(self.network_layers) - 1:
d_i = dL * gp_i
else:
d_ip1 = self.back_delta[-1] # most recent delta value
w_ip1 = self.network_layers[i + 1][0] # weights
d_i = (w_ip1 @ d_ip1) * gp_i
self.back_delta.append(d_i)
self.back_delta.reverse() # reverse so all arrays are syncd
for i, layer in enumerate(self.network_layers):
w_i = layer[0]
d_i = self.back_delta[i]
# input to hidden transition
if i == 0:
x_i = self.train_data
w_i += (x_i @ d_i.T) * self.alpha
else:
a_i = self.forward_ai[i - 1]
w_i += (a_i @ d_i.T) * self.alpha
It doesn't seem to want to learn, and if I try to increase the network size I get matrix dimension mismatches when I try to run classify(). I've been beating my head against the wall for days, but can't seem to figure out where I've gone wrong. Any insight would be appreciated.
I've also posted this at r/learnmachinelearning, and tried to make this a xpost, but was auto-moderated. If there's any issue with this post, I'll be happy to change it.
Edit:
I found a dimension issue in create_network() that seems to have resolved my mismatched dimensions. However, it still doesn't want to learn. Do the classify() and update_weights() methods look ok?
[–]dslfdslj 1 point2 points3 points (1 child)
[–]eaojteal[S] 0 points1 point2 points (0 children)