import numpy as np


class NNLayer:
    def __init__(self, input_size, output_size, activation_fn, activation_derivative_fn):
        self.input_size = input_size
        self.output_size = output_size
        self.activation_fn = activation_fn
        self.activation_derivative_fn = activation_derivative_fn
        # Initialize weights uniformly in [-1, 1] (centered around zero) and biases in [0, 1)
        self.weights = 2 * np.random.rand(input_size, output_size) - 1
        self.biases = np.random.rand(output_size).reshape(1, -1)

    def forward(self, input_data):
        # Cache the input and pre-activation for use in backward()
        self.input_data = input_data
        self.z = np.dot(input_data, self.weights) + self.biases
        self.a = self.activation_fn(self.z)
        return self.a

    def backward(self, dA, learning_rate=0.01):
        # Gradient of the loss w.r.t. the pre-activation z
        dZ = dA * self.activation_derivative_fn(self.z)
        # Average parameter gradients over the batch
        dW = np.dot(self.input_data.T, dZ) / self.input_data.shape[0]
        dB = np.sum(dZ, axis=0, keepdims=True) / self.input_data.shape[0]
        # Gradient to propagate to the previous layer
        dInputs = np.dot(dZ, self.weights.T)
        # Gradient-descent parameter update
        self.weights -= learning_rate * dW
        self.biases -= learning_rate * dB
        return dInputs


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))


def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    # Derivative is 0 for x < 0 and 1 for x > 0 (defined as 1 at x == 0)
    return np.heaviside(x, 1)


def softmax(x):
    # Subtract the row-wise max for numerical stability
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)


def softmax_derivative(S):
    # Jacobian of softmax for a single probability vector S:
    # J[i][j] = S[i] * (1 - S[i]) if i == j, else -S[i] * S[j]
    jacobian = np.diag(S)
    for i in range(len(jacobian)):
        for j in range(len(jacobian[i])):
            if i == j:
                jacobian[i][j] = S[i] * (1 - S[j])
            else:
                jacobian[i][j] = -S[j] * S[i]
    return jacobian


def cross_entropy_loss(y_pred, y_true):
    # Clip predictions to avoid log(0)
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    loss = -np.sum(y_true * np.log(y_pred), axis=1)
    return np.mean(loss)


def cross_entropy_derivative(y_pred, y_true):
    # Gradient of the cross-entropy loss w.r.t. the softmax input (logits),
    # using the combined softmax + cross-entropy simplification
    return y_pred - y_true
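

# --- Illustrative usage sketch (an assumption, not part of the original source) ---
# Trains a tiny two-layer network (ReLU hidden layer + softmax output) on toy
# random data. The names X, y_onehot, hidden, and output are made up for this
# example. The output layer uses the identity as its "activation derivative"
# because cross_entropy_derivative already returns the combined
# softmax + cross-entropy gradient (y_pred - y_true).
if __name__ == "__main__":
    np.random.seed(0)
    X = np.random.rand(32, 4)                  # 32 samples, 4 features
    labels = np.random.randint(0, 3, size=32)  # 3 classes
    y_onehot = np.eye(3)[labels]

    hidden = NNLayer(4, 8, relu, relu_derivative)
    output = NNLayer(8, 3, softmax, lambda z: np.ones_like(z))

    for epoch in range(100):
        # Forward pass
        h = hidden.forward(X)
        probs = output.forward(h)
        loss = cross_entropy_loss(probs, y_onehot)

        # Backward pass: dA for the output layer is the combined
        # softmax + cross-entropy gradient
        dA = cross_entropy_derivative(probs, y_onehot)
        dH = output.backward(dA, learning_rate=0.1)
        hidden.backward(dH, learning_rate=0.1)

        if epoch % 20 == 0:
            print(f"epoch {epoch}: loss {loss:.4f}")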