I have implemented a neural network class that only ever has a single hidden layer, without using any libraries, not even numpy. I did everything the way I understood it was supposed to work, but it doesn't learn at all: the loss keeps increasing, and I can't find where I went wrong, even after looking at many examples online.
Here is my MLP class and a demonstration of it attempting to learn the XOR function:
import random
from math import exp
class MLP:
    def __init__(self, numInputs, numHidden, numOutputs):
        # MLP architecture sizes
        self.numInputs = numInputs
        self.numHidden = numHidden
        self.numOutputs = numOutputs
        # MLP weights
        self.IH_weights = [[random.random() for i in range(numHidden)] for j in range(numInputs)]
        self.HO_weights = [[random.random() for i in range(numOutputs)] for j in range(numHidden)]
        # Gradients corresponding to weight matrices computed during backprop
        self.IH_gradients = [[0 for i in range(numHidden)] for j in range(numInputs)]
        self.HO_gradients = [[0 for i in range(numOutputs)] for j in range(numHidden)]
        # Input, hidden and output neuron values
        self.I = None
        self.H = [0 for i in range(numHidden)]
        self.O = [0 for i in range(numOutputs)]
        self.H_deltas = [0 for i in range(numHidden)]
        self.O_deltas = [0 for i in range(numOutputs)]

    # Sigmoid
    def activation(self, x):
        return 1 / (1 + exp(-x))

    # Derivative of Sigmoid
    def activationDerivative(self, x):
        return x * (1 - x)

    # Squared Error
    def calculateError(self, prediction, label):
        return (prediction - label) ** 2

    def forward(self, input):
        self.I = input
        for i in range(self.numHidden):
            for j in range(self.numInputs):
                self.H[i] += self.I[j] * self.IH_weights[j][i]
            self.H[i] = self.activation(self.H[i])
        for i in range(self.numOutputs):
            for j in range(self.numHidden):
                self.O[i] += self.activation(self.H[j] * self.HO_weights[j][i])
            self.O[i] = self.activation(self.O[i])
        return self.O

    def backwards(self, label):
        if label != list:
            label = [label]
        error = 0
        for i in range(self.numOutputs):
            neuronError = self.calculateError(self.O[i], label[i])
            error += neuronError
            self.O_deltas[i] = neuronError * self.activationDerivative(self.O[i])
            for j in range(self.numHidden):
                self.HO_gradients[j][i] += self.O_deltas[i] * self.H[j]
        for i in range(self.numHidden):
            neuronError = 0
            for j in range(self.numOutputs):
                neuronError += self.HO_weights[i][j] * self.O_deltas[j]
            self.H_deltas[i] = neuronError * self.activationDerivative(self.H[i])
            for j in range(self.numInputs):
                self.IH_gradients[j][i] += self.H_deltas[i] * self.I[j]
        return error

    def updateWeights(self, learningRate):
        for i in range(self.numInputs):
            for j in range(self.numHidden):
                self.IH_weights[i][j] += learningRate * self.IH_gradients[i][j]
        for i in range(self.numHidden):
            for j in range(self.numOutputs):
                self.HO_weights[i][j] += learningRate * self.HO_gradients[i][j]
        self.IH_gradients = [[0 for i in range(self.numHidden)] for j in range(self.numInputs)]
        self.HO_gradients = [[0 for i in range(self.numOutputs)] for j in range(self.numHidden)]
data = [
    [[0, 0], 0],
    [[0, 1], 1],
    [[1, 0], 1],
    [[1, 1], 0]
]

mlp = MLP(2, 5, 1)
for epoch in range(100):
    epochError = 0
    for i in range(len(data)):
        mlp.forward(data[i][0])
        epochError += mlp.backwards(data[i][1])
    print(epochError / len(data))
    mlp.updateWeights(0.001)
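
For reference, the rule I thought I was implementing is plain gradient descent on the squared error with a sigmoid output. Here is a minimal sketch of how I understand the update for a single hidden-to-output weight; output_delta and updated_weight are purely illustrative helper names that don't exist in my class:

def output_delta(prediction, label):
    # Error signal at a sigmoid output under squared error:
    # d/dprediction of (prediction - label)**2 is 2 * (prediction - label),
    # times the sigmoid derivative written in terms of the output itself.
    return 2 * (prediction - label) * prediction * (1 - prediction)

def updated_weight(weight, hiddenValue, delta, learningRate):
    # The gradient of the error w.r.t. this weight is delta * hiddenValue,
    # and gradient descent steps against that gradient.
    return weight - learningRate * delta * hiddenValue

If my reading of that rule is itself wrong, that could of course be the mistake, but either way I can't see where my class deviates from it.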