From 25f1991a25ede8df595a3989c717c67c23868c8c Mon Sep 17 00:00:00 2001 From: "flyingscorpio@clevo" Date: Wed, 25 Jan 2023 16:04:29 +0100 Subject: [PATCH] Fix perceptron --- big-data/tp1/main.py | 48 +++++++++++++++++++++++++++----------- big-data/tp1/perceptron.py | 14 +++++------ 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/big-data/tp1/main.py b/big-data/tp1/main.py index 2a8c6eb..5b054fe 100755 --- a/big-data/tp1/main.py +++ b/big-data/tp1/main.py @@ -19,22 +19,21 @@ def main(): # Affichage des points plot_dots(data, expected) - training_data = data[:nb_training_points] - testing_data = data[nb_training_points:] - testing_expected = expected[nb_training_points:] - # Apprentissage + training_data = data[:nb_training_points] perceptron = Perceptron() perceptron.train(training_data, expected) + # Affichage de la ligne de séparation + plot_separation_line(data, perceptron) + plt.show() + # Classement sur les données de test + testing_data = data[nb_training_points:] + testing_expected = expected[nb_training_points:] predicted = [perceptron.predict(x) for x in testing_data] print(error_rate(predicted, testing_expected)) - - # Affichage de la ligne de séparation - x_axis = np.linspace(0, 1, 50) - y_axis = -(perceptron.weight[0] * x_axis + perceptron.bias) / perceptron.weight[1] - plt.plot(x_axis, y_axis, c="black") + plot_dots(testing_data, testing_expected) plt.show() @@ -47,12 +46,19 @@ def generate_dataset( Return (data, expected), where expected is the expected group for each group. 
""" + # Création groupe 1 data1 = np.random.rand(nb_data_points // 2, 2) + 3 expected1 = np.array([1 for _ in data1]) + + # Création groupe 0 data0 = np.random.rand(nb_data_points // 2, 2) + 1 expected0 = np.array([0 for _ in data0]) + + # Concaténation des deux groupes data = np.concatenate((data1, data0)) expected = np.concatenate((expected1, expected0)) + + # Mélange des données permutation = np.random.permutation(nb_data_points) data = data[permutation] expected = expected[permutation] @@ -64,6 +70,7 @@ def error_rate(predicted, expected): """Calculate the error rate of a prediction set""" assert len(predicted) == len(expected) + errors = 0 for i, item in enumerate(predicted): if item != expected[i]: @@ -71,19 +78,32 @@ def error_rate(predicted, expected): return errors / len(predicted) -def plot_dots(data, expected): +def plot_dots(data, data_class): """Plot all dots in the data""" + + assert len(data) == len(data_class) + # abscisses de la classe 1 - xpoints_1 = [x[0] for i, x in enumerate(data) if expected[i] == 1] + xpoints_1 = [x[0] for i, x in enumerate(data) if data_class[i] == 1] # ordonnées de la classe 1 - ypoints_1 = [x[1] for i, x in enumerate(data) if expected[i] == 1] + ypoints_1 = [x[1] for i, x in enumerate(data) if data_class[i] == 1] # abscisses de la classe 0 - xpoints_0 = [x[0] for i, x in enumerate(data) if expected[i] == 0] + xpoints_0 = [x[0] for i, x in enumerate(data) if data_class[i] == 0] # ordonnées de la classe 0 - ypoints_0 = [x[1] for i, x in enumerate(data) if expected[i] == 0] + ypoints_0 = [x[1] for i, x in enumerate(data) if data_class[i] == 0] plt.plot(xpoints_1, ypoints_1, 'or', ms=1) # rouge pour la classe 1 plt.plot(xpoints_0, ypoints_0, 'ob', ms=1) # bleu pour la classe 0 +def plot_separation_line(data, perceptron): + """Plot the computed separation line""" + + min_x = min(x for x, _ in data) + max_x = max(x for x, _ in data) + x_axis = np.linspace(min_x, max_x, 50) + y_axis = -(perceptron.weight[0] * x_axis + 
perceptron.bias) / perceptron.weight[1] + plt.plot(x_axis, y_axis, c="black") + + if __name__ == "__main__": main() diff --git a/big-data/tp1/perceptron.py b/big-data/tp1/perceptron.py index 3ea2470..c1df300 100644 --- a/big-data/tp1/perceptron.py +++ b/big-data/tp1/perceptron.py @@ -1,6 +1,5 @@ """Module to hold the Perceptron class""" -import math import numpy as np @@ -12,22 +11,23 @@ class Perceptron: self.weight = np.zeros(2) self.bias = 0.5 self.epoch = 0 # current epoch of the training - self.max_epoch = 5 # le nombre de générations à ne pas dépasser + self.max_epoch = 100 # le nombre de générations à ne pas dépasser def sigmoid(self, number: float) -> float: """Apply the sigmoid function to a number""" - return 1 / (1 + math.exp(-number)) + + return 1 / (1 + np.exp(-number)) def threshold(self, number: float) -> int: """Decide whether number should be group 1 or 0""" - if number > self.bias: + if number > 0.5: return 1 return 0 - def predict(self, point): + def predict(self, point: np.ndarray[float]): """Make a prediction on a single point""" - number = sum(self.weight * point) + self.bias + number = np.dot(self.weight, point) + self.bias sig = self.sigmoid(number) thres = self.threshold(sig) return thres @@ -44,7 +44,7 @@ class Perceptron: if predicted != expected[i]: has_errors = True error = expected[i] - predicted - self.weight += self.alpha * error * point + self.weight += np.dot(self.alpha * error, point) self.bias += self.alpha * error self.epoch += 1 if self.epoch >= self.max_epoch: