From 25f1991a25ede8df595a3989c717c67c23868c8c Mon Sep 17 00:00:00 2001
From: "flyingscorpio@clevo" <tfranken@protonmail.com>
Date: Wed, 25 Jan 2023 16:04:29 +0100
Subject: [PATCH] Fix perceptron

---
 big-data/tp1/main.py       | 48 +++++++++++++++++++++++++++-----------
 big-data/tp1/perceptron.py | 14 +++++------
 2 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/big-data/tp1/main.py b/big-data/tp1/main.py
index 2a8c6eb..5b054fe 100755
--- a/big-data/tp1/main.py
+++ b/big-data/tp1/main.py
@@ -19,22 +19,21 @@ def main():
     # Affichage des points
     plot_dots(data, expected)
 
-    training_data = data[:nb_training_points]
-    testing_data = data[nb_training_points:]
-    testing_expected = expected[nb_training_points:]
-
     # Apprentissage
+    training_data = data[:nb_training_points]
     perceptron = Perceptron()
     perceptron.train(training_data, expected)
 
+    # Affichage de la ligne de séparation
+    plot_separation_line(data, perceptron)
+    plt.show()
+
     # Classement sur les données de test
+    testing_data = data[nb_training_points:]
+    testing_expected = expected[nb_training_points:]
     predicted = [perceptron.predict(x) for x in testing_data]
     print(error_rate(predicted, testing_expected))
-
-    # Affichage de la ligne de séparation
-    x_axis = np.linspace(0, 1, 50)
-    y_axis = -(perceptron.weight[0] * x_axis + perceptron.bias) / perceptron.weight[1]
-    plt.plot(x_axis, y_axis, c="black")
+    plot_dots(testing_data, testing_expected)
 
     plt.show()
 
@@ -47,12 +46,19 @@ def generate_dataset(
     Return (data, expected), where expected is the expected group for each group.
     """
 
+    # Création groupe 1
     data1 = np.random.rand(nb_data_points // 2, 2) + 3
     expected1 = np.array([1 for _ in data1])
+
+    # Création groupe 0
     data0 = np.random.rand(nb_data_points // 2, 2) + 1
     expected0 = np.array([0 for _ in data0])
+
+    # Concaténation des deux groupes
     data = np.concatenate((data1, data0))
     expected = np.concatenate((expected1, expected0))
+
+    # Mélange des données
     permutation = np.random.permutation(nb_data_points)
     data = data[permutation]
     expected = expected[permutation]
@@ -64,6 +70,7 @@ def error_rate(predicted, expected):
     """Calculate the error rate of a prediction set"""
 
     assert len(predicted) == len(expected)
+
     errors = 0
     for i, item in enumerate(predicted):
         if item != expected[i]:
@@ -71,19 +78,32 @@ def error_rate(predicted, expected):
     return errors / len(predicted)
 
 
-def plot_dots(data, expected):
+def plot_dots(data, data_class):
     """Plot all dots in the data"""
+
+    assert len(data) == len(data_class)
+
     # abscisses de la classe 1
-    xpoints_1 = [x[0] for i, x in enumerate(data) if expected[i] == 1]
+    xpoints_1 = [x[0] for i, x in enumerate(data) if data_class[i] == 1]
     # ordonnées de la classe 1
-    ypoints_1 = [x[1] for i, x in enumerate(data) if expected[i] == 1]
+    ypoints_1 = [x[1] for i, x in enumerate(data) if data_class[i] == 1]
     # abscisses de la classe 0
-    xpoints_0 = [x[0] for i, x in enumerate(data) if expected[i] == 0]
+    xpoints_0 = [x[0] for i, x in enumerate(data) if data_class[i] == 0]
     # ordonnées de la classe 0
-    ypoints_0 = [x[1] for i, x in enumerate(data) if expected[i] == 0]
+    ypoints_0 = [x[1] for i, x in enumerate(data) if data_class[i] == 0]
     plt.plot(xpoints_1, ypoints_1, 'or', ms=1)  # rouge pour la classe 1
     plt.plot(xpoints_0, ypoints_0, 'ob', ms=1)  # bleu pour la classe 0
 
 
+def plot_separation_line(data, perceptron):
+    """Plot the computed separation line"""
+
+    min_x = min(x for x, _ in data)
+    max_x = max(x for x, _ in data)
+    x_axis = np.linspace(min_x, max_x, 50)
+    y_axis = -(perceptron.weight[0] * x_axis + perceptron.bias) / perceptron.weight[1]
+    plt.plot(x_axis, y_axis, c="black")
+
+
 if __name__ == "__main__":
     main()
diff --git a/big-data/tp1/perceptron.py b/big-data/tp1/perceptron.py
index 3ea2470..c1df300 100644
--- a/big-data/tp1/perceptron.py
+++ b/big-data/tp1/perceptron.py
@@ -1,6 +1,5 @@
 """Module to hold the Perceptron class"""
 
-import math
 import numpy as np
 
 
@@ -12,22 +11,23 @@ class Perceptron:
         self.weight = np.zeros(2)
         self.bias = 0.5
         self.epoch = 0  # current epoch of the training
-        self.max_epoch = 5  # le nombre de générations à ne pas dépasser
+        self.max_epoch = 100  # le nombre de générations à ne pas dépasser
 
     def sigmoid(self, number: float) -> float:
         """Apply the sigmoid function to a number"""
-        return 1 / (1 + math.exp(-number))
+
+        return 1 / (1 + np.exp(-number))
 
     def threshold(self, number: float) -> int:
         """Decide whether number should be group 1 or 0"""
-        if number > self.bias:
+        if number > 0.5:
             return 1
         return 0
 
-    def predict(self, point):
+    def predict(self, point: np.ndarray[float]):
         """Make a prediction on a single point"""
 
-        number = sum(self.weight * point) + self.bias
+        number = np.dot(self.weight, point) + self.bias
         sig = self.sigmoid(number)
         thres = self.threshold(sig)
         return thres
@@ -44,7 +44,7 @@ class Perceptron:
                 if predicted != expected[i]:
                     has_errors = True
                     error = expected[i] - predicted
-                    self.weight += self.alpha * error * point
+                    self.weight += np.dot(self.alpha * error, point)
                     self.bias += self.alpha * error
             self.epoch += 1
             if self.epoch >= self.max_epoch: