From ec2060c375e99bb29e264c7768630398961f6ef5 Mon Sep 17 00:00:00 2001
From: Ruben-FreddyLoafers <rubenseitz.q@gmail.com>
Date: Tue, 9 Dec 2025 13:49:17 +0100
Subject: [PATCH] VQ done????

---
 05_mnist_vectorquant/vector_quantization.py | 36 +++++++++++----------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/05_mnist_vectorquant/vector_quantization.py b/05_mnist_vectorquant/vector_quantization.py
index 8c30fb0..746c8fb 100644
--- a/05_mnist_vectorquant/vector_quantization.py
+++ b/05_mnist_vectorquant/vector_quantization.py
@@ -40,51 +40,53 @@ def knn_predict_batch(X_batch, k=3):
     for test_img in X_batch:
         distances = []
         
-        # Euclidean distance to each prototype
-        for prototype in prototypes:
-            # distance = sqrt(sum((test_img - prototype)^2))
+        # Euclidean distance from the test image to every prototype
+        for prototype in prototypes: # For each prototype
             diff = test_img - prototype
             distance = np.sqrt(np.sum(diff ** 2))
             distances.append(distance)
         
         # Find indices of k nearest neighbors
         distances = np.array(distances)
-        nearest_k_indices = np.argsort(distances)[:k] # returns indices of array with sorted distances
+        # argsort returns the indices that would sort the distances in ascending order
+        nearest_k_indices = np.argsort(distances)[:k] # put indices of the values with k-lowest distances in nearest_k_indices
         
-        # Get labels of the k nearest neighbors
+        # Get array with labels of the k nearest neighbors
         nearest_k_labels = prototype_labels[nearest_k_indices]
         
         # Majority vote
-        prediction = np.bincount(nearest_k_labels, minlength=10).argmax()
-        preds.append(prediction)
+        # Count how often each label occurs; bincount stores each count at the index equal to that label
+        # print(np.bincount(nearest_k_labels, minlength=10)) # debugging
+        prediction = np.bincount(nearest_k_labels, minlength=10).argmax() # argmax returns the index of the highest count, which is the predicted label itself
+        preds.append(prediction) # prediction for this test image
     
-    return np.array(preds)
+    return np.array(preds) # prediction for every test image
 
 
 # Evaluate on first N_TEST test samples
 N_TEST = 1000
 print(f"Evaluating on {N_TEST} test samples...") # debugging
 
-X_eval = test_data[:N_TEST]
-y_eval = test_labels_set[:N_TEST]
+data_eval = test_data[:N_TEST]
+label_eval = test_labels_set[:N_TEST]
 
-preds = knn_predict_batch(X_eval, k=5)
+preds = knn_predict_batch(data_eval, k=3)
 
-accuracy = np.mean(preds == y_eval)
+accuracy = np.mean(preds == label_eval) # calc accuracy
 
 print("Predictions:", preds[:20])
-print("True labels:", y_eval[:20])
+print("True labels:", label_eval[:20])
 print("Accuracy:", accuracy)
 
-# Visualize first 20 predictions
+# Visualize first 50 predictions
-fig, axes = plt.subplots(4, 5, figsize=(12, 10))
+fig, axes = plt.subplots(10, 5, figsize=(12, 10))
 axes = axes.flatten()
 
-for i in range(0, 20):
+for i in range(0, 50):
     # Reshape flattened image back to 28x28
-    img = X_eval[i].reshape(28, 28)
+    img = data_eval[i].reshape(28, 28)
     axes[i].imshow(img, cmap='gray')
-    axes[i].set_title(f"Pred: {preds[i]}, True: {y_eval[i]}")
+    axes[i].set_title(f"Pred: {preds[i]}, True: {label_eval[i]}")
     axes[i].axis('off')
 
 plt.tight_layout()