Commit bdf153b (1 parent: e65c33b)
Showing 17 changed files with 1,523 additions and 0 deletions.
@@ -0,0 +1,38 @@
import os
import subprocess
import pandas as pd
import numpy as np
import tensorflow as tf
from typing import Tuple


def load_fer2013() -> pd.DataFrame:
    """Load the FER2013 emotion dataset as a pandas DataFrame, downloading it first if necessary."""
    if not os.path.exists("fer2013"):
        print("Downloading the face emotion dataset...")
        subprocess.check_output(
            "curl -SL https://www.dropbox.com/s/opuvvdv3uligypx/fer2013.tar | tar xz",
            shell=True,
        )
    print("Loading dataset...")
    data = pd.read_csv("fer2013/fer2013.csv")
    return data


def preprocess(row, num_classes):
    # Convert the 'pixels' tensor to string and split
    pixel_string = row["pixels"]
    pixel_values = tf.strings.split([pixel_string], sep=" ")
    pixel_values = tf.strings.to_number(pixel_values, out_type=tf.int32)

    # Convert the RaggedTensor to a regular tensor
    pixel_values = tf.RaggedTensor.to_tensor(pixel_values, default_value=0)

    # Reshape and normalize the pixel values
    pixels = tf.reshape(pixel_values, (48, 48, 1))
    pixels = tf.cast(pixels, tf.float32) / 255.0

    # Prepare the label
    emotion = tf.one_hot(row["emotion"], depth=num_classes)

    return pixels, emotion
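For illustration, here is a minimal usage sketch (not part of the committed file) of how these two helpers can be wired into a `tf.data` pipeline; the batch size and shuffle buffer are assumptions:

``` python
import tensorflow as tf

from load_fer2013 import load_fer2013, preprocess

# Hypothetical sketch: build a batched tf.data pipeline from the CSV rows.
data = load_fer2013()
dataset = tf.data.Dataset.from_tensor_slices(dict(data))
dataset = dataset.map(
    lambda row: preprocess(row, num_classes=7),
    num_parallel_calls=tf.data.AUTOTUNE,
)
dataset = dataset.shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
```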
@@ -0,0 +1 @@
mlflow server --host 127.0.0.1 --port 8080
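For context, this starts the local MLflow tracking server; the training script connects to it by setting the tracking URI (the same value appears in `setup_mlflow` in `train.py`):

``` python
import mlflow

# Point the MLflow client at the local tracking server started above.
mlflow.set_tracking_uri("http://127.0.0.1:8080")
```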
@@ -0,0 +1,113 @@
# Facial Emotion Recognition

This repository is my project hand-in for the AKT3 course on Deep Learning & Computer Vision.

## Dataset

For training this model we will be using the [FER2013](https://www.kaggle.com/datasets/msambare/fer2013) dataset.

### Example Data

The dataset contains 48x48 images of human faces.




### Analysis



Using the class distribution above we can determine a baseline accuracy.

`HappyCounts / TotalCounts = 0.25`

`Baseline accuracy = 25%`

So by always guessing `Happy` we could reach an accuracy of 25%. Our goal is to improve on that with the CNN.

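A minimal sketch of how this baseline can be derived from the raw labels (assuming the `data` DataFrame returned by `load_fer2013`):

``` python
from load_fer2013 import load_fer2013

data = load_fer2013()

# Relative frequency of each emotion label (0-6) in the CSV.
class_frequencies = data["emotion"].value_counts(normalize=True)

# Always predicting the most frequent class ("Happy") gives the baseline accuracy.
baseline_accuracy = class_frequencies.max()
print(f"Baseline accuracy: {baseline_accuracy:.2%}")
```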
## Baseline

Using the `train.py` script we train a facial emotion recognition model that classifies images of human faces into 7 emotion classes (`"Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"`).

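For reference, a hypothetical sketch of how a softmax prediction from this model maps back to one of those class names (the class order is an assumption taken from the list above):

``` python
import numpy as np

# Assumed to match the one-hot encoding of the 'emotion' column (labels 0-6).
EMOTIONS = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

def decode_prediction(probabilities: np.ndarray) -> str:
    """Return the emotion name with the highest predicted probability."""
    return EMOTIONS[int(np.argmax(probabilities))]
```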
We split the dataset into train, validation, and test sets (the split logic is sketched below).

* Train dataset size: 25120 examples
* Validation dataset size: 7179 examples
* Test dataset size: 3588 examples

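These sizes correspond to a roughly 70/20/10 split; a minimal sketch of the arithmetic, assuming the proportions used in `train.py`:

``` python
# Hypothetical sketch of the split arithmetic used in train.py.
n = 25120 + 7179 + 3588                   # total number of rows (35,887)

split_train = int(n * 0.7)                # 25120 training examples
split_test = int(n * 0.1)                 # 3588 test examples
split_val = n - split_train - split_test  # 7179 validation examples

assert (split_train, split_val, split_test) == (25120, 7179, 3588)
```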
### Results



As shown in the graphs above, we achieve very poor performance with our baseline parameters.

| Parameter                | Value                    |
|--------------------------|--------------------------|
| learning_rate            | 0.01                     |
| loss                     | categorical_crossentropy |
| epochs                   | 50                       |
| batch_size               | 128                      |
| early_stopping_patience  | 7                        |
| lr_patience              | 5                        |
| lr_reduction_factor      | 0.1                      |
| optimizer                | Adam                     |
| num_classes              | 7                        |
| input_shape              | (48, 48, 1)              |
| shuffle                  | True                     |
| restore_best_weights     | True                     |

## Experiment 1 - Improving validation-accuracy

In my first run the model only achieved a validation accuracy of 21%, which is very poor. This was confusing, because other resources report significantly higher validation accuracies with similar CNNs on this dataset.

My hypothesis is that I chose a far too high starting learning rate, which led to very early convergence and therefore significant underfitting.
By reducing the learning rate (see the sketch below) I expect better results.

| Parameter                | Value                 |
|--------------------------|-----------------------|
| learning_rate            | 0.001                 |

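In code terms the experiment changes a single entry of the `params` dict that `train.py` passes to the Adam optimizer (a sketch; only the value differs from the baseline):

``` python
import tensorflow as tf

# Baseline used learning_rate=0.01; Experiment 1 lowers it by a factor of 10.
params = {"learning_rate": 0.001}

optimizer = tf.keras.optimizers.Adam(learning_rate=params["learning_rate"])
```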
### Results

Validation Loss            | Validation Accuracy       | Learning Rate
:-------------------------:|:-------------------------:|:-------------------------:
 |  | 

As we can see in the resulting charts, my hypothesis was correct: by reducing the learning rate we achieve much better results.

## Experiment 2 - Smoothing the validation-loss curve

The new validation-loss curve is very erratic. To smooth it out and reduce the bumpiness, I will again lower the learning rate by a factor of 10.

| Parameter                | Value                 |
|--------------------------|-----------------------|
| learning_rate            | 0.0001                |

### Results

As we can see in the resulting charts, my hypothesis was correct: with the reduced learning rate the curve is much less erratic.

Validation Loss            | Validation Accuracy       | Learning Rate
:-------------------------:|:-------------------------:|:-------------------------:
 |  | 

## Experiment 3 - Disabling the restore-best-weights option

For some reason, the restore_best_weights option of the EarlyStopping callback actually restores a worse configuration of the model at the end of training. By disabling the option we want to prevent that behaviour.

``` python
keras.callbacks.EarlyStopping(
    patience=params["early_stopping_patience"],
    restore_best_weights=False
),
```

| Parameter                | Value                 |
|--------------------------|-----------------------|
| restore_best_weights     | False                 |

### Results

In the following graph we see that by disabling the `restore_best_weights` option we can actually keep the better model in the end.


Binary file not shown.
@@ -0,0 +1,146 @@
from typing import Tuple, Dict, List
import mlflow
import mlflow.tensorflow
from datetime import datetime
from tensorflow import keras
from keras.layers import (
    Dense,
    Flatten,
    Conv2D,
    MaxPooling2D,
    Dropout,
    BatchNormalization,
)
from keras.models import Sequential
from keras.utils import plot_model
import tensorflow as tf
from load_fer2013 import load_fer2013, preprocess


def setup_mlflow() -> None:
    mlflow.set_tracking_uri("http://127.0.0.1:8080")
    experiment_name = "Baseline"
    experiment_description = (
        "This is a neural network for classifying human emotions based on facial expressions. "
        "This experiment will create a baseline neural network for further experiments."
    )
    experiment_tags = {
        "project_name": "facial-emotion-recognition",
        "experiment_name": experiment_name,
        "dataset": "fer2013",
        "mlflow.note.content": experiment_description,
        "date": datetime.now().strftime("%d.%m.%Y %H:%M"),
    }
    mlflow.set_experiment(experiment_name)
    mlflow.set_experiment_tags(experiment_tags)
    mlflow.tensorflow.autolog()


def create_model(
    input_shape: Tuple[int, int, int], num_classes: int, params
) -> Sequential:
    model = Sequential(
        [
            Conv2D(32, (3, 3), activation="relu", input_shape=input_shape),
            MaxPooling2D(),
            BatchNormalization(),
            Conv2D(64, (3, 3), activation="relu"),
            MaxPooling2D(),
            BatchNormalization(),
            Flatten(),
            Dense(128, activation="relu"),
            Dropout(0.5),
            Dense(num_classes, activation="softmax"),
        ]
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=params["learning_rate"]),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=["accuracy", keras.metrics.CategoricalAccuracy()],
    )
    return model


def load_and_preprocess_data() -> Tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset]:
    data = load_fer2013()
    num_classes = 7

    # Define splits for train, validation, and test sets
    split_train = int(len(data) * 0.7)
    split_test = int(len(data) * 0.1)
    split_val = len(data) - split_train - split_test

    # Create a TensorFlow dataset from the data
    dataset = tf.data.Dataset.from_tensor_slices(dict(data))
    dataset = dataset.map(
        lambda row: preprocess(row, num_classes), num_parallel_calls=tf.data.AUTOTUNE
    )

    # Partition the data into train, validation, and test sets
    train_dataset = (
        dataset.take(split_train).shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
    )
    val_dataset = (
        dataset.skip(split_train).take(split_val).batch(32).prefetch(tf.data.AUTOTUNE)
    )
    test_dataset = (
        dataset.skip(split_train + split_val).batch(32).prefetch(tf.data.AUTOTUNE)
    )

    return train_dataset, val_dataset, test_dataset


def train_and_log_model(
    model: Sequential,
    train_dataset: tf.data.Dataset,
    val_dataset: tf.data.Dataset,
    params: Dict[str, str | int | List[str]],
) -> None:
    model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=params["epochs"],  # type: ignore
        batch_size=params["batch_size"],
        callbacks=[
            keras.callbacks.EarlyStopping(
                patience=params["early_stopping_patience"],  # type: ignore
                # restore_best_weights=True, Removing this stops the model from being far worse at the last step... Don't know why
            ),
            keras.callbacks.ModelCheckpoint("./output/best_model", save_best_only=True),
            keras.callbacks.ReduceLROnPlateau(
                factor=params["lr_reduction_factor"], patience=params["lr_patience"]  # type: ignore
            ),
        ],
    )
    # Save the trained model and log parameters and artifacts to MLflow
    model.save("./output/emotion.h5")
    mlflow.log_params(params)
    plot_model(model, to_file="./output/model.png", show_shapes=True)
    mlflow.log_artifact("./output/model.png")
    model.save_weights("./output/model_weights/model_weights")
    mlflow.log_artifact("./output/model_weights")


if __name__ == "__main__":
    setup_mlflow()
    with mlflow.start_run() as run:
        input_shape = (48, 48, 1)
        num_classes = 7
        params = {
            "batch_size": 128,
            "epochs": 50,
            "input_shape": input_shape,
            "num_classes": num_classes,
            "optimizer": "adam",
            "loss": "categorical_crossentropy",
            "early_stopping_patience": 5,
            "learning_rate": 0.0001,
            "lr_reduction_factor": 0.1,
            "lr_patience": 3,
        }

        train_dataset, val_dataset, test_dataset = load_and_preprocess_data()
        model = create_model(input_shape, num_classes, params)
        train_and_log_model(model, train_dataset, val_dataset, params)