Commit bdf153b (1 parent: e65c33b)
Showing 17 changed files with 1,523 additions and 0 deletions.
@@ -0,0 +1,38 @@
import os
import subprocess
import pandas as pd
import numpy as np
import tensorflow as tf
from typing import Tuple


def load_fer2013() -> pd.DataFrame:
    """Load the FER2013 emotion dataset as a pandas DataFrame, downloading it first if necessary."""
    if not os.path.exists("fer2013"):
        print("Downloading the face emotion dataset...")
        subprocess.check_output(
            "curl -SL https://www.dropbox.com/s/opuvvdv3uligypx/fer2013.tar | tar xz",
            shell=True,
        )
    print("Loading dataset...")
    data = pd.read_csv("fer2013/fer2013.csv")
    return data


def preprocess(row, num_classes):
    # Convert the 'pixels' tensor to string and split
    pixel_string = row["pixels"]
    pixel_values = tf.strings.split([pixel_string], sep=" ")
    pixel_values = tf.strings.to_number(pixel_values, out_type=tf.int32)

    # Convert the RaggedTensor to a regular tensor
    pixel_values = tf.RaggedTensor.to_tensor(pixel_values, default_value=0)

    # Reshape and normalize the pixel values
    pixels = tf.reshape(pixel_values, (48, 48, 1))
    pixels = tf.cast(pixels, tf.float32) / 255.0

    # Prepare the label
    emotion = tf.one_hot(row["emotion"], depth=num_classes)

    return pixels, emotion
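For illustration, here is a minimal usage sketch (not part of the committed file) of how these two helpers can be wired into a `tf.data` pipeline; the batch size and shuffle buffer are assumptions:

``` python
import tensorflow as tf

from load_fer2013 import load_fer2013, preprocess

# Hypothetical sketch: build a batched tf.data pipeline from the CSV rows.
data = load_fer2013()
dataset = tf.data.Dataset.from_tensor_slices(dict(data))
dataset = dataset.map(
    lambda row: preprocess(row, num_classes=7),
    num_parallel_calls=tf.data.AUTOTUNE,
)
dataset = dataset.shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
```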
@@ -0,0 +1 @@
mlflow server --host 127.0.0.1 --port 8080
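For context, this starts the local MLflow tracking server; the training script connects to it by setting the tracking URI (the same value appears in `setup_mlflow` in `train.py`):

``` python
import mlflow

# Point the MLflow client at the local tracking server started above.
mlflow.set_tracking_uri("http://127.0.0.1:8080")
```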
@@ -0,0 +1,113 @@
# Facial Emotion Recognition

This repository is my project hand-in for the AKT3 course on Deep Learning & Computer Vision.

## Dataset

For training this model we will be using the [FER2013](https://www.kaggle.com/datasets/msambare/fer2013) dataset.

### Example Data

The dataset contains 48x48 images of human faces.




### Analysis



Using the class distribution above we can determine a baseline accuracy.

`HappyCounts / TotalCounts = 0.25`

`Baseline accuracy = 25%`

So by always guessing `Happy` we could reach an accuracy of 25%. Our goal is to improve on that with the CNN.

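A minimal sketch of how this baseline can be derived from the raw labels (assuming the `data` DataFrame returned by `load_fer2013`):

``` python
from load_fer2013 import load_fer2013

data = load_fer2013()

# Relative frequency of each emotion label (0-6) in the CSV.
class_frequencies = data["emotion"].value_counts(normalize=True)

# Always predicting the most frequent class ("Happy") gives the baseline accuracy.
baseline_accuracy = class_frequencies.max()
print(f"Baseline accuracy: {baseline_accuracy:.2%}")
```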
## Baseline

Using the `train.py` script we train a facial emotion recognition model that classifies images of human faces into 7 emotion classes (`"Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"`).

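For reference, a hypothetical sketch of how a softmax prediction from this model maps back to one of those class names (the class order is an assumption taken from the list above):

``` python
import numpy as np

# Assumed to match the one-hot encoding of the 'emotion' column (labels 0-6).
EMOTIONS = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]

def decode_prediction(probabilities: np.ndarray) -> str:
    """Return the emotion name with the highest predicted probability."""
    return EMOTIONS[int(np.argmax(probabilities))]
```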
We split the dataset into train, validation, and test sets (the split logic is sketched below).

* Train dataset size: 25120 examples
* Validation dataset size: 7179 examples
* Test dataset size: 3588 examples

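These sizes correspond to a roughly 70/20/10 split; a minimal sketch of the arithmetic, assuming the proportions used in `train.py`:

``` python
# Hypothetical sketch of the split arithmetic used in train.py.
n = 25120 + 7179 + 3588                   # total number of rows (35,887)

split_train = int(n * 0.7)                # 25120 training examples
split_test = int(n * 0.1)                 # 3588 test examples
split_val = n - split_train - split_test  # 7179 validation examples

assert (split_train, split_val, split_test) == (25120, 7179, 3588)
```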
### Results



As shown in the graphs above, we achieve very poor performance with our baseline parameters.

| Parameter                | Value                    |
|--------------------------|--------------------------|
| learning_rate            | 0.01                     |
| loss                     | categorical_crossentropy |
| epochs                   | 50                       |
| batch_size               | 128                      |
| early_stopping_patience  | 7                        |
| lr_patience              | 5                        |
| lr_reduction_factor      | 0.1                      |
| optimizer                | Adam                     |
| num_classes              | 7                        |
| input_shape              | (48, 48, 1)              |
| shuffle                  | True                     |
| restore_best_weights     | True                     |

## Experiment 1 - Improving validation-accuracy

In my first run the model only achieved a validation accuracy of 21%, which is very poor. This was confusing, because other resources report significantly higher validation accuracies with similar CNNs on this dataset.

My hypothesis is that I chose a far too high starting learning rate, which led to very early convergence and therefore significant underfitting.
By reducing the learning rate (see the sketch below) I expect better results.

| Parameter                | Value                 |
|--------------------------|-----------------------|
| learning_rate            | 0.001                 |

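In code terms the experiment changes a single entry of the `params` dict that `train.py` passes to the Adam optimizer (a sketch; only the value differs from the baseline):

``` python
import tensorflow as tf

# Baseline used learning_rate=0.01; Experiment 1 lowers it by a factor of 10.
params = {"learning_rate": 0.001}

optimizer = tf.keras.optimizers.Adam(learning_rate=params["learning_rate"])
```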
### Results

Validation Loss            | Validation Accuracy       | Learning Rate
:-------------------------:|:-------------------------:|:-------------------------:
 |  | 

As we can see in the resulting charts, my hypothesis was correct: by reducing the learning rate we achieve much better results.

## Experiment 2 - Smoothing the validation-loss curve

The new validation-loss curve is very erratic. To smooth it out and reduce the bumpiness, I will again lower the learning rate by a factor of 10.

| Parameter                | Value                 |
|--------------------------|-----------------------|
| learning_rate            | 0.0001                |

### Results

As we can see in the resulting charts, my hypothesis was correct: with the reduced learning rate the curve is much less erratic.

Validation Loss            | Validation Accuracy       | Learning Rate
:-------------------------:|:-------------------------:|:-------------------------:
 |  | 

## Experiment 3 - Disabling the restore-best-weights option

For some reason, the restore_best_weights option of the EarlyStopping callback actually restores a worse configuration of the model at the end of training. By disabling the option we want to prevent that behaviour.

``` python
keras.callbacks.EarlyStopping(
    patience=params["early_stopping_patience"],
    restore_best_weights=False
),
```

| Parameter                | Value                 |
|--------------------------|-----------------------|
| restore_best_weights     | False                 |

### Results

In the following graph we see that by disabling the `restore_best_weights` option we can actually keep the better model in the end.


Binary file not shown.
@@ -0,0 +1,146 @@
from typing import Tuple, Dict, List
import mlflow
import mlflow.tensorflow
from datetime import datetime
from tensorflow import keras
from keras.layers import (
    Dense,
    Flatten,
    Conv2D,
    MaxPooling2D,
    Dropout,
    BatchNormalization,
)
from keras.models import Sequential
from keras.utils import plot_model
import tensorflow as tf
from load_fer2013 import load_fer2013, preprocess


def setup_mlflow() -> None:
    mlflow.set_tracking_uri("http://127.0.0.1:8080")
    experiment_name = "Baseline"
    experiment_description = (
        "This is a neural network for classifying human emotions based on facial expressions. "
        "This experiment will create a baseline neural network for further experiments."
    )
    experiment_tags = {
        "project_name": "facial-emotion-recognition",
        "experiment_name": experiment_name,
        "dataset": "fer2013",
        "mlflow.note.content": experiment_description,
        "date": datetime.now().strftime("%d.%m.%Y %H:%M"),
    }
    mlflow.set_experiment(experiment_name)
    mlflow.set_experiment_tags(experiment_tags)
    mlflow.tensorflow.autolog()


def create_model(
    input_shape: Tuple[int, int, int], num_classes: int, params
) -> Sequential:
    model = Sequential(
        [
            Conv2D(32, (3, 3), activation="relu", input_shape=input_shape),
            MaxPooling2D(),
            BatchNormalization(),
            Conv2D(64, (3, 3), activation="relu"),
            MaxPooling2D(),
            BatchNormalization(),
            Flatten(),
            Dense(128, activation="relu"),
            Dropout(0.5),
            Dense(num_classes, activation="softmax"),
        ]
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=params["learning_rate"]),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=["accuracy", keras.metrics.CategoricalAccuracy()],
    )
    return model


def load_and_preprocess_data() -> Tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset]:
    data = load_fer2013()
    num_classes = 7

    # Define splits for train, validation, and test sets
    split_train = int(len(data) * 0.7)
    split_test = int(len(data) * 0.1)
    split_val = len(data) - split_train - split_test

    # Create a TensorFlow dataset from the data
    dataset = tf.data.Dataset.from_tensor_slices(dict(data))
    dataset = dataset.map(
        lambda row: preprocess(row, num_classes), num_parallel_calls=tf.data.AUTOTUNE
    )

    # Partition the data into train, validation, and test sets
    train_dataset = (
        dataset.take(split_train).shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
    )
    val_dataset = (
        dataset.skip(split_train).take(split_val).batch(32).prefetch(tf.data.AUTOTUNE)
    )
    test_dataset = (
        dataset.skip(split_train + split_val).batch(32).prefetch(tf.data.AUTOTUNE)
    )

    return train_dataset, val_dataset, test_dataset


def train_and_log_model(
    model: Sequential,
    train_dataset: tf.data.Dataset,
    val_dataset: tf.data.Dataset,
    params: Dict[str, str | int | List[str]],
) -> None:
    model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=params["epochs"],  # type: ignore
        batch_size=params["batch_size"],
        callbacks=[
            keras.callbacks.EarlyStopping(
                patience=params["early_stopping_patience"],  # type: ignore
                # restore_best_weights=True, Removing this stops the model from being far worse at the last step... Don't know why
            ),
            keras.callbacks.ModelCheckpoint("./output/best_model", save_best_only=True),
            keras.callbacks.ReduceLROnPlateau(
                factor=params["lr_reduction_factor"], patience=params["lr_patience"]  # type: ignore
            ),
        ],
    )
    # Save the trained model and log parameters and artifacts to MLflow
    model.save("./output/emotion.h5")
    mlflow.log_params(params)
    plot_model(model, to_file="./output/model.png", show_shapes=True)
    mlflow.log_artifact("./output/model.png")
    model.save_weights("./output/model_weights/model_weights")
    mlflow.log_artifact("./output/model_weights")


if __name__ == "__main__":
    setup_mlflow()
    with mlflow.start_run() as run:
        input_shape = (48, 48, 1)
        num_classes = 7
        params = {
            "batch_size": 128,
            "epochs": 50,
            "input_shape": input_shape,
            "num_classes": num_classes,
            "optimizer": "adam",
            "loss": "categorical_crossentropy",
            "early_stopping_patience": 5,
            "learning_rate": 0.0001,
            "lr_reduction_factor": 0.1,
            "lr_patience": 3,
        }

        train_dataset, val_dataset, test_dataset = load_and_preprocess_data()
        model = create_model(input_shape, num_classes, params)
        train_and_log_model(model, train_dataset, val_dataset, params)