Improve logging: the classifier module no longer imports napari-related or Qt-dependent code

jluethi · jluethi · commit 5495e666325e · 2024-07-19T18:19:40.000+02:00
diff --git a/src/napari_feature_classifier/classifier.py b/src/napari_feature_classifier/classifier.py
@@ -1,4 +1,5 @@
 """Core classifier class and helper functions."""
+import logging
 import pickle
 import random
 import string
@@ -11,10 +12,9 @@
 from sklearn.metrics import f1_score
 from sklearn.ensemble import RandomForestClassifier
 
-from napari_feature_classifier.utils import napari_info
 
-
-# TODO: define an interface for compatible classifiers (m.b. a subset of sklearn Estimators?)
+# TODO: define an interface for compatible classifiers (maybe a subset of
+# sklearn Estimators?)
 class Classifier:
     """Classifier class for napari-feature-classifier.
 
@@ -23,7 +23,7 @@ class Classifier:
     feature_names: Sequence[str]
         The names of the features that are used for classification
     class_names: Sequence[str]
-        The names of the classes. It's an ordered list that is matched to 
+        The names of the classes. It's an ordered list that is matched to
         annotations [1, 2, 3, ...]
     classifier: sklearn classifier
         The classifier that is used for classification. Default is a
@@ -42,7 +42,7 @@ class Classifier:
         The percentage of the data that is used for training. The rest is used
         for testing.
     _index_columns: list[str]
-        The columns that are used for indexing the data. 
+        The columns that are used for indexing the data.
         Hard-coded to roi_id and label
     _input_schema: pandera.SchemaModel
         The schema for the input data. It's used for validation.
@@ -51,10 +51,13 @@ class Classifier:
     _predict_schema: pandera.SchemaModel
         The schema for the prediction data.
     _data: pd.DataFrame
-        The internal data storage of the classifier. Contains both annotations 
+        The internal data storage of the classifier. Contains both annotations
         as well as feature measurements for all rows (annotated objects)
     """
+
     def __init__(self, feature_names, class_names, classifier=RandomForestClassifier()):
+        self.logger = logging.getLogger("classifier")
+        self.logger.setLevel(logging.INFO)
         self._feature_names: list[str] = list(feature_names)
         self._class_names: list[str] = list(class_names)
         self._classifier = classifier
@@ -79,13 +82,13 @@ def train(self):
         """
         Train the classifier on the data it already has in self._data.
         """
-        napari_info("Training classifier...")
+        self.logger.info("Training classifier...")
         train_data = self._data[self._data.hash < self._training_data_perc]
         test_data = self._data[self._data.hash >= self._training_data_perc]
 
-         # pylint: disable=C0103
+        # pylint: disable=C0103
         X_train = train_data.drop(["hash", "annotations"], axis=1)
-         # pylint: disable=C0103
+        # pylint: disable=C0103
         X_test = test_data.drop(["hash", "annotations"], axis=1)
 
         y_train = train_data["annotations"]
@@ -94,8 +97,7 @@ def train(self):
         self._classifier.fit(X_train, y_train)
 
         f1 = f1_score(y_test, self._classifier.predict(X_test), average="macro")
-        # napari_info("F1 score on test set: {}".format(f1))
-        napari_info(
+        self.logger.info(
             f"F1 score on test set: {f1} \n"
             f"Annotations split into {len(X_train)} training and {len(X_test)} "
             "test samples. \n"
@@ -130,7 +132,6 @@ def predict_on_dict(self, dict_of_dfs):
         # Make a prediction on each of the dataframes provided
         predicted_dicts = {}
         for roi in dict_of_dfs:
-            # napari_info(f"Making a prediction for {roi=}...")
             predicted_dicts[roi] = self.predict(dict_of_dfs[roi])
         return predicted_dicts
 
@@ -149,12 +150,12 @@ def add_features(self, df_raw: pd.DataFrame):
 
     def _validate_predict_features(self, df: pd.DataFrame) -> pd.Series:
         """
-        Validate the features that are received for prediction using 
+        Validate the features that are received for prediction using
         self._predict_schema.
         """
         df_no_nans = df.dropna(subset=self._feature_names)
         if len(df) != len(df_no_nans):
-            napari_info(
+            self.logger.info(
                 f"Could not do predictions for {len(df)-len(df_no_nans)}/{len(df)} "
                 "objects because of features that contained `NA`s."
             )
@@ -174,7 +175,7 @@ def _validate_input_features(self, df: pd.DataFrame) -> pd.DataFrame:
         # Drop rows that have features with `NA`s, notify the user.
         df_no_nans = df_annotated.dropna(subset=self._feature_names)
         if len(df_no_nans) != len(df_annotated):
-            napari_info(
+            self.logger.info(
                 f"Dropped {len(df_annotated)-len(df_no_nans)}/{len(df_annotated)} "
                 "objects because of features that contained `NA`s."
             )
@@ -193,14 +194,14 @@ def add_dict_of_features(self, dict_of_features):
         Parameters
         ----------
         dict_of_features : dict
-            Dictionary with roi as key and dataframe with feature measurements 
+            Dictionary with roi as key and dataframe with feature measurements
             and annotations as value
         """
         for roi in dict_of_features:
             if "roi_id" not in dict_of_features[roi]:
                 dict_of_features[roi]["roi_id"] = roi
             df = dict_of_features[roi]
-            napari_info(f"Adding features for {roi=}...")
+            self.logger.info(f"Adding features for {roi=}...")
             self.add_features(df)
 
     def get_class_names(self):
@@ -210,7 +211,7 @@ def get_feature_names(self):
         return self._feature_names
 
     def save(self, output_path):
-        napari_info(f"Saving classifier at {output_path}...")
+        self.logger.info(f"Saving classifier at {output_path}...")
         with open(output_path, "wb") as f:
             f.write(pickle.dumps(self))
 
diff --git a/src/napari_feature_classifier/classifier_widget.py b/src/napari_feature_classifier/classifier_widget.py
@@ -1,4 +1,5 @@
 """Classifier container widget for napari"""
+import logging
 import pickle
 
 from pathlib import Path
@@ -32,6 +33,7 @@
     napari_info,
     overwrite_check_passed,
     add_annotation_names,
+    NapariHandler,
 )
 
 
@@ -636,11 +638,27 @@ def __init__(self, viewer: napari.viewer.Viewer):
         self._init_container = None
         self._run_container = None
         self._init_container = None
+        self.setup_logging()
 
         super().__init__(widgets=[])
 
         self.initialize_init_widget()
 
+    def setup_logging(self):
+        """Attach one INFO-level NapariHandler to the "classifier" logger.
+
+        `logging.getLogger("classifier")` returns the same logger on every
+        call, so a handler added per widget would duplicate each message.
+        """
+        classifier_logger = logging.getLogger("classifier")
+        # Skip if an earlier call already attached a NapariHandler.
+        if any(isinstance(h, NapariHandler) for h in classifier_logger.handlers):
+            return
+
+        napari_handler = NapariHandler()
+        napari_handler.setLevel(logging.INFO)
+        classifier_logger.addHandler(napari_handler)
+
     def initialize_init_widget(self):
         self._init_container = ClassifierInitContainer(self._viewer)
         self.append(self._init_container)
diff --git a/src/napari_feature_classifier/utils.py b/src/napari_feature_classifier/utils.py
@@ -1,5 +1,6 @@
 """Utils function for the classifier"""
 from functools import lru_cache
+import logging
 import math
 from pathlib import Path
 
@@ -118,14 +119,21 @@ def napari_info(message):
     """
     try:
         show_info(message)
-    except:  # pylint: disable=bare-except
+    except:  # pylint: disable=bare-except # noqa #E722
         print(message)
     # TODO: Would be better to check if it's running in napari and print in all
     # other cases (e.g. if someone runs the classifier form a script).
     # But can't make that work at the moment
     if in_notebook():
         print(message)
 
+
+class NapariHandler(logging.Handler):
+    """Logging handler that passes each formatted record to `napari_info`."""
+    def emit(self, record):
+        napari_info(self.format(record))
+
+
 def get_valid_label_layers(viewer) -> list[str]:
     """
     Get a list of label layers that are not `Annotations` or `Predictions`.
@@ -183,7 +191,7 @@ def add_annotation_names(df, ClassSelection):
         Dataframe with annotations column.
     ClassSelection : Enum
         Enum with the class names.
-    
+
     Returns
     -------
     pd.DataFrame