top_n CLI arg for birdnet_analyzer.analyze (#577)

* top_n CLI arg for birdnet_analyzer.analyze * Allow lower min_conf values * top-n input for gui
kahst · Feb 13, 2025 · 1ea3e81 · 1ea3e81
1 parent 9e5acbe
commit 1ea3e81
Show file tree

Hide file tree

Showing 15 changed files with 157 additions and 54 deletions.
diff --git a/birdnet_analyzer/analyze/__init__.py b/birdnet_analyzer/analyze/__init__.py
@@ -63,6 +63,7 @@ def main():
     # Load species list from location filter or provided list
     cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK = args.lat, args.lon, args.week
     cfg.LOCATION_FILTER_THRESHOLD = args.sf_thresh
+    cfg.TOP_N = args.top_n
 
     if cfg.LATITUDE == -1 and cfg.LONGITUDE == -1:
         if not args.slist:

diff --git a/birdnet_analyzer/analyze/utils.py b/birdnet_analyzer/analyze/utils.py
@@ -93,11 +93,10 @@ def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_p
         start, end = timestamp.split("-", 1)
 
         for c in result[timestamp]:
-            if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
-                selection_id += 1
-                label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
-                code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
-                rstring += f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n"
+            selection_id += 1
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
+            rstring += f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n"
 
         # Write result string to file
         out_string += rstring
@@ -133,11 +132,10 @@ def generate_audacity(timestamps: list[str], result: dict[str, list], result_pat
         rstring = ""
 
         for c in result[timestamp]:
-            if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
-                label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
-                ts = timestamp.replace("-", "\t")
-                lbl = label.replace("_", ", ")
-                rstring += f"{ts}\t{lbl}\t{c[1]:.4f}\n"
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            ts = timestamp.replace("-", "\t")
+            lbl = label.replace("_", ", ")
+            rstring += f"{ts}\t{lbl}\t{c[1]:.4f}\n"
 
         # Write result string to file
         out_string += rstring
@@ -169,23 +167,22 @@ def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_
         start, end = timestamp.split("-", 1)
 
         for c in result[timestamp]:
-            if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
-                label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
-                rstring += "{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}\n".format(
-                    parent_folder.rstrip("/"),
-                    folder_name,
-                    filename,
-                    start,
-                    float(end) - float(start),
-                    label.split("_", 1)[0],
-                    label.split("_", 1)[-1],
-                    c[1],
-                    cfg.LATITUDE,
-                    cfg.LONGITUDE,
-                    cfg.WEEK,
-                    cfg.SIG_OVERLAP,
-                    (1.0 - cfg.SIGMOID_SENSITIVITY) + 1.0,
-                )
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            rstring += "{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}\n".format(
+                parent_folder.rstrip("/"),
+                folder_name,
+                filename,
+                start,
+                float(end) - float(start),
+                label.split("_", 1)[0],
+                label.split("_", 1)[-1],
+                c[1],
+                cfg.LATITUDE,
+                cfg.LONGITUDE,
+                cfg.WEEK,
+                cfg.SIG_OVERLAP,
+                (1.0 - cfg.SIGMOID_SENSITIVITY) + 1.0,
+            )
 
         # Write result string to file
         out_string += rstring
@@ -214,10 +211,8 @@ def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str
 
         for c in result[timestamp]:
             start, end = timestamp.split("-", 1)
-
-            if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
-                label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
-                rstring += f"{start},{end},{label.split('_', 1)[0]},{label.split('_', 1)[-1]},{c[1]:.4f},{afile_path}\n"
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            rstring += f"{start},{end},{label.split('_', 1)[0]},{label.split('_', 1)[-1]},{c[1]:.4f},{afile_path}\n"
 
         # Write result string to file
         out_string += rstring
@@ -582,11 +577,20 @@ def analyze_file(item):
                     pred = p[i]
 
                     # Assign scores to labels
-                    p_labels = zip(cfg.LABELS, pred, strict=True)
+                    p_labels = [
+                        p
+                        for p in zip(cfg.LABELS, pred, strict=True)
+                        if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE)
+                        and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
+                    ]
 
                     # Sort by score
                     p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
 
+                    if cfg.TOP_N:
+                        p_sorted = p_sorted[: cfg.TOP_N]
+
+                    # TODO: filter out by top n or min conf at this point already
                     # Store top 5 results and advance indices
                     results[str(s_start) + "-" + str(s_end)] = p_sorted
 

diff --git a/birdnet_analyzer/cli.py b/birdnet_analyzer/cli.py
@@ -109,9 +109,9 @@ def species_args():
     )
     p.add_argument(
         "--sf_thresh",
-        type=lambda a: max(0.01, min(0.99, float(a))),
+        type=lambda a: max(0.0001, min(0.99, float(a))),
         default=cfg.LOCATION_FILTER_THRESHOLD,
-        help="Minimum species occurrence frequency threshold for location filter. Values in [0.01, 0.99].",
+        help="Minimum species occurrence frequency threshold for location filter. Values in [0.0001, 0.99].",
     )
 
     return p
@@ -220,8 +220,8 @@ def min_conf_args():
     p.add_argument(
         "--min_conf",
         default=cfg.MIN_CONFIDENCE,
-        type=lambda a: max(0.01, min(0.99, float(a))),
-        help="Minimum confidence threshold. Values in [0.01, 0.99].",
+        type=lambda a: max(0.00001, min(0.99, float(a))),
+        help="Minimum confidence threshold. Values in [0.00001, 0.99].",
     )
 
     return p
@@ -337,6 +337,12 @@ def __call__(self, parser, args, values, option_string=None):
         help="Skip files that have already been analyzed.",
     )
 
+    parser.add_argument(
+        "--top_n",
+        type=lambda a: max(1, int(a)),
+        help="Saves only the top N predictions for each segment independent of their score. Threshold will be ignored.",
+    )
+
     return parser
 
 
@@ -490,9 +496,9 @@ def species_parser():
 def train_parser():
     """
     Creates an argument parser for training a custom classifier with BirdNET.
-    The parser includes arguments for various training parameters such as input data path, crop mode, 
-    output path, number of epochs, batch size, validation split ratio, learning rate, hidden units, 
-    dropout rate, mixup, upsampling ratio and mode, model format, model save mode, cache mode and file, 
+    The parser includes arguments for various training parameters such as input data path, crop mode,
+    output path, number of epochs, batch size, validation split ratio, learning rate, hidden units,
+    dropout rate, mixup, upsampling ratio and mode, model format, model save mode, cache mode and file,
     and hyperparameter tuning options.
     Returns:
         argparse.ArgumentParser: Configured argument parser for training a custom classifier.

diff --git a/birdnet_analyzer/config.py b/birdnet_analyzer/config.py
@@ -48,6 +48,9 @@
 BANDPASS_FMIN: int = 0
 BANDPASS_FMAX: int = 15000
 
+# Top N predictions to keep per segment (the confidence threshold is ignored when set); disabled if None
+TOP_N = None
+
 # Audio speed
 AUDIO_SPEED: float = 1.0
 

diff --git a/birdnet_analyzer/gui/analysis.py b/birdnet_analyzer/gui/analysis.py
@@ -35,6 +35,8 @@ def analyze_file_wrapper(entry):
 def run_analysis(
     input_path: str,
     output_path: str | None,
+    use_top_n: bool,
+    top_n: int,
     confidence: float,
     sensitivity: float,
     overlap: float,
@@ -96,6 +98,7 @@ def run_analysis(
     cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK = lat, lon, -1 if use_yearlong else week
     cfg.LOCATION_FILTER_THRESHOLD = sf_thresh
     cfg.SKIP_EXISTING_RESULTS = skip_existing
+    cfg.TOP_N = top_n if use_top_n else None
 
     if species_list_choice == gu._CUSTOM_SPECIES:
         if not species_list_file or not species_list_file.name:

diff --git a/birdnet_analyzer/gui/multi_file.py b/birdnet_analyzer/gui/multi_file.py
@@ -14,6 +14,8 @@
 
 def run_batch_analysis(
     output_path,
+    use_top_n,
+    top_n,
     confidence,
     sensitivity,
     overlap,
@@ -52,6 +54,8 @@ def run_batch_analysis(
     return run_analysis(
         None,
         output_path,
+        use_top_n,
+        top_n,
         confidence,
         sensitivity,
         overlap,
@@ -129,9 +133,16 @@ def select_directory_wrapper():  # Nishant - Function modified for For Folder se
                     show_progress=False,
                 )
 
-        confidence_slider, sensitivity_slider, overlap_slider, audio_speed_slider, fmin_number, fmax_number = (
-            gu.sample_sliders()
-        )
+        (
+            use_top_n,
+            top_n_input,
+            confidence_slider,
+            sensitivity_slider,
+            overlap_slider,
+            audio_speed_slider,
+            fmin_number,
+            fmax_number,
+        ) = gu.sample_sliders()
 
         (
             species_list_radio,
@@ -199,6 +210,8 @@ def select_directory_wrapper():  # Nishant - Function modified for For Folder se
 
         inputs = [
             output_directory_predict_state,
+            use_top_n,
+            top_n_input,
             confidence_slider,
             sensitivity_slider,
             overlap_slider,

diff --git a/birdnet_analyzer/gui/single_file.py b/birdnet_analyzer/gui/single_file.py
@@ -11,6 +11,8 @@
 
 def run_single_file_analysis(
     input_path,
+    use_top_n,
+    top_n,
     confidence,
     sensitivity,
     overlap,
@@ -42,6 +44,8 @@ def run_single_file_analysis(
     result_filepath = run_analysis(
         input_path,
         None,
+        use_top_n,
+        top_n,
         confidence,
         sensitivity,
         overlap,
@@ -97,9 +101,16 @@ def build_single_analysis_tab():
             )
         audio_path_state = gr.State()
 
-        confidence_slider, sensitivity_slider, overlap_slider, audio_speed_slider, fmin_number, fmax_number = (
-            gu.sample_sliders(False)
-        )
+        (
+            use_top_n,
+            top_n_input,
+            confidence_slider,
+            sensitivity_slider,
+            overlap_slider,
+            audio_speed_slider,
+            fmin_number,
+            fmax_number,
+        ) = gu.sample_sliders(False)
 
         (
             species_list_radio,
@@ -147,6 +158,8 @@ def try_generate_spectrogram(audio_path, generate_spectrogram):
 
         inputs = [
             audio_path_state,
+            use_top_n,
+            top_n_input,
             confidence_slider,
             sensitivity_slider,
             overlap_slider,

diff --git a/birdnet_analyzer/gui/utils.py b/birdnet_analyzer/gui/utils.py
@@ -190,11 +190,11 @@ def build_footer():
             f"""
                 <div style='display: flex; justify-content: space-around; align-items: center; padding: 10px; text-align: center'>
                     <div>
-                        <div style="display: flex;flex-direction: row;">GUI version:&nbsp<span id="current-version">{os.environ['GUI_VERSION'] if FROZEN else 'main'}</span><span style="display: none" id="update-available"><a>+</a></span></div>
+                        <div style="display: flex;flex-direction: row;">GUI version:&nbsp<span id="current-version">{os.environ["GUI_VERSION"] if FROZEN else "main"}</span><span style="display: none" id="update-available"><a>+</a></span></div>
                         <div>Model version: {cfg.MODEL_VERSION}</div>
                     </div>
                     <div>K. Lisa Yang Center for Conservation Bioacoustics<br>Chemnitz University of Technology</div>
-                    <div>{loc.localize('footer-help')}:<br><a href='https://birdnet.cornell.edu/analyzer' target='_blank'>birdnet.cornell.edu/analyzer</a></div>
+                    <div>{loc.localize("footer-help")}:<br><a href='https://birdnet.cornell.edu/analyzer' target='_blank'>birdnet.cornell.edu/analyzer</a></div>
                 </div>
                 """
         )
@@ -259,14 +259,37 @@ def sample_sliders(opened=True):
     with gr.Accordion(loc.localize("inference-settings-accordion-label"), open=opened):
         with gr.Group():
             with gr.Row():
+                use_top_n_checkbox = gr.Checkbox(
+                    label=loc.localize("inference-settings-use-top-n-checkbox-label"),
+                    value=False,
+                    info=loc.localize("inference-settings-use-top-n-checkbox-info"),
+                )
+                top_n_input = gr.Number(
+                    value=5,
+                    minimum=1,
+                    precision=1,
+                    visible=False,
+                    label=loc.localize("inference-settings-top-n-number-label"),
+                    info=loc.localize("inference-settings-top-n-number-info"),
+                )
                 confidence_slider = gr.Slider(
-                    minimum=0,
-                    maximum=1,
+                    minimum=0.001,
+                    maximum=0.99,
                     value=0.5,
-                    step=0.01,
+                    step=0.001,
                     label=loc.localize("inference-settings-confidence-slider-label"),
                     info=loc.localize("inference-settings-confidence-slider-info"),
                 )
+
+        use_top_n_checkbox.change(
+            lambda use_top_n: (gr.Number(visible=use_top_n), gr.Slider(visible=not use_top_n)),
+            inputs=use_top_n_checkbox,
+            outputs=[top_n_input, confidence_slider],
+            show_progress=False,
+        )
+
+        with gr.Group():
+            with gr.Row():
                 sensitivity_slider = gr.Slider(
                     minimum=0.5,
                     maximum=1.5,
@@ -283,7 +306,7 @@ def sample_sliders(opened=True):
                     label=loc.localize("inference-settings-overlap-slider-label"),
                     info=loc.localize("inference-settings-overlap-slider-info"),
                 )
-                
+
             with gr.Row():
                 audio_speed_slider = gr.Slider(
                     minimum=-10,
@@ -309,7 +332,16 @@ def sample_sliders(opened=True):
                     info=loc.localize("inference-settings-fmax-number-info"),
                 )
 
-        return confidence_slider, sensitivity_slider, overlap_slider, audio_speed_slider, fmin_number, fmax_number
+        return (
+            use_top_n_checkbox,
+            top_n_input,
+            confidence_slider,
+            sensitivity_slider,
+            overlap_slider,
+            audio_speed_slider,
+            fmin_number,
+            fmax_number,
+        )
 
 
 def locale():

diff --git a/birdnet_analyzer/lang/de.json b/birdnet_analyzer/lang/de.json
@@ -8,6 +8,10 @@
     "single-tab-output-header-common-name": "Trivialname",
     "single-tab-output-header-confidence": "Konfidenz",
     "inference-settings-accordion-label": "Inferenzeinstellungen",
+    "inference-settings-use-top-n-checkbox-label": "Oberste N Arten verwenden",
+    "inference-settings-use-top-n-checkbox-info": "Wählt die obersten N Arten aus, sortiert nach Konfidenz.",
+    "inference-settings-top-n-number-label": "Top N Arten",
+    "inference-settings-top-n-number-info": "Ignoriert die Konfidenzschwelle.",
     "inference-settings-confidence-slider-label": "Konfidenz-Schwellenwert",
     "inference-settings-confidence-slider-info": "Passen Sie den Schwellenwert an, um Ergebnisse mit einem Wert unter diesem Niveau zu ignorieren.",
     "inference-settings-sensitivity-slider-label": "Sensitivität",

diff --git a/birdnet_analyzer/lang/en.json b/birdnet_analyzer/lang/en.json
@@ -8,6 +8,10 @@
     "single-tab-output-header-common-name": "Common name",
     "single-tab-output-header-confidence": "Confidence",
     "inference-settings-accordion-label": "Inference settings",
+    "inference-settings-use-top-n-checkbox-label": "Use top N species",
+    "inference-settings-use-top-n-checkbox-info": "Picks the top N species sorted by confidence score.",
+    "inference-settings-top-n-number-label": "Top N species",
+    "inference-settings-top-n-number-info": "Ignores the confidence threshold.",
     "inference-settings-confidence-slider-label": "Minimum confidence",
     "inference-settings-confidence-slider-info": "Adjust the threshold to ignore results with confidence below this level.",
     "inference-settings-sensitivity-slider-label": "Sensitivity",
@@ -183,8 +187,8 @@
     "validation-no-valid-frequency": "Please enter a valid frequency in",
     "validation-no-audio-directory-selected": "No audio directory selected",
     "validation-no-negative-samples-in-binary-classification": "Negative labels can't be used with binary classification",
-    "validation-non-event-samples-required-in-binary-classification": "Non-event samples are required for binary classification", 
-    "validation-only-repeat-upsampling-for-multi-label": "Only repeat-upsampling ist available for multi-label", 
+    "validation-non-event-samples-required-in-binary-classification": "Non-event samples are required for binary classification",
+    "validation-only-repeat-upsampling-for-multi-label": "Only repeat-upsampling is available for multi-label",
     "progress-preparing": "Preparing",
     "progress-starting": "Starting",
     "progress-build-classifier": "Loading data & building classifier",