diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py
index 6bb5618b3633f..e955be64cdee3 100644
--- a/benchmarks/bench_plot_randomized_svd.py
+++ b/benchmarks/bench_plot_randomized_svd.py
@@ -63,7 +63,8 @@
     A. Szlam et al. 2014
 """
 
-# Author: Giorgio Patrini
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 import gc
 import os.path
diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py
index 7082c922e1086..8eb124015009d 100644
--- a/examples/decomposition/plot_faces_decomposition.py
+++ b/examples/decomposition/plot_faces_decomposition.py
@@ -7,10 +7,6 @@
 matrix decomposition (dimension reduction) methods from the module
 :mod:`sklearn.decomposition` (see the documentation chapter
 :ref:`decompositions`).
-
-
-- Authors: Vlad Niculae, Alexandre Gramfort
-- License: BSD 3 clause
 """
 
 # Authors: The scikit-learn developers
diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index 39e8b19a3125f..5949ebc9ebe9f 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -27,9 +27,6 @@
 applied, can be accessed using the `n_estimators_` attribute. Overall, early
 stopping is a valuable tool to strike a balance between model performance and
 efficiency in gradient boosting.
-
-License: BSD 3 clause
-
 """
 
 # Authors: The scikit-learn developers
diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py
index 9359e9a982742..793a6916e8969 100644
--- a/examples/feature_selection/plot_select_from_model_diabetes.py
+++ b/examples/feature_selection/plot_select_from_model_diabetes.py
@@ -11,12 +11,6 @@
 
 We use the Diabetes dataset, which consists of 10 features collected from 442
 diabetes patients.
-
-Authors: `Manoj Kumar <mks542@nyu.edu>`_,
-`Maria Telenczuk <https://github.com/maikia>`_, Nicolas Hug.
-
-License: BSD 3 clause
-
 """
 
 # Authors: The scikit-learn developers
diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py
index ea6a884c3d486..f3a7468871b26 100644
--- a/examples/neighbors/plot_caching_nearest_neighbors.py
+++ b/examples/neighbors/plot_caching_nearest_neighbors.py
@@ -3,7 +3,7 @@
 Caching nearest neighbors
 =========================
 
-This examples demonstrates how to precompute the k nearest neighbors before
+This example demonstrates how to precompute the k nearest neighbors before
 using them in KNeighborsClassifier. KNeighborsClassifier can compute the
 nearest neighbors internally, but precomputing them can have several benefits,
 such as finer parameter control, caching for multiple use, or custom
@@ -11,7 +11,7 @@
 
 Here we use the caching property of pipelines to cache the nearest neighbors
 graph between multiple fits of KNeighborsClassifier. The first call is slow
-since it computes the neighbors graph, while subsequent call are faster as they
+since it computes the neighbors graph, while subsequent calls are faster as they
 do not need to recompute the graph. Here the durations are small since the
 dataset is small, but the gain can be more substantial when the dataset grows
-larger, or when the grid of parameter to search is large.
+larger, or when the grid of parameters to search is large.
diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx
index 31489f1107645..3dfb0421f0c19 100644
--- a/sklearn/_isotonic.pyx
+++ b/sklearn/_isotonic.pyx
@@ -1,4 +1,5 @@
-# Author: Nelle Varoquaux, Andrew Tulloch, Antony Lee
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 # Uses the pool adjacent violators algorithm (PAVA), with the
 # enhancement of searching for the longest decreasing subsequence to
diff --git a/sklearn/base.py b/sklearn/base.py
index 2c82cf05a6c5a..d14ab4517d063 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -444,7 +444,7 @@ def _repr_html_(self):
         """HTML representation of estimator.
 
         This is redundant with the logic of `_repr_mimebundle_`. The latter
-        should be favorted in the long term, `_repr_html_` is only
+        should be favored in the long term, `_repr_html_` is only
-        implemented for consumers who do not interpret `_repr_mimbundle_`.
+        implemented for consumers who do not interpret `_repr_mimebundle_`.
         """
         if get_config()["display"] != "diagram":
diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py
index 23f2255c723e2..2fa7253e665b8 100644
--- a/sklearn/cluster/_agglomerative.py
+++ b/sklearn/cluster/_agglomerative.py
@@ -2,10 +2,6 @@
 
 These routines perform some hierarchical agglomerative clustering of some
 input data.
-
-Authors : Vincent Michel, Bertrand Thirion, Alexandre Gramfort,
-          Gael Varoquaux
-License: BSD 3 clause
 """
 
 # Authors: The scikit-learn developers
diff --git a/sklearn/cluster/_dbscan_inner.pyx b/sklearn/cluster/_dbscan_inner.pyx
index fb502c9f39ab3..266b214bb269a 100644
--- a/sklearn/cluster/_dbscan_inner.pyx
+++ b/sklearn/cluster/_dbscan_inner.pyx
@@ -1,6 +1,7 @@
 # Fast inner loop for DBSCAN.
-# Author: Lars Buitinck
-# License: 3-clause BSD
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 from libcpp.vector cimport vector
 
diff --git a/sklearn/cluster/_hdbscan/_linkage.pyx b/sklearn/cluster/_hdbscan/_linkage.pyx
index 1293bdde39c20..5684193a13d40 100644
--- a/sklearn/cluster/_hdbscan/_linkage.pyx
+++ b/sklearn/cluster/_hdbscan/_linkage.pyx
@@ -1,9 +1,7 @@
 # Minimum spanning tree single linkage implementation for hdbscan
-# Authors: Leland McInnes <leland.mcinnes@gmail.com>
-#          Steve Astels <sastels@gmail.com>
-#          Meekail Zain <zainmeekail@gmail.com>
-# Copyright (c) 2015, Leland McInnes
-# All rights reserved.
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
diff --git a/sklearn/cluster/_hdbscan/_reachability.pyx b/sklearn/cluster/_hdbscan/_reachability.pyx
index a5e4848493e02..bff686ae0a636 100644
--- a/sklearn/cluster/_hdbscan/_reachability.pyx
+++ b/sklearn/cluster/_hdbscan/_reachability.pyx
@@ -1,9 +1,7 @@
 # mutual reachability distance computations
-# Authors: Leland McInnes <leland.mcinnes@gmail.com>
-#          Meekail Zain <zainmeekail@gmail.com>
-#          Guillaume Lemaitre <g.lemaitre58@gmail.com>
-# Copyright (c) 2015, Leland McInnes
-# All rights reserved.
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
diff --git a/sklearn/cluster/_hdbscan/_tree.pyx b/sklearn/cluster/_hdbscan/_tree.pyx
index 67ab0dbec6950..161092033b915 100644
--- a/sklearn/cluster/_hdbscan/_tree.pyx
+++ b/sklearn/cluster/_hdbscan/_tree.pyx
@@ -1,7 +1,7 @@
 # Tree handling (condensing, finding stable clusters) for hdbscan
-# Authors: Leland McInnes
-# Copyright (c) 2015, Leland McInnes
-# All rights reserved.
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py
index b4b92d8202b39..076566ba7f360 100644
--- a/sklearn/cluster/_hdbscan/hdbscan.py
+++ b/sklearn/cluster/_hdbscan/hdbscan.py
@@ -6,13 +6,6 @@
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-# Authors: Leland McInnes <leland.mcinnes@gmail.com>
-#          Steve Astels <sastels@gmail.com>
-#          John Healy <jchealy@gmail.com>
-#          Meekail Zain <zainmeekail@gmail.com>
-# Copyright (c) 2015, Leland McInnes
-# All rights reserved.
-
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 
diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx
index 29a0a924ec307..36ae0ab0d2414 100644
--- a/sklearn/cluster/_hierarchical_fast.pyx
+++ b/sklearn/cluster/_hierarchical_fast.pyx
@@ -1,4 +1,5 @@
-# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 import numpy as np
 cimport cython
diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx
index 0853d5f11d5e6..329e3075b0978 100644
--- a/sklearn/cluster/_k_means_elkan.pyx
+++ b/sklearn/cluster/_k_means_elkan.pyx
@@ -1,6 +1,5 @@
-# Author: Andreas Mueller
-#
-# Licence: BSD 3 clause
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 from cython cimport floating
 from cython.parallel import prange, parallel
diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
index 62e128dd6c75c..223ae426b5951 100755
--- a/sklearn/cluster/_optics.py
+++ b/sklearn/cluster/_optics.py
@@ -2,12 +2,6 @@
 
 These routines execute the OPTICS algorithm, and implement various
 cluster extraction methods of the ordered list.
-
-Authors: Shane Grigsby <refuge@rocktalus.com>
-         Adrin Jalali <adrinjalali@gmail.com>
-         Erich Schubert <erich@debian.org>
-         Hanmin Qin <qinhanmin2005@sina.com>
-License: BSD 3 clause
 """
 
 # Authors: The scikit-learn developers
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
index 12dad3ffabd8c..f343ada64cdd0 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
@@ -1,4 +1,5 @@
-# Author: Nicolas Hug
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 from cython.parallel import prange
 from libc.math cimport isnan
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
index fe234958e631a..dcbbf733ebb51 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
@@ -1,4 +1,5 @@
-# Author: Nicolas Hug
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 from cython.parallel import prange
 import numpy as np
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
index 5317b8277817a..8257fa974c4a0 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
@@ -1,4 +1,5 @@
-# Author: Nicolas Hug
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 from cython.parallel import prange
 from libc.math cimport isnan
diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
index 5cd9b4c85e617..e204eec6b9785 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
@@ -1,6 +1,7 @@
 """This module contains routines for building histograms."""
 
-# Author: Nicolas Hug
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 cimport cython
 from cython.parallel import prange
diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
index bb0c34876a3d0..de5b92f13c31a 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -5,7 +5,9 @@
 - Apply a split to a node, i.e. split the indices of the samples at the node
   into the newly created left and right children.
 """
-# Author: Nicolas Hug
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 cimport cython
 from cython.parallel import prange
diff --git a/sklearn/linear_model/_sag_fast.pyx.tp b/sklearn/linear_model/_sag_fast.pyx.tp
index 4502436ffe312..906928673b0b7 100644
--- a/sklearn/linear_model/_sag_fast.pyx.tp
+++ b/sklearn/linear_model/_sag_fast.pyx.tp
@@ -9,16 +9,11 @@ Generated file: sag_fast.pyx
 
 Each class is duplicated for all dtypes (float and double). The keywords
 between double braces are substituted during the build.
-
-Authors: Danny Sullivan <dbsullivan23@gmail.com>
-         Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
-         Arthur Mensch <arthur.mensch@m4x.org
-         Arthur Imbert <arthurimbert05@gmail.com>
-         Joan Massich <mailsik@gmail.com>
-
-License: BSD 3 clause
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # name_suffix, c_type, np_type
 dtypes = [('64', 'double', 'np.float64'),
           ('32', 'float', 'np.float32')]
diff --git a/sklearn/linear_model/_sgd_fast.pyx.tp b/sklearn/linear_model/_sgd_fast.pyx.tp
index 7944f02a1ab95..45cdf9172d8c4 100644
--- a/sklearn/linear_model/_sgd_fast.pyx.tp
+++ b/sklearn/linear_model/_sgd_fast.pyx.tp
@@ -8,15 +8,11 @@ Generated file: _sgd_fast.pyx
 
 Each relevant function is duplicated for the dtypes float and double.
 The keywords between double braces are substituted during the build.
-
-Authors: Peter Prettenhofer <peter.prettenhofer@gmail.com>
-         Mathieu Blondel (partial_fit support)
-         Rob Zinkov (passive-aggressive)
-         Lars Buitinck
-
-License: BSD 3 clause
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # The dtypes are defined as follows (name_suffix, c_type, np_type)
 dtypes = [
     ("64", "double", "np.float64"),
diff --git a/sklearn/manifold/_barnes_hut_tsne.pyx b/sklearn/manifold/_barnes_hut_tsne.pyx
index f0906fbf2bec8..e84df4a9074b2 100644
--- a/sklearn/manifold/_barnes_hut_tsne.pyx
+++ b/sklearn/manifold/_barnes_hut_tsne.pyx
@@ -1,6 +1,6 @@
-# Author: Christopher Moody <chrisemoody@gmail.com>
-# Author: Nick Travers <nickt@squareup.com>
-# Implementation by Chris Moody & Nick Travers
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # See http://homepage.tudelft.nl/19j49/t-SNE.html for reference
 # implementations and papers describing the technique
 
diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py
index 926d54ea74217..ea605198e36d6 100644
--- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py
+++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py
@@ -5,9 +5,6 @@
 # Pairwise Distances Reductions
 # =============================
 #
-#   Authors: The scikit-learn developers.
-#   License: BSD 3 clause
-#
 # Overview
 # --------
 #
diff --git a/sklearn/neighbors/_binary_tree.pxi.tp b/sklearn/neighbors/_binary_tree.pxi.tp
index c25740c0d6f6c..de3bcb0e5d916 100644
--- a/sklearn/neighbors/_binary_tree.pxi.tp
+++ b/sklearn/neighbors/_binary_tree.pxi.tp
@@ -14,14 +14,11 @@ implementation_specific_values = [
 # KD Tree and Ball Tree
 # =====================
 #
-#    Author: Jake Vanderplas <jakevdp@cs.washington.edu>, 2012-2013
-#            Omar Salman <omar.salman@arbisoft.com>
-#
-#    License: BSD
-#
 # _binary_tree.pxi is generated and is then literally Cython included in
 # ball_tree.pyx and kd_tree.pyx. See ball_tree.pyx.tp and kd_tree.pyx.tp.
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 }}
 
 
diff --git a/sklearn/neighbors/_quad_tree.pxd b/sklearn/neighbors/_quad_tree.pxd
index 9ed033e747314..e7e817902f103 100644
--- a/sklearn/neighbors/_quad_tree.pxd
+++ b/sklearn/neighbors/_quad_tree.pxd
@@ -1,5 +1,5 @@
-# Author: Thomas Moreau <thomas.moreau.2010@gmail.com>
-# Author: Olivier Grisel <olivier.grisel@ensta.fr>
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 # See quad_tree.pyx for details.
 
diff --git a/sklearn/neighbors/_quad_tree.pyx b/sklearn/neighbors/_quad_tree.pyx
index f1ef4e64f30fe..aec79da505f52 100644
--- a/sklearn/neighbors/_quad_tree.pyx
+++ b/sklearn/neighbors/_quad_tree.pyx
@@ -1,5 +1,5 @@
-# Author: Thomas Moreau <thomas.moreau.2010@gmail.com>
-# Author: Olivier Grisel <olivier.grisel@ensta.fr>
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 
 from cpython cimport Py_INCREF, PyObject, PyTypeObject
diff --git a/sklearn/svm/src/libsvm/libsvm_helper.c b/sklearn/svm/src/libsvm/libsvm_helper.c
index 381810ab75242..b87b52a6fbdc2 100644
--- a/sklearn/svm/src/libsvm/libsvm_helper.c
+++ b/sklearn/svm/src/libsvm/libsvm_helper.c
@@ -17,9 +17,9 @@
  * but libsvm does not expose this structure, so we define it here
  * along some utilities to convert from numpy arrays.
  *
- * License: BSD 3 clause
+ * Authors: The scikit-learn developers
+ * SPDX-License-Identifier: BSD-3-Clause
  *
- * Author: 2010 Fabian Pedregosa <fabian.pedregosa@inria.fr>
  */
 
 
diff --git a/sklearn/utils/_isfinite.pyx b/sklearn/utils/_isfinite.pyx
index 41fb71aee40c0..f3918eeacb5c4 100644
--- a/sklearn/utils/_isfinite.pyx
+++ b/sklearn/utils/_isfinite.pyx
@@ -1,4 +1,5 @@
-# Author: John Kirkham, Meekail Zain, Thomas Fan
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
 
 from libc.math cimport isnan, isinf
 from cython cimport floating
diff --git a/sklearn/utils/_seq_dataset.pyx.tp b/sklearn/utils/_seq_dataset.pyx.tp
index ab7a49a80cb9c..026768e77b50c 100644
--- a/sklearn/utils/_seq_dataset.pyx.tp
+++ b/sklearn/utils/_seq_dataset.pyx.tp
@@ -9,14 +9,11 @@ Generated file: _seq_dataset.pyx
 
 Each class is duplicated for all dtypes (float and double). The keywords
 between double braces are substituted during the build.
-
-Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
-        Arthur Imbert <arthurimbert05@gmail.com>
-        Joan Massich <mailsik@gmail.com>
-
-License: BSD 3 clause
 """
 
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
 # name_suffix, c_type, np_type
 dtypes = [('64', 'float64_t', 'np.float64'),
           ('32', 'float32_t', 'np.float32')]