pysal · martirenom · Jul 31, 2024 · Aug 2, 2024 · Aug 15, 2024 · Aug 15, 2024
diff --git a/esda/crand.py b/esda/crand.py
@@ -89,8 +89,15 @@ def crand(
         Spatial weights object
     observed : ndarray
         (N,) array with observed values
-    permutations : int
-        Number of permutations for conditional randomisation
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     keep : Boolean
         If True, store simulation; else do not return randomised statistics
     n_jobs : int
@@ -170,8 +177,40 @@ def crand(
     # self neighbor, since conditional randomization conditions on site i.
     cardinalities = np.array((adj_matrix != 0).sum(1)).flatten()
     max_card = cardinalities.max()
-    permuted_ids = vec_permutations(max_card, n, permutations, seed)
 
+    if np.ndim(permutations) == 0:
+        # Random permutation array
+        if int(permutations) != permutations:
+            raise ValueError(
+            f"An integer number of permutations is required, but {permutations} was provided."
+            )
+        permuted_ids = vec_permutations(max_card, n, permutations, seed)
+    elif np.ndim(permutations) == 2:
+        # User defined permutation array
+        permuted_ids = permutations
+        if permuted_ids.shape[0] != permutations:
+            permutations = permuted_ids.shape[0]
+            warnings.warn(
+                f"Number of permutations has been adjusted to match the length of the "
+                f"permutations array. New value of 'permutations' is {permutations}.",
+                stacklevel=2,
+            )
+        if permuted_ids.shape[1] < max_card:
+            raise ValueError(
+            f"The `permutations` provided were shape {permuted_ids.shape}"
+            f", but must be ({permutations, max_card}) in order to supply"
+            f" enough possible neighbors to shuffle every observation."
+            f" Consider supplying a wider permutation matrix, with"
+            f" {max_card-permuted_ids.shape[1]} more columns."
+            )
+    else:
+        raise ValueError(
+            f"The `permutations` argument must be either an integer, or a"
+            f" two-dimensional numpy array of shape (p,k), where p is the"
+            f" number of permutations to run and k is the largest amount of"
+            f" shuffled pairs that must be considered at a given site, but"
+            f" {permutations} was provided."
+            )    
     if n_jobs != 1:
         try:
             import joblib  # noqa: F401

diff --git a/esda/geary_local.py b/esda/geary_local.py
@@ -42,10 +42,15 @@ def __init__(
                            (default=0.05)
                            Default significance threshold used for
                            creation of labels groups.
-        permutations     : int
-                           (default=999)
-                           number of random permutations for calculation
-                           of pseudo p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs           : int
                            (default=1)
                            Number of cores to be used in the conditional

diff --git a/esda/getisord.py b/esda/getisord.py
@@ -257,9 +257,15 @@ class G_Local:  # noqa: N801
         spatial weights instance as W or Graph aligned with y
     transform : {'R', 'B'}
         the type of w, either 'B' (binary) or 'R' (row-standardized)
-    permutations : int
-        the number of random permutations for calculating
-        pseudo p values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     star : boolean or float
         whether or not to include focal observation in sums (default: False)
         if the row-transformed weight is provided, then this is the default

diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py
@@ -30,9 +30,15 @@ def __init__(
         ----------
         connectivity : W | Graph
             spatial weights instance as W or Graph aligned with y
-        permutations : int
-            number of random permutations for calculation of pseudo
-            p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs : int
             Number of cores to be used in the conditional randomisation. If -1,
             all available cores are used.

diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py
@@ -30,8 +30,15 @@ def __init__(
         ----------
         connectivity : W | Graph
             spatial weights instance as W or Graph aligned with y
-        permutations : int
-            number of random permutations for calculation of pseudo p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs : int
             Number of cores to be used in the conditional randomisation.
             If -1, all available cores are used.

diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py
@@ -30,8 +30,15 @@ def __init__(
         ----------
         connectivity : W | Graph
             spatial weights instance as W or Graph aligned with y
-        permutations : int
-            number of random permutations for calculation of pseudo p_values
+        permutations : int, np.ndarray
+            Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+            conditional random null hypothesis for each site. Permutations
+            might be specified as an array of indices if the user needs to add
+            structure to this conditional permutation null hypothesis. Common
+            reasons to do this include exchangeability violations, which might
+            then require us to shuffle observations within (but not between)
+            groups, or linearity constraints, which may require certain
+            sequences of observation relationships to be preserved.
         n_jobs : int
             Number of cores to be used in the conditional randomisation. If -1,
             all available cores are used.

diff --git a/esda/moran.py b/esda/moran.py
@@ -891,9 +891,15 @@ class Moran_Local:  # noqa: N801
          "D": doubly-standardized,
          "U": untransformed (general weights),
          "V": variance-stabilizing.
-    permutations : int
-        number of random permutations for calculation of pseudo
-        p_values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     geoda_quads : boolean
         (default=False)
         If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
@@ -1265,9 +1271,15 @@ class Moran_Local_BV:  # noqa: N801
         "D": doubly-standardized,
         "U": untransformed (general weights),
         "V": variance-stabilizing.
-    permutations   : int
-        number of random permutations for calculation of pseudo
-        p_values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     geoda_quads    : boolean
         (default=False)
         If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
@@ -1406,6 +1418,7 @@ def __init__(
                 w,
                 self.Is,
                 permutations,
+                None,
                 keep_simulations,
                 n_jobs=n_jobs,
                 stat_func=_moran_local_bv_crand,
@@ -1535,9 +1548,15 @@ class Moran_Local_Rate(Moran_Local):  # noqa: N801
         "D": doubly-standardized,
         "U": untransformed (general weights),
         "V": variance-stabilizing.
-    permutations : int
-        number of random permutations for calculation of pseudo
-        p_values
+    permutations : int, np.ndarray
+        Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
+        conditional random null hypothesis for each site. Permutations
+        might be specified as an array of indices if the user needs to add
+        structure to this conditional permutation null hypothesis. Common
+        reasons to do this include exchangeability violations, which might
+        then require us to shuffle observations within (but not between)
+        groups, or linearity constraints, which may require certain
+        sequences of observation relationships to be preserved.
     geoda_quads : boolean
          (default=False)
          If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4

diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py
@@ -162,6 +162,7 @@ def setup_method(self):
         f = libpysal.io.open(libpysal.examples.get_path("desmith.txt"))
         self.y = np.array(f.by_col["z"])
 
+
     @parametrize_desmith
     def test_Moran_Local(self, w):
         lm = moran.Moran_Local(
@@ -174,6 +175,29 @@ def test_Moran_Local(self, w):
         )
         np.testing.assert_allclose(lm.z_sim[0], -0.6990291160835514)
         np.testing.assert_allclose(lm.p_z_sim[0], 0.24226691753791396)
+
+    @parametrize_desmith
+    def test_Moran_Local_custom_perms(self, w):
+        np.random.seed(SEED)
+        cardinalities = np.array((w.sparse != 0).sum(1)).flatten()
+        max_card = cardinalities.max()
+        original_array = np.arange(len(self.y))
+        permutations_array = np.zeros((99, max_card), dtype=np.int32)
+
+        for i in range(99):
+            random_number = np.random.randint(0, len(self.y) - max_card)
+            permutations_array[i] = original_array[random_number:random_number + max_card]
+
+        lm = moran.Moran_Local(
+            self.y,
+            w,
+            transformation="r",
+            permutations=permutations_array,
+            keep_simulations=True,
+            seed=SEED,
+        )
+        np.testing.assert_allclose(lm.z_sim[0], -0.49229215590070813)
+        np.testing.assert_allclose(lm.p_z_sim[0], 0.311256412279757)
 
     @parametrize_sac
     def test_Moran_Local_labels(self, w):