Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added permutations_array to Moran_Local class #345

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions esda/crand.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,15 @@ def crand(
Spatial weights object
observed : ndarray
(N,) array with observed values
permutations : int
Number of permutations for conditional randomisation
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
keep : Boolean
If True, store simulation; else do not return randomised statistics
n_jobs : int
Expand Down Expand Up @@ -170,8 +177,40 @@ def crand(
# self neighbor, since conditional randomization conditions on site i.
cardinalities = np.array((adj_matrix != 0).sum(1)).flatten()
max_card = cardinalities.max()
permuted_ids = vec_permutations(max_card, n, permutations, seed)

if np.ndim(permutations) == 0:
# Random permutation array
if int(permutations) != permutations:
raise ValueError(
f"An integer number of permutations is required, but {permutations} was provided."
)
permuted_ids = vec_permutations(max_card, n, permutations, seed)
elif np.ndim(permutations) == 2:
# User defined permutation array
permuted_ids = permutations
if permuted_ids.shape[0] != permutations:
permutations = permuted_ids.shape[0]
warnings.warn(
f"Number of permutations has been adjusted to match the length of the "
f"permutations array. New value of 'permutations' is {permutations}.",
stacklevel=2,
)
if permuted_ids.shape[1] < max_card:
raise ValueError(
f"The `permutations` provided were shape {permuted_ids.shape}"
f", but must be ({permutations, max_card}) in order to supply"
f" enough possible neighbors to shuffle every observation."
f" Consider supplying a wider permutation matrix, with"
f" {max_card-permuted_ids.shape[1]} more columns."
)
else:
raise ValueError(
f"The `permutations` argument must be either an integer, or a"
f" two-dimensional numpy array of shape (p,k), where p is the"
f" number of permutations to run and k is the largest amount of"
f" shuffled pairs that must be considered at a given site, but"
f" {permutations} was provided."
)
if n_jobs != 1:
try:
import joblib # noqa: F401
Expand Down
13 changes: 9 additions & 4 deletions esda/geary_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,15 @@ def __init__(
(default=0.05)
Default significance threshold used for
creation of labels groups.
permutations : int
(default=999)
number of random permutations for calculation
of pseudo p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
n_jobs : int
(default=1)
Number of cores to be used in the conditional
Expand Down
12 changes: 9 additions & 3 deletions esda/getisord.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,15 @@ class G_Local: # noqa: N801
spatial weights instance as W or Graph aligned with y
transform : {'R', 'B'}
the type of w, either 'B' (binary) or 'R' (row-standardized)
permutations : int
the number of random permutations for calculating
pseudo p values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
star : boolean or float
whether or not to include focal observation in sums (default: False)
if the row-transformed weight is provided, then this is the default
Expand Down
12 changes: 9 additions & 3 deletions esda/join_counts_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,15 @@ def __init__(
----------
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo
p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
n_jobs : int
Number of cores to be used in the conditional randomisation. If -1,
all available cores are used.
Expand Down
11 changes: 9 additions & 2 deletions esda/join_counts_local_bv.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@ def __init__(
----------
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
n_jobs : int
Number of cores to be used in the conditional randomisation.
If -1, all available cores are used.
Expand Down
11 changes: 9 additions & 2 deletions esda/join_counts_local_mv.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@ def __init__(
----------
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
n_jobs : int
Number of cores to be used in the conditional randomisation. If -1,
all available cores are used.
Expand Down
37 changes: 28 additions & 9 deletions esda/moran.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,9 +891,15 @@ class Moran_Local: # noqa: N801
"D": doubly-standardized,
"U": untransformed (general weights),
"V": variance-stabilizing.
permutations : int
number of random permutations for calculation of pseudo
p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
geoda_quads : boolean
(default=False)
If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
Expand Down Expand Up @@ -1265,9 +1271,15 @@ class Moran_Local_BV: # noqa: N801
"D": doubly-standardized,
"U": untransformed (general weights),
"V": variance-stabilizing.
permutations : int
number of random permutations for calculation of pseudo
p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
geoda_quads : boolean
(default=False)
If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
Expand Down Expand Up @@ -1406,6 +1418,7 @@ def __init__(
w,
self.Is,
permutations,
None,
keep_simulations,
n_jobs=n_jobs,
stat_func=_moran_local_bv_crand,
Expand Down Expand Up @@ -1535,9 +1548,15 @@ class Moran_Local_Rate(Moran_Local): # noqa: N801
"D": doubly-standardized,
"U": untransformed (general weights),
"V": variance-stabilizing.
permutations : int
number of random permutations for calculation of pseudo
p_values
permutations : int, np.ndarray
Number of permutations for conditional randomisation, or the permutation array itself. Providing an integer will test the
conditional random null hypothesis for each site. Permutations
might be specified as an array of indices if the user needs to add
structure to this conditional permutation null hypothesis. Common
reasons to do this include exchangeability violations, which might
then require us to shuffle observations within (but not between)
groups, or linearity constraints, which may require certain
sequences of observation relationships to be preserved.
geoda_quads : boolean
(default=False)
If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
Expand Down
24 changes: 24 additions & 0 deletions esda/tests/test_moran.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def setup_method(self):
f = libpysal.io.open(libpysal.examples.get_path("desmith.txt"))
self.y = np.array(f.by_col["z"])


@parametrize_desmith
def test_Moran_Local(self, w):
lm = moran.Moran_Local(
Expand All @@ -174,6 +175,29 @@ def test_Moran_Local(self, w):
)
np.testing.assert_allclose(lm.z_sim[0], -0.6990291160835514)
np.testing.assert_allclose(lm.p_z_sim[0], 0.24226691753791396)

@parametrize_desmith
def test_Moran_Local_custom_perms(self, w):
np.random.seed(SEED)
cardinalities = np.array((w.sparse != 0).sum(1)).flatten()
max_card = cardinalities.max()
original_array = np.arange(len(self.y))
permutations_array = np.zeros((99, max_card), dtype=np.int32)

for i in range(99):
random_number = np.random.randint(0, len(self.y) - max_card)
permutations_array[i] = original_array[random_number:random_number + max_card]

lm = moran.Moran_Local(
self.y,
w,
transformation="r",
permutations=permutations_array,
keep_simulations=True,
seed=SEED,
)
np.testing.assert_allclose(lm.z_sim[0], -0.49229215590070813)
np.testing.assert_allclose(lm.p_z_sim[0], 0.311256412279757)

@parametrize_sac
def test_Moran_Local_labels(self, w):
Expand Down
Loading