Skip to content

Commit

Permalink
Fix edge cases for calc_even_quantiles
Browse files Browse the repository at this point in the history
  • Loading branch information
tnipen committed Mar 14, 2024
1 parent a0d52f7 commit 3852034
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 21 deletions.
72 changes: 51 additions & 21 deletions src/api/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,10 @@ vec2 gridpp::calc_quantile(const vec3& array, const vec2& quantile) {
if(X == 0)
return vec2();
int T = array[0][0].size();
if(T == 0)
return vec2();
if(T == 0) {
vec2 output = gridpp::init_vec2(Y, X, gridpp::MV);
return output;
}
vec2 output = gridpp::init_vec2(Y, X);
for(int y = 0; y < Y; y++) {
for(int x = 0; x < X; x++) {
Expand Down Expand Up @@ -257,48 +259,76 @@ double gridpp::clock() {
return sec + msec/1e6;
}
vec gridpp::calc_even_quantiles(const vec& values, int num) {
if(num >= values.size())
return values;

vec quantiles;
if(num == 0)
int size = values.size();
if(num == 0 || size == 0)
return quantiles;

vec sorted = values;
std::sort(sorted.begin(), sorted.end());

quantiles.reserve(num);
quantiles.push_back(sorted[0]);
// Asking for more quantiles than we have data points
if(num >= size) {
// Find the unique values
quantiles.reserve(num);
quantiles.push_back(sorted[0]);
for(int i = 1; i < size; i++) {
if(sorted[i] != sorted[i-1])
quantiles.push_back(sorted[i]);
}
return quantiles;
}


float lowest = sorted[0];
float highest = sorted[size - 1];

// If there are multiple identical values at the start, pick the first value past the set of
// identical values
int count_lower = 0;
for(int i = 0; i < sorted.size(); i++) {
if(sorted[i] != quantiles[0])
if(sorted[i] != lowest)
break;
count_lower++;
}
int size = sorted.size();
if(count_lower > size / num)
quantiles.reserve(num);
quantiles.push_back(lowest);
float last_added = lowest;
if(num == 2) {
if(lowest != highest)
quantiles.push_back(highest);
return quantiles;
}

// Add the first past the lowest set of repeated values
bool repeated_at_beginning = count_lower < size && count_lower > size / num;
if(repeated_at_beginning) {
assert(count_lower < sorted.size());
quantiles.push_back(sorted[count_lower]);
}
last_added = quantiles[quantiles.size() - 1];

// Remove duplicates
vec values_unique;
values_unique.reserve(sorted.size());
float last_quantile = quantiles[quantiles.size() - 1];
vec remaining_unique_values;
remaining_unique_values.reserve(sorted.size());
for(int i = 0; i < sorted.size(); i++) {
if(sorted[i] > last_quantile && (values_unique.size() == 0 || sorted[i] != values_unique[values_unique.size() - 1]))
values_unique.push_back(sorted[i]);
if(sorted[i] > last_added && (remaining_unique_values.size() == 0 || sorted[i] != remaining_unique_values[remaining_unique_values.size() - 1]))
remaining_unique_values.push_back(sorted[i]);
}
if(values_unique.size() > 0) {

if(remaining_unique_values.size() > 0) {
int num_left = num - quantiles.size();
for(int i = 1; i <= num_left; i++) {
float f = float(i) / (num_left);
int index = values_unique.size() * f - 1;
int index = remaining_unique_values.size() * f - 1;
if(index >= 0) {
float value = values_unique[index];
assert(index < remaining_unique_values.size());
float value = remaining_unique_values[index];
quantiles.push_back(value);
}
else {
std::cout << i << " " << f << " " << index << " " << num_left << " " << values_unique.size() << std::endl;
std::cout << count_lower << " " << sorted.size() << " " << last_quantile << std::endl;
std::cout << i << " " << f << " " << index << " " << num_left << " " << remaining_unique_values.size() << std::endl;
std::cout << count_lower << " " << sorted.size() << " " << last_added << std::endl;
throw std::runtime_error("Internal error in calc_even_quantiles.");
}
}
Expand Down
85 changes: 85 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,40 @@ def test_calc_quantile(self):
self.assertEqual(len(quantile_of_nan_list), 1)
self.assertTrue(np.isnan(quantile_of_nan_list[0]))

def test_calc_quantile_spatially_varying(self):
Q = 5
Y = 3
X = 2
input = np.reshape(np.arange(Y * X * Q), [Y, X, Q])
levels = np.reshape([0.25, 0.6]*int(Y*X/2), [Y, X])
output = gridpp.calc_quantile(input, levels)
expected = np.reshape([1, 7.4, 11, 17.4, 21, 27.4], [Y, X])
np.testing.assert_array_almost_equal(output, expected)

def test_calc_quantile_spatially_varying_invalid_arguments(self):
with self.assertRaises(ValueError) as e:
# Dimension mismatch
gridpp.calc_quantile(np.zeros([2, 3, 4]), np.zeros([1,3]))

def test_calc_quantile_spatially_varying_empty(self):
output = gridpp.calc_quantile(np.zeros([2, 0, 3]), np.zeros([2, 0]))
np.testing.assert_array_almost_equal(output.shape, [0, 0])

output = gridpp.calc_quantile(np.zeros([0, 2, 3]), np.zeros([0, 2]))
np.testing.assert_array_almost_equal(output.shape, [0, 0])

output = gridpp.calc_quantile(np.zeros([2, 2, 0]), np.zeros([2, 2]))
np.testing.assert_array_almost_equal(output, np.nan*np.zeros([2, 2]))

def test_calc_quantile_invalid_argument(self):
quantiles = [1.1, -0.1]
for quantile in quantiles:
with self.assertRaises(ValueError) as e:
gridpp.calc_quantile([0, 1, 2], quantile)
with self.assertRaises(ValueError) as e:
gridpp.calc_quantile([[0, 1, 2]], quantile)
with self.assertRaises(ValueError) as e:
gridpp.calc_quantile(np.zeros([2,3,4]), quantile*np.ones([2,3]))
self.assertTrue(np.isnan(gridpp.calc_quantile([0, 1, 2], np.nan)))

def test_calc_statistic_randomchoice(self):
Expand Down Expand Up @@ -187,5 +216,61 @@ def test_set_debug_level(self):
gridpp.set_debug_level(level)
self.assertEqual(level, gridpp.get_debug_level())

def test_debug(self):
gridpp.debug("test")
gridpp.debug("")

def test_future_deprecation_warning(self):
gridpp.future_deprecation_warning("test")
gridpp.future_deprecation_warning("")
gridpp.future_deprecation_warning("test", "other")
gridpp.future_deprecation_warning("", "other")
gridpp.future_deprecation_warning("", "")

def test_calc_even_quantiles(self):
output = gridpp.calc_even_quantiles([1,2,3], 2)
np.testing.assert_array_almost_equal(output, [1,3])

# First and last
output = gridpp.calc_even_quantiles(range(10), 2)
np.testing.assert_array_almost_equal(output, [0,9])

# Repeated first number
output = gridpp.calc_even_quantiles([1,1,1,1,1,5, 10], 3)
np.testing.assert_array_almost_equal(output, [1, 5, 10])

output = gridpp.calc_even_quantiles([1,1,1,1,1,5, 10], 2)
np.testing.assert_array_almost_equal(output, [1, 10])

output = gridpp.calc_even_quantiles([1,1,1,1,4,5, 10], 3)
np.testing.assert_array_almost_equal(output, [1, 4, 10])

# Repeated numbers
for num in [1, 2, 3]:
with self.subTest(num=num):
output = gridpp.calc_even_quantiles([1,1,1], num)
np.testing.assert_array_almost_equal(output, [1])

# Too little data
output = gridpp.calc_even_quantiles([1,2,3], 3)
np.testing.assert_array_almost_equal(output, [1,2,3])

# Too little data with repeated
output = gridpp.calc_even_quantiles([1,1,3], 3)
np.testing.assert_array_almost_equal(output, [1,3])

output = gridpp.calc_even_quantiles([1], 2)
np.testing.assert_array_almost_equal(output, [1])

# Empty arrays
output = gridpp.calc_even_quantiles([1,2,3], 0)
np.testing.assert_array_almost_equal(output, [])

output = gridpp.calc_even_quantiles([], 0)
np.testing.assert_array_almost_equal(output, [])

output = gridpp.calc_even_quantiles([], 2)
np.testing.assert_array_almost_equal(output, [])

if __name__ == '__main__':
unittest.main()

0 comments on commit 3852034

Please sign in to comment.