Fix edge cases for calc_even_quantiles

metno · Mar 14, 2024 · 3852034 · 3852034
1 parent a0d52f7
commit 3852034
Show file tree

Hide file tree

Showing 2 changed files with 136 additions and 21 deletions.
diff --git a/src/api/util.cpp b/src/api/util.cpp
@@ -194,8 +194,10 @@ vec2 gridpp::calc_quantile(const vec3& array, const vec2& quantile) {
     if(X == 0)
         return vec2();
     int T = array[0][0].size();
-    if(T == 0)
-        return vec2();
+    if(T == 0) {
+        vec2 output = gridpp::init_vec2(Y, X, gridpp::MV);
+        return output;
+    }
     vec2 output = gridpp::init_vec2(Y, X);
     for(int y = 0; y < Y; y++) {
         for(int x = 0; x < X; x++) {
@@ -257,48 +259,76 @@ double gridpp::clock() {
     return sec + msec/1e6;
 }
 vec gridpp::calc_even_quantiles(const vec& values, int num) {
-    if(num >= values.size())
-        return  values;
-
     vec quantiles;
-    if(num == 0)
+    int size = values.size();
+    if(num == 0 || size == 0)
         return quantiles;
 
     vec sorted = values;
     std::sort(sorted.begin(), sorted.end());
 
-    quantiles.reserve(num);
-    quantiles.push_back(sorted[0]);
+    // Asking for more quantiles than we have data points
+    if(num >= size) {
+        // Find the unique values
+        quantiles.reserve(num);
+        quantiles.push_back(sorted[0]);
+        for(int i = 1; i < size; i++) {
+            if(sorted[i] != sorted[i-1])
+                quantiles.push_back(sorted[i]);
+        }
+        return  quantiles;
+    }
+
+
+    float lowest = sorted[0];
+    float highest = sorted[size - 1];
+
+    // If there are multiple identical values at the start, pick the first value past the set of
+    // identical values
     int count_lower = 0;
     for(int i = 0; i < sorted.size(); i++) {
-        if(sorted[i] != quantiles[0])
+        if(sorted[i] != lowest)
             break;
         count_lower++;
     }
-    int size = sorted.size();
-    if(count_lower > size / num)
+    quantiles.reserve(num);
+    quantiles.push_back(lowest);
+    float last_added = lowest;
+    if(num == 2) {
+        if(lowest != highest)
+            quantiles.push_back(highest);
+        return quantiles;
+    }
+
+    // Add the first past the lowest set of repeated values
+    bool repeated_at_beginning = count_lower < size && count_lower > size / num;
+    if(repeated_at_beginning) {
+        assert(count_lower < sorted.size());
         quantiles.push_back(sorted[count_lower]);
+    }
+    last_added = quantiles[quantiles.size() - 1];
 
     // Remove duplicates
-    vec values_unique;
-    values_unique.reserve(sorted.size());
-    float last_quantile = quantiles[quantiles.size() - 1];
+    vec remaining_unique_values;
+    remaining_unique_values.reserve(sorted.size());
     for(int i = 0; i < sorted.size(); i++) {
-        if(sorted[i] > last_quantile && (values_unique.size() == 0 || sorted[i] != values_unique[values_unique.size() - 1]))
-            values_unique.push_back(sorted[i]);
+        if(sorted[i] > last_added && (remaining_unique_values.size() == 0 || sorted[i] != remaining_unique_values[remaining_unique_values.size() - 1]))
+            remaining_unique_values.push_back(sorted[i]);
     }
-    if(values_unique.size() > 0) {
+
+    if(remaining_unique_values.size() > 0) {
         int num_left = num - quantiles.size();
         for(int i = 1; i <= num_left; i++) {
             float f = float(i) / (num_left);
-            int index = values_unique.size() * f - 1;
+            int index = remaining_unique_values.size() * f - 1;
             if(index >= 0) {
-                float value = values_unique[index];
+                assert(index < remaining_unique_values.size());
+                float value = remaining_unique_values[index];
                 quantiles.push_back(value);
             }
             else {
-                std::cout << i << " " << f << " " << index << " " << num_left << " " << values_unique.size() << std::endl;
-                std::cout << count_lower << " " << sorted.size() << " " << last_quantile << std::endl;
+                std::cout << i << " " << f << " " << index << " " << num_left << " " << remaining_unique_values.size() << std::endl;
+                std::cout << count_lower << " " << sorted.size() << " " << last_added << std::endl;
                 throw std::runtime_error("Internal error in calc_even_quantiles.");
             }
         }

diff --git a/tests/test_util.py b/tests/test_util.py
@@ -78,11 +78,40 @@ def test_calc_quantile(self):
         self.assertEqual(len(quantile_of_nan_list), 1)
         self.assertTrue(np.isnan(quantile_of_nan_list[0]))
 
+    def test_calc_quantile_spatially_varying(self):
+        Q = 5
+        Y = 3
+        X = 2
+        input = np.reshape(np.arange(Y * X * Q), [Y, X, Q])
+        levels = np.reshape([0.25, 0.6]*int(Y*X/2), [Y, X])
+        output = gridpp.calc_quantile(input, levels)
+        expected = np.reshape([1, 7.4, 11, 17.4, 21, 27.4], [Y, X])
+        np.testing.assert_array_almost_equal(output, expected)
+
+    def test_calc_quantile_spatially_varying_invalid_arguments(self):
+        with self.assertRaises(ValueError) as e:
+            # Dimension mismatch
+            gridpp.calc_quantile(np.zeros([2, 3, 4]), np.zeros([1,3]))
+
+    def test_calc_quantile_spatially_varying_empty(self):
+        output = gridpp.calc_quantile(np.zeros([2, 0, 3]), np.zeros([2, 0]))
+        np.testing.assert_array_almost_equal(output.shape, [0, 0])
+
+        output = gridpp.calc_quantile(np.zeros([0, 2, 3]), np.zeros([0, 2]))
+        np.testing.assert_array_almost_equal(output.shape, [0, 0])
+
+        output = gridpp.calc_quantile(np.zeros([2, 2, 0]), np.zeros([2, 2]))
+        np.testing.assert_array_almost_equal(output, np.nan*np.zeros([2, 2]))
+
     def test_calc_quantile_invalid_argument(self):
         quantiles = [1.1, -0.1]
         for quantile in quantiles:
             with self.assertRaises(ValueError) as e:
                 gridpp.calc_quantile([0, 1, 2], quantile)
+            with self.assertRaises(ValueError) as e:
+                gridpp.calc_quantile([[0, 1, 2]], quantile)
+            with self.assertRaises(ValueError) as e:
+                gridpp.calc_quantile(np.zeros([2,3,4]), quantile*np.ones([2,3]))
         self.assertTrue(np.isnan(gridpp.calc_quantile([0, 1, 2], np.nan)))
 
     def test_calc_statistic_randomchoice(self):
@@ -187,5 +216,61 @@ def test_set_debug_level(self):
             gridpp.set_debug_level(level)
             self.assertEqual(level, gridpp.get_debug_level())
 
+    def test_debug(self):
+        gridpp.debug("test")
+        gridpp.debug("")
+
+    def test_future_deprecation_warning(self):
+        gridpp.future_deprecation_warning("test")
+        gridpp.future_deprecation_warning("")
+        gridpp.future_deprecation_warning("test", "other")
+        gridpp.future_deprecation_warning("", "other")
+        gridpp.future_deprecation_warning("", "")
+
+    def test_calc_even_quantiles(self):
+        output = gridpp.calc_even_quantiles([1,2,3], 2)
+        np.testing.assert_array_almost_equal(output, [1,3])
+
+        # First and last
+        output = gridpp.calc_even_quantiles(range(10), 2)
+        np.testing.assert_array_almost_equal(output, [0,9])
+
+        # Repeated first number
+        output = gridpp.calc_even_quantiles([1,1,1,1,1,5, 10], 3)
+        np.testing.assert_array_almost_equal(output, [1, 5, 10])
+
+        output = gridpp.calc_even_quantiles([1,1,1,1,1,5, 10], 2)
+        np.testing.assert_array_almost_equal(output, [1, 10])
+
+        output = gridpp.calc_even_quantiles([1,1,1,1,4,5, 10], 3)
+        np.testing.assert_array_almost_equal(output, [1, 4, 10])
+
+        # Repeated numbers
+        for num in [1, 2, 3]:
+            with self.subTest(num=num):
+                output = gridpp.calc_even_quantiles([1,1,1], num)
+                np.testing.assert_array_almost_equal(output, [1])
+
+        # Too little data
+        output = gridpp.calc_even_quantiles([1,2,3], 3)
+        np.testing.assert_array_almost_equal(output, [1,2,3])
+
+        # Too little data with repeated
+        output = gridpp.calc_even_quantiles([1,1,3], 3)
+        np.testing.assert_array_almost_equal(output, [1,3])
+
+        output = gridpp.calc_even_quantiles([1], 2)
+        np.testing.assert_array_almost_equal(output, [1])
+
+        # Empty arrays
+        output = gridpp.calc_even_quantiles([1,2,3], 0)
+        np.testing.assert_array_almost_equal(output, [])
+
+        output = gridpp.calc_even_quantiles([], 0)
+        np.testing.assert_array_almost_equal(output, [])
+
+        output = gridpp.calc_even_quantiles([], 2)
+        np.testing.assert_array_almost_equal(output, [])
+
 if __name__ == '__main__':
     unittest.main()