Skip to content

Commit a42524b

Browse files
committed
Respect nulls in approx_percentile_cont
1 parent 2f5e73c commit a42524b

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

datafusion/functions-aggregate/src/approx_percentile_cont.rs

+14 -3
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ use std::any::Any;
1919
use std::fmt::{Debug, Formatter};
2020
use std::sync::Arc;
2121

22-
use arrow::array::RecordBatch;
22+
use arrow::array::{Array, RecordBatch};
23+
use arrow::compute::{filter, is_not_null};
2324
use arrow::{
2425
array::{
2526
ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
@@ -104,6 +105,12 @@ impl ApproxPercentileCont {
104105
None
105106
};
106107

108+
if args.ignore_nulls {
109+
return not_impl_err!(
110+
"IGNORE NULLS clause not yet supported for APPROX_PERCENTILE_CONT"
111+
);
112+
}
113+
107114
let accumulator: ApproxPercentileAccumulator = match args.input_type {
108115
t @ (DataType::UInt8
109116
| DataType::UInt16
@@ -393,8 +400,12 @@ impl Accumulator for ApproxPercentileAccumulator {
393400
}
394401

395402
fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> {
396-
let values = &values[0];
397-
let sorted_values = &arrow::compute::sort(values, None)?;
403+
// Drop null entries first so that only non-null values are sorted and merged into the digest
404+
let mut values = values[0];
405+
if let Some(nulls) = values.nulls() {
406+
values = filter(&values, &is_not_null(values)?)?;
407+
}
408+
let sorted_values = &arrow::compute::sort(&values, None)?;
398409
let sorted_values = ApproxPercentileAccumulator::convert_to_float(sorted_values)?;
399410
self.digest = self.digest.merge_sorted_f64(&sorted_values);
400411
Ok(())

datafusion/sqllogictest/test_files/aggregate.slt

+6
Original file line numberDiff line numberDiff line change
@@ -1237,6 +1237,12 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(c11, 0.9) AS DOUBLE) / 0.834) < 0.05
12371237
----
12381238
true
12391239

1240+
# percentile_cont_with_nulls
1241+
query I
1242+
SELECT APPROX_PERCENTILE_CONT(v, 0.5) FROM (VALUES (1), (2), (3), (NULL), (NULL), (NULL)) as t (v);
1243+
----
1244+
2
1245+
12401246
# csv_query_cube_avg
12411247
query TIR
12421248
SELECT c1, c2, AVG(c3) FROM aggregate_test_100 GROUP BY CUBE (c1, c2) ORDER BY c1, c2

0 commit comments

Comments
 (0)