16
16
// under the License.
17
17
18
18
//! Regex expressions
19
- use arrow:: array:: { Array , ArrayRef , OffsetSizeTrait } ;
19
+ use arrow:: array:: { Array , ArrayRef , AsArray } ;
20
20
use arrow:: compute:: kernels:: regexp;
21
21
use arrow:: datatypes:: DataType ;
22
22
use arrow:: datatypes:: Field ;
23
23
use datafusion_common:: exec_err;
24
24
use datafusion_common:: ScalarValue ;
25
25
use datafusion_common:: { arrow_datafusion_err, plan_err} ;
26
- use datafusion_common:: {
27
- cast:: as_generic_string_array, internal_err, DataFusionError , Result ,
28
- } ;
26
+ use datafusion_common:: { DataFusionError , Result } ;
29
27
use datafusion_expr:: { ColumnarValue , Documentation , TypeSignature } ;
30
28
use datafusion_expr:: { ScalarUDFImpl , Signature , Volatility } ;
31
29
use datafusion_macros:: user_doc;
@@ -86,11 +84,12 @@ impl RegexpMatchFunc {
86
84
signature : Signature :: one_of (
87
85
vec ! [
88
86
// Planner attempts coercion to the target type starting with the most preferred candidate.
89
- // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8, Utf8 )`.
90
- // If that fails, it proceeds to `(LargeUtf8 , Utf8)`.
91
- // TODO: Native support Utf8View for regexp_match.
87
+ // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View )`.
88
+ // If that fails, it proceeds to `(Utf8 , Utf8)`.
89
+ TypeSignature :: Exact ( vec! [ Utf8View , Utf8View ] ) ,
92
90
TypeSignature :: Exact ( vec![ Utf8 , Utf8 ] ) ,
93
91
TypeSignature :: Exact ( vec![ LargeUtf8 , LargeUtf8 ] ) ,
92
+ TypeSignature :: Exact ( vec![ Utf8View , Utf8View , Utf8View ] ) ,
94
93
TypeSignature :: Exact ( vec![ Utf8 , Utf8 , Utf8 ] ) ,
95
94
TypeSignature :: Exact ( vec![ LargeUtf8 , LargeUtf8 , LargeUtf8 ] ) ,
96
95
] ,
@@ -138,7 +137,7 @@ impl ScalarUDFImpl for RegexpMatchFunc {
138
137
. map ( |arg| arg. to_array ( inferred_length) )
139
138
. collect :: < Result < Vec < _ > > > ( ) ?;
140
139
141
- let result = regexp_match_func ( & args) ;
140
+ let result = regexp_match ( & args) ;
142
141
if is_scalar {
143
142
// If all inputs are scalar, keeps output as scalar
144
143
let result = result. and_then ( |arr| ScalarValue :: try_from_array ( & arr, 0 ) ) ;
@@ -153,33 +152,35 @@ impl ScalarUDFImpl for RegexpMatchFunc {
153
152
}
154
153
}
155
154
156
- fn regexp_match_func ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
157
- match args[ 0 ] . data_type ( ) {
158
- DataType :: Utf8 => regexp_match :: < i32 > ( args) ,
159
- DataType :: LargeUtf8 => regexp_match :: < i64 > ( args) ,
160
- other => {
161
- internal_err ! ( "Unsupported data type {other:?} for function regexp_match" )
162
- }
163
- }
164
- }
165
- pub fn regexp_match < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
155
+ pub fn regexp_match ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
166
156
match args. len ( ) {
167
157
2 => {
168
- let values = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
169
- let regex = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
170
- regexp:: regexp_match ( values, regex, None )
158
+ regexp:: regexp_match ( & args[ 0 ] , & args[ 1 ] , None )
171
159
. map_err ( |e| arrow_datafusion_err ! ( e) )
172
160
}
173
161
3 => {
174
- let values = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
175
- let regex = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
176
- let flags = as_generic_string_array :: < T > ( & args[ 2 ] ) ?;
177
-
178
- if flags. iter ( ) . any ( |s| s == Some ( "g" ) ) {
179
- return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
162
+ match args[ 2 ] . data_type ( ) {
163
+ DataType :: Utf8View => {
164
+ if args[ 2 ] . as_string_view ( ) . iter ( ) . any ( |s| s == Some ( "g" ) ) {
165
+ return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
166
+ }
167
+ }
168
+ DataType :: Utf8 => {
169
+ if args[ 2 ] . as_string :: < i32 > ( ) . iter ( ) . any ( |s| s == Some ( "g" ) ) {
170
+ return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
171
+ }
172
+ }
173
+ DataType :: LargeUtf8 => {
174
+ if args[ 2 ] . as_string :: < i64 > ( ) . iter ( ) . any ( |s| s == Some ( "g" ) ) {
175
+ return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
176
+ }
177
+ }
178
+ e => {
179
+ return plan_err ! ( "regexp_match was called with unexpected data type {e:?}" ) ;
180
+ }
180
181
}
181
182
182
- regexp:: regexp_match ( values , regex , Some ( flags ) )
183
+ regexp:: regexp_match ( & args [ 0 ] , & args [ 1 ] , Some ( & args [ 2 ] ) )
183
184
. map_err ( |e| arrow_datafusion_err ! ( e) )
184
185
}
185
186
other => exec_err ! (
@@ -211,7 +212,7 @@ mod tests {
211
212
expected_builder. append ( false ) ;
212
213
let expected = expected_builder. finish ( ) ;
213
214
214
- let re = regexp_match :: < i32 > ( & [ Arc :: new ( values) , Arc :: new ( patterns) ] ) . unwrap ( ) ;
215
+ let re = regexp_match ( & [ Arc :: new ( values) , Arc :: new ( patterns) ] ) . unwrap ( ) ;
215
216
216
217
assert_eq ! ( re. as_ref( ) , & expected) ;
217
218
}
@@ -236,9 +237,8 @@ mod tests {
236
237
expected_builder. append ( false ) ;
237
238
let expected = expected_builder. finish ( ) ;
238
239
239
- let re =
240
- regexp_match :: < i32 > ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
241
- . unwrap ( ) ;
240
+ let re = regexp_match ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
241
+ . unwrap ( ) ;
242
242
243
243
assert_eq ! ( re. as_ref( ) , & expected) ;
244
244
}
@@ -250,7 +250,7 @@ mod tests {
250
250
let flags = StringArray :: from ( vec ! [ "g" ] ) ;
251
251
252
252
let re_err =
253
- regexp_match :: < i32 > ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
253
+ regexp_match ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
254
254
. expect_err ( "unsupported flag should have failed" ) ;
255
255
256
256
assert_eq ! ( re_err. strip_backtrace( ) , "Error during planning: regexp_match() does not support the \" global\" option" ) ;
0 commit comments