@@ -49,6 +49,7 @@ type DataReader struct {
49
49
bldMap * fieldPos
50
50
ldr * dataLoader
51
51
cur arrow.Record
52
+ curBatch []arrow.Record
52
53
readerCtx context.Context
53
54
readCancel func ()
54
55
err error
@@ -111,6 +112,8 @@ func NewReader(schema *arrow.Schema, source DataSource, opts ...Option) (*DataRe
111
112
return r , nil
112
113
}
113
114
115
+ // ReadToRecord decodes a datum directly to an arrow.Record. The record
116
+ // should be released by the user when done with it.
114
117
func (r * DataReader ) ReadToRecord (a any ) (arrow.Record , error ) {
115
118
var err error
116
119
defer func () {
@@ -147,6 +150,52 @@ func (r *DataReader) ReadToRecord(a any) (arrow.Record, error) {
147
150
return r .bld .NewRecord (), nil
148
151
}
149
152
153
+ // NextBatch returns whether a []arrow.Record of a specified size can be received
154
+ // from the converted record queue. Will still return true if the queue channel is closed and
155
+ // last batch of records available < batch size specified.
156
+ // The user should check Err() after a call to NextBatch that returned false to check
157
+ // if an error took place.
158
+ func (r * DataReader ) NextBatch (batchSize int ) bool {
159
+ if batchSize < 1 {
160
+ batchSize = 1
161
+ }
162
+ if len (r .curBatch ) != 0 {
163
+ for _ , rec := range r .curBatch {
164
+ rec .Release ()
165
+ }
166
+ r .curBatch = []arrow.Record {}
167
+ }
168
+ r .wg .Wait ()
169
+
170
+ for len (r .curBatch ) <= batchSize {
171
+ select {
172
+ case rec , ok := <- r .recChan :
173
+ if ! ok && rec == nil {
174
+ if len (r .curBatch ) > 0 {
175
+ goto jump
176
+ }
177
+ return false
178
+ }
179
+ if rec != nil {
180
+ r .curBatch = append (r .curBatch , rec )
181
+ }
182
+ case <- r .bldDone :
183
+ if len (r .recChan ) > 0 {
184
+ r .cur = <- r .recChan
185
+ }
186
+ case <- r .readerCtx .Done ():
187
+ return false
188
+ }
189
+ }
190
+
191
+ jump:
192
+ if r .err != nil {
193
+ return false
194
+ }
195
+
196
+ return len (r .curBatch ) > 0
197
+ }
198
+
150
199
// Next returns whether a Record can be received from the converted record queue.
151
200
// The user should check Err() after a call to Next that returned false to check
152
201
// if an error took place.
@@ -156,7 +205,6 @@ func (r *DataReader) Next() bool {
156
205
r .cur .Release ()
157
206
r .cur = nil
158
207
}
159
-
160
208
r .wg .Wait ()
161
209
select {
162
210
case r .cur , ok = <- r .recChan :
@@ -195,8 +243,12 @@ func (r *DataReader) Opts() []Option { return r.opts }
195
243
196
244
// Record returns the current Arrow record.
197
245
// It is valid until the next call to Next.
198
- func (r * DataReader ) Record () arrow.Record { return r .cur }
199
- func (r * DataReader ) Schema () * arrow.Schema { return r .schema }
246
+ func (r * DataReader ) Record () arrow.Record { return r .cur }
247
+
248
+ // RecordBatch returns the current batch of Arrow records collected by NextBatch.
249
+ // It is valid until the next call to NextBatch.
250
+ func (r * DataReader ) RecordBatch () []arrow.Record { return r .curBatch }
251
+ func (r * DataReader ) Schema () * arrow.Schema { return r .schema }
200
252
201
253
// Err returns the last error encountered during the reading of data.
202
254
func (r * DataReader ) Err () error { return r .err }
0 commit comments