Skip to content

Commit e61a9bc

Browse files
committed
NextBatch
1 parent f35fec3 commit e61a9bc

File tree

2 files changed

+73
-10
lines changed

2 files changed

+73
-10
lines changed

cmd/main.go

+18-7
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,25 @@ func main() {
5959
log.Printf("elapsed: %v\n", time.Since(start))
6060

6161
i := 0
62-
for r.Next() {
63-
rec := r.Record()
64-
_, err := rec.MarshalJSON()
65-
if err != nil {
66-
fmt.Printf("error marshaling record: %v\n", err)
62+
// for r.Next() {
63+
// rec := r.Record()
64+
// _, err := rec.MarshalJSON()
65+
// if err != nil {
66+
// fmt.Printf("error marshaling record: %v\n", err)
67+
// }
68+
// // fmt.Printf("\nmarshaled record :\n%v\n", string(rj))
69+
// i++
70+
// }
71+
for r.NextBatch(1024) {
72+
recs := r.RecordBatch()
73+
for _, rec := range recs {
74+
_, err := rec.MarshalJSON()
75+
if err != nil {
76+
fmt.Printf("error marshaling record: %v\n", err)
77+
}
78+
// fmt.Printf("\nmarshaled record :\n%v\n", string(rj))
79+
i++
6780
}
68-
// fmt.Printf("\nmarshaled record :\n%v\n", string(rj))
69-
i++
7081
}
7182
log.Println("records", r.Count(), i)
7283
}

reader/reader.go

+55-3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ type DataReader struct {
4949
bldMap *fieldPos
5050
ldr *dataLoader
5151
cur arrow.Record
52+
curBatch []arrow.Record
5253
readerCtx context.Context
5354
readCancel func()
5455
err error
@@ -111,6 +112,8 @@ func NewReader(schema *arrow.Schema, source DataSource, opts ...Option) (*DataRe
111112
return r, nil
112113
}
113114

115+
// ReadToRecord decodes a datum directly to an arrow.Record. The record
116+
// should be released by the user when done with it.
114117
func (r *DataReader) ReadToRecord(a any) (arrow.Record, error) {
115118
var err error
116119
defer func() {
@@ -147,6 +150,52 @@ func (r *DataReader) ReadToRecord(a any) (arrow.Record, error) {
147150
return r.bld.NewRecord(), nil
148151
}
149152

153+
// NextBatch returns whether a []arrow.Record of a specified size can be received
154+
// from the converted record queue. Will still return true if the queue channel is closed and
155+
// last batch of records available < batch size specified.
156+
// The user should check Err() after a call to NextBatch that returned false to check
157+
// if an error took place.
158+
func (r *DataReader) NextBatch(batchSize int) bool {
159+
if batchSize < 1 {
160+
batchSize = 1
161+
}
162+
if len(r.curBatch) != 0 {
163+
for _, rec := range r.curBatch {
164+
rec.Release()
165+
}
166+
r.curBatch = []arrow.Record{}
167+
}
168+
r.wg.Wait()
169+
170+
for len(r.curBatch) <= batchSize {
171+
select {
172+
case rec, ok := <-r.recChan:
173+
if !ok && rec == nil {
174+
if len(r.curBatch) > 0 {
175+
goto jump
176+
}
177+
return false
178+
}
179+
if rec != nil {
180+
r.curBatch = append(r.curBatch, rec)
181+
}
182+
case <-r.bldDone:
183+
if len(r.recChan) > 0 {
184+
r.cur = <-r.recChan
185+
}
186+
case <-r.readerCtx.Done():
187+
return false
188+
}
189+
}
190+
191+
jump:
192+
if r.err != nil {
193+
return false
194+
}
195+
196+
return len(r.curBatch) > 0
197+
}
198+
150199
// Next returns whether a Record can be received from the converted record queue.
151200
// The user should check Err() after a call to Next that returned false to check
152201
// if an error took place.
@@ -156,7 +205,6 @@ func (r *DataReader) Next() bool {
156205
r.cur.Release()
157206
r.cur = nil
158207
}
159-
160208
r.wg.Wait()
161209
select {
162210
case r.cur, ok = <-r.recChan:
@@ -195,8 +243,12 @@ func (r *DataReader) Opts() []Option { return r.opts }
195243

196244
// Record returns the current Arrow record.
197245
// It is valid until the next call to Next.
198-
func (r *DataReader) Record() arrow.Record { return r.cur }
199-
func (r *DataReader) Schema() *arrow.Schema { return r.schema }
246+
func (r *DataReader) Record() arrow.Record { return r.cur }
247+
248+
// RecordBatch returns the current batch of Arrow records.
249+
// It is valid until the next call to NextBatch.
250+
func (r *DataReader) RecordBatch() []arrow.Record { return r.curBatch }
251+
func (r *DataReader) Schema() *arrow.Schema { return r.schema }
200252

201253
// Err returns the last error encountered during the reading of data.
202254
func (r *DataReader) Err() error { return r.err }

0 commit comments

Comments
 (0)