Skip to content

Commit d6107ac

Browse files
committed
add maxcount and reset funcs
1 parent e7452fb commit d6107ac

File tree

4 files changed

+40
-8
lines changed

4 files changed

+40
-8
lines changed

bodkin.go

+31-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"bytes"
88
"errors"
99
"fmt"
10+
"math"
1011
"os"
1112
"slices"
1213
"strings"
@@ -52,7 +53,8 @@ type Bodkin struct {
5253
new *fieldPos
5354
knownFields *omap.OrderedMap[string, *fieldPos]
5455
untypedFields *omap.OrderedMap[string, *fieldPos]
55-
unificationCount int
56+
unificationCount int64
57+
maxCount int64
5658
inferTimeUnits bool
5759
quotedValuesAreStrings bool
5860
typeConversion bool
@@ -90,7 +92,7 @@ func newBodkin(m map[string]any, opts ...Option) (*Bodkin, error) {
9092
f := newFieldPos(b)
9193
mapToArrow(f, m)
9294
b.old = f
93-
95+
b.maxCount = int64(math.MaxInt64)
9496
return b, err
9597
}
9698

@@ -165,7 +167,24 @@ func (u *Bodkin) Err() []Field {
165167
// Changes returns the error value currently held in the changes field.
func (u *Bodkin) Changes() error {
	changes := u.changes
	return changes
}
166168

167169
// Count returns the number of datum evaluated for schema to date.
168-
func (u *Bodkin) Count() int { return u.unificationCount }
170+
func (u *Bodkin) Count() int64 {
	evaluated := u.unificationCount
	return evaluated
}
171+
172+
// MaxCount returns the maximum number of datum to be evaluated for schema.
173+
func (u *Bodkin) MaxCount() int64 { return u.unificationCount }
174+
175+
// ResetCount resets the count of datum evaluated for schema to date.
176+
func (u *Bodkin) ResetCount() int64 {
177+
u.unificationCount = 0
178+
return u.unificationCount
179+
}
180+
181+
// ResetMaxCount resets the maximum number of datam to be evaluated for schema
182+
// to maxInt64.
183+
// ResetCount resets the count of datum evaluated for schema to date.
184+
func (u *Bodkin) ResetMaxCount() int64 {
185+
u.maxCount = int64(math.MaxInt64)
186+
return u.unificationCount
187+
}
169188

170189
// Paths returns a slice of dotpaths of fields successfully evaluated to date.
171190
func (u *Bodkin) Paths() []Field {
@@ -274,11 +293,14 @@ func (u *Bodkin) ImportSchema(importPath string) (*arrow.Schema, error) {
274293

275294
// Unify merges structured input's column definition with the schema built from previous inputs.
276295
// Any unpopulated fields, empty objects or empty slices in JSON input are skipped.
277-
func (u *Bodkin) Unify(a any) {
296+
func (u *Bodkin) Unify(a any) error {
297+
if u.unificationCount > u.maxCount {
298+
return fmt.Errorf("maxcount exceeded")
299+
}
278300
m, err := InputMap(a)
279301
if err != nil {
280302
u.err = fmt.Errorf("%v : %v", ErrInvalidInput, err)
281-
return
303+
return fmt.Errorf("%v : %v", ErrInvalidInput, err)
282304
}
283305

284306
f := newFieldPos(u)
@@ -288,13 +310,17 @@ func (u *Bodkin) Unify(a any) {
288310
u.merge(field, nil)
289311
}
290312
u.unificationCount++
313+
return nil
291314
}
292315

293316
// UnifyAtPath merges structured input's column definition with the schema built from previous inputs,
294317
// using a specified valid path as the root. An error is returned if the mergeAt path is
295318
// not found.
296319
// Any unpopulated fields, empty objects or empty slices in JSON input are skipped.
297320
func (u *Bodkin) UnifyAtPath(a any, mergeAt string) error {
321+
if u.unificationCount > u.maxCount {
322+
return fmt.Errorf("maxcount exceeded")
323+
}
298324
mergePath := make([]string, 0)
299325
if !(len(mergeAt) == 0 || mergeAt == "$") {
300326
mergePath = strings.Split(strings.TrimPrefix(mergeAt, "$"), ".")

cmd/main.go

-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ func main() {
179179
} else {
180180
fmt.Printf("imported %v\n", imp.String())
181181
}
182-
183182
}
184183

185184
}

json2parquet/json2parquet.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import (
1414
"github.com/loicalleyne/bodkin/pq"
1515
)
1616

17-
func FromReader(r io.Reader, opts ...bodkin.Option) (*arrow.Schema, int, error) {
17+
func FromReader(r io.Reader, opts ...bodkin.Option) (*arrow.Schema, int64, error) {
1818
var err error
1919
s := bufio.NewScanner(r)
2020
var u *bodkin.Bodkin
@@ -28,7 +28,7 @@ func FromReader(r io.Reader, opts ...bodkin.Option) (*arrow.Schema, int, error)
2828
}
2929
for s.Scan() {
3030
u.Unify(s.Bytes())
31-
if u.Count() > 10000 {
31+
if u.Count() > u.MaxCount() {
3232
break
3333
}
3434
}

option.go

+7
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,10 @@ func WithQuotedValuesAreStrings() Option {
3232
cfg.quotedValuesAreStrings = true
3333
}
3434
}
35+
36+
// WithMaxCount enables capping the number of Unify evaluations.
37+
func WithMaxCount(i int64) Option {
38+
return func(cfg config) {
39+
cfg.maxCount = i
40+
}
41+
}

0 commit comments

Comments
 (0)