Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Add support for custom allocators #366

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions allocator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package roaring

// Allocator is the interface for allocating the various data structures used
// in this library. Its primary purpose is to give users the ability to
// control individual allocations in a relatively non-invasive way.
type Allocator interface {
// AllocateBytes is expected to return a byte slice of length size; the
// serializer asks for its full scratch-buffer size and then writes
// directly into the result.
// NOTE(review): the non-allocator path uses make, which zeroes memory;
// whether implementations must also return zeroed memory is not
// specified here — confirm against the serializer.
AllocateBytes(size int) []byte
// AllocateUInt16s returns a uint16 slice for the requested size. New array
// containers call it with size 0 and re-slice the result to length 0
// before adopting it as their content.
AllocateUInt16s(size int) []uint16
}
8 changes: 8 additions & 0 deletions arraycontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,14 @@ func newArrayContainer() *arrayContainer {
return p
}

// newArrayContainerFromAllocator builds an empty arrayContainer. With a nil
// allocator the container is left zero-valued; otherwise its content slice is
// obtained from the allocator (requested with size 0) and truncated to length
// 0, so any capacity the allocator returned is kept for later growth.
func newArrayContainerFromAllocator(allocator Allocator) *arrayContainer {
	ac := &arrayContainer{}
	if allocator == nil {
		return ac
	}
	seed := allocator.AllocateUInt16s(0)
	ac.content = seed[:0]
	return ac
}

func newArrayContainerFromBitmap(bc *bitmapContainer) *arrayContainer {
ac := &arrayContainer{}
ac.loadData(bc)
Expand Down
47 changes: 43 additions & 4 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ package roaring
import (
"bytes"
"fmt"
"github.com/stretchr/testify/require"
"math/rand"
"testing"

"github.com/stretchr/testify/require"

"github.com/bits-and-blooms/bitset"
)

// BENCHMARKS, to run them type "go test -bench Benchmark -run -"


// go test -bench BenchmarkIteratorAlloc -benchmem -run -
func BenchmarkIteratorAlloc(b *testing.B) {
bm := NewBitmap()
Expand Down Expand Up @@ -84,7 +84,6 @@ func BenchmarkIteratorAlloc(b *testing.B) {
b.Fatalf("Cardinalities don't match: %d, %d", counter, expected_cardinality)
}


b.Run("many iteration with alloc", func(b *testing.B) {
for n := 0; n < b.N; n++ {
counter = 0
Expand Down Expand Up @@ -117,7 +116,6 @@ func BenchmarkIteratorAlloc(b *testing.B) {
}
}


// go test -bench BenchmarkOrs -benchmem -run -
func BenchmarkOrs(b *testing.B) {

Expand Down Expand Up @@ -1134,3 +1132,44 @@ func BenchmarkAndAny(b *testing.B) {
runSet("small-filters", genOne(r, largeSize, domain), genMulti(r, filtersNum, smallSize, domain))
runSet("equal", genOne(r, defaultSize, domain), genMulti(r, filtersNum, defaultSize, domain))
}

// benchAllocator is a minimal Allocator for benchmarks. It owns two scratch
// slices and serves every request that fits within their capacity from that
// same storage, so successive results alias each other. Requests larger than
// the scratch capacity fall back to a fresh make.
type benchAllocator struct {
	buf     []byte   // scratch storage handed out by AllocateBytes
	uint16s []uint16 // scratch storage handed out by AllocateUInt16s
}

// AllocateBytes returns the first size bytes of the scratch buffer when they
// fit in its capacity, and a newly made slice of length size otherwise.
func (a benchAllocator) AllocateBytes(size int) []byte {
	if size > cap(a.buf) {
		// Too large for the scratch buffer: allocate normally.
		return make([]byte, size)
	}
	return a.buf[:size]
}

// AllocateUInt16s returns the first size elements of the uint16 scratch slice
// when they fit in its capacity, and a newly made slice of length size
// otherwise.
func (a benchAllocator) AllocateUInt16s(size int) []uint16 {
	if size > cap(a.uint16s) {
		// Too large for the scratch slice: allocate normally.
		return make([]uint16, size)
	}
	return a.uint16s[:size]
}

// BenchmarkRepeatedSparseSerialization measures repeatedly clearing a bitmap,
// adding 16 small values and serializing it, using a bitmap created with a
// benchAllocator so that scratch buffers can be reused across iterations.
//
// go test -bench BenchmarkRepeatedSparseSerialization -benchmem -run -
func BenchmarkRepeatedSparseSerialization(b *testing.B) {
	allocator := benchAllocator{
		buf:     make([]byte, 4096),
		uint16s: make([]uint16, 4096),
	}
	bm := NewWithAllocator(allocator)
	buf := bytes.NewBuffer(nil)

	// Allocations are the point of this benchmark, so always report them and
	// keep the one-time setup above out of the measurement.
	b.ReportAllocs()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		bm.Clear()
		for j := 0; j < 16; j++ {
			bm.Add(uint32(j))
		}
		buf.Reset()
		if _, err := bm.WriteTo(buf); err != nil {
			// Fail through the testing framework rather than panicking, so
			// the failure is attributed to this benchmark.
			b.Fatal(err)
		}
	}
}
2 changes: 2 additions & 0 deletions parallel.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer
make([]bool, 0, expectedKeys),
false,
},
nil,
}
for i := range keys {
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
Expand Down Expand Up @@ -440,6 +441,7 @@ func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
keys: make([]uint16, containerCount),
needCopyOnWrite: make([]bool, containerCount),
},
nil,
}

resultOffset := 0
Expand Down
52 changes: 32 additions & 20 deletions roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
// Bitmap represents a compressed bitmap where you can add integers.
type Bitmap struct {
// highlowcontainer is the backing roaringArray; serialization and cloning
// delegate to it.
highlowcontainer roaringArray

// allocator is the optional custom Allocator supplied via NewWithAllocator.
// It is nil for bitmaps created with New, and is forwarded to
// serialization and to the creation of new array containers.
allocator Allocator
}

// ToBase64 serializes a bitmap as Base64
Expand All @@ -44,13 +46,13 @@ func (rb *Bitmap) FromBase64(str string) (int64, error) {
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
return rb.highlowcontainer.writeTo(stream)
return rb.highlowcontainer.writeTo(stream, rb.allocator)
}

// ToBytes returns an array of bytes corresponding to what is written
// when calling WriteTo
func (rb *Bitmap) ToBytes() ([]byte, error) {
return rb.highlowcontainer.toBytes()
return rb.highlowcontainer.toBytes(rb.allocator)
}

// Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for
Expand All @@ -63,7 +65,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
func (rb *Bitmap) Checksum() uint64 {
const (
offset = 14695981039346656037
prime = 1099511628211
prime = 1099511628211
)

var bytes []byte
Expand Down Expand Up @@ -180,14 +182,22 @@ func (rb *Bitmap) UnmarshalBinary(data []byte) error {

// NewBitmap creates a new empty Bitmap (see also New)
func NewBitmap() *Bitmap {
return &Bitmap{}
return New()
}

// New returns a pointer to a freshly allocated, empty Bitmap with all fields
// zero-valued (so no custom allocator is set). It is equivalent to NewBitmap.
func New() *Bitmap {
	return new(Bitmap)
}

// NewWithAllocator returns a new, empty Bitmap that remembers the given
// allocator so the bitmap may use it for some of its allocations. Passing a
// nil allocator yields the same zero-valued bitmap that New returns.
func NewWithAllocator(allocator Allocator) *Bitmap {
	rb := new(Bitmap)
	rb.allocator = allocator
	return rb
}

// Clear resets the Bitmap to be logically empty, but may retain
// some memory allocations that may speed up future operations
func (rb *Bitmap) Clear() {
Expand Down Expand Up @@ -276,9 +286,9 @@ type intIterator struct {
// This way, instead of making up-to 64k allocations per full iteration
// we get a single allocation and simply reinitialize the appropriate
// iterator and point to it in the generic `iter` member on each key bound.
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerShortIterator
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerShortIterator
}

// HasNext returns true if there are more integers to iterate over
Expand Down Expand Up @@ -341,7 +351,6 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
// IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
type IntIterator = intIterator


// Initialize configures the existing iterator so that it can iterate through the values of
// the provided bitmap.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
Expand All @@ -357,9 +366,9 @@ type intReverseIterator struct {
iter shortIterable
highlowcontainer *roaringArray

shortIter reverseIterator
runIter runReverseIterator16
bitmapIter reverseBitmapContainerShortIterator
shortIter reverseIterator
runIter runReverseIterator16
bitmapIter reverseBitmapContainerShortIterator
}

// HasNext returns true if there are more integers to iterate over
Expand Down Expand Up @@ -434,9 +443,9 @@ type manyIntIterator struct {
iter manyIterable
highlowcontainer *roaringArray

shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerManyIterator
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerManyIterator
}

func (ii *manyIntIterator) init() {
Expand Down Expand Up @@ -495,7 +504,6 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int {
return n
}


// ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
type ManyIntIterator = manyIntIterator

Expand Down Expand Up @@ -569,7 +577,7 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
p := new(intIterator)
p := new(intIterator)
p.Initialize(rb)
return p
}
Expand All @@ -592,7 +600,7 @@ func (rb *Bitmap) ManyIterator() ManyIntIterable {

// Clone creates a copy of the Bitmap
func (rb *Bitmap) Clone() *Bitmap {
ptr := new(Bitmap)
ptr := New()
ptr.highlowcontainer = *rb.highlowcontainer.clone()
return ptr
}
Expand Down Expand Up @@ -720,7 +728,7 @@ func (rb *Bitmap) Add(x uint32) {
c = ra.getWritableContainerAtIndex(i).iaddReturnMinimized(lowbits(x))
rb.highlowcontainer.setContainerAtIndex(i, c)
} else {
newac := newArrayContainer()
newac := rb.getNewArrayContainer()
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
}
}
Expand All @@ -736,7 +744,7 @@ func (rb *Bitmap) addwithptr(x uint32) (int, container) {
rb.highlowcontainer.setContainerAtIndex(i, c)
return i, c
}
newac := newArrayContainer()
newac := rb.getNewArrayContainer()
c = newac.iaddReturnMinimized(lowbits(x))
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, c)
return -i - 1, c
Expand All @@ -754,7 +762,7 @@ func (rb *Bitmap) CheckedAdd(x uint32) bool {
rb.highlowcontainer.setContainerAtIndex(i, C)
return C.getCardinality() > oldcard
}
newac := newArrayContainer()
newac := rb.getNewArrayContainer()
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
return true

Expand Down Expand Up @@ -1713,3 +1721,7 @@ func (rb *Bitmap) Stats() Statistics {
}
return stats
}

// getNewArrayContainer returns a new, empty array container built through
// newArrayContainerFromAllocator with this bitmap's allocator (which may be
// nil), exposed as the container interface.
func (rb *Bitmap) getNewArrayContainer() container {
	var fresh container = newArrayContainerFromAllocator(rb.allocator)
	return fresh
}
35 changes: 22 additions & 13 deletions roaringarray.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"bytes"
"encoding/binary"
"fmt"
"github.com/RoaringBitmap/roaring/internal"
"io"

"github.com/RoaringBitmap/roaring/internal"
)

type container interface {
Expand Down Expand Up @@ -468,21 +469,29 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 {
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
hasRun := ra.hasRunCompression()
isRunSizeInBytes := 0
cookieSize := 8
func (ra *roaringArray) writeTo(w io.Writer, allocator Allocator) (n int64, err error) {
var (
hasRun = ra.hasRunCompression()
isRunSizeInBytes = 0
cookieSize = 8
)
if hasRun {
cookieSize = 4
isRunSizeInBytes = (len(ra.keys) + 7) / 8
}
descriptiveHeaderSize := 4 * len(ra.keys)
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize

buf := make([]byte, preambleSize+4*len(ra.keys))

nw := 0

var (
descriptiveHeaderSize = 4 * len(ra.keys)
preambleSize = cookieSize + isRunSizeInBytes + descriptiveHeaderSize
bufSizeRequired = preambleSize + 4*len(ra.keys)
buf []byte
nw = 0
)
if allocator != nil {
buf = allocator.AllocateBytes(bufSizeRequired)
} else {
buf = make([]byte, bufSizeRequired)
}
if hasRun {
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
nw += 2
Expand Down Expand Up @@ -547,9 +556,9 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) toBytes() ([]byte, error) {
func (ra *roaringArray) toBytes(allocator Allocator) ([]byte, error) {
var buf bytes.Buffer
_, err := ra.writeTo(&buf)
_, err := ra.writeTo(&buf, allocator)
return buf.Bytes(), err
}

Expand Down