Skip to content

Commit 9871d08

Browse files
committed
Always return PrimitiveFloatList, even when cold
The PrimitiveFloatList is an API which users should expect to rely on, so it is wrong to degrade to the more constrained List<Float> API while Fast-Avro is still cold. This commit introduces several changes to make the extended API reliably present whenever using Fast-Avro, regardless of being cold or warm. - Changed PrimitiveFloatList to an interface, in a new package called: com.linkedin.avro.api; since the package name migration makes this an incompatible change, it would be desirable for the next release to not increment only the patch version. Having a proper package name for API extension should make things cleaner in the future as we add other optimized APIs (e.g. PR linkedin#45). - Renamed the old class to ByteBufferBackedPrimitiveFloatList, and made it implement the new interface. - Added new several new classes to ensure that the PrimitiveFloatList is always returned even when Fast-Avro falls back to vanilla Avro: - ColdPrimitiveFloatList which is a naive implementation that simply implements the new API by delegating to the regular Avro functions. This does not provide any GC benefits, but at least maintains the API. - ColdGenericDatumReader and ColdSpecificDatumReader which extend the GenericDatumReader and SpecificDatumReader classes, respectively, from vanilla Avro. - ColdDatumReaderMixIn which provides a utility function to minimize repeated code between the two DatumReader functions. - Significantly refactored the FastGenericDeserializerGeneratorTest so that it tests three permutations: vanilla, cold fast and warm fast. As part of doing this, several test short-comings were discovered and fixed. In particular, the decodeRecordSlow function had some flipped parameters which led to test failures on vanilla Avro, and those test failures were hidden by the fact that some tests ignored the provided permutation param and systematically tested only Fast-Avro.
1 parent 10bef2f commit 9871d08

10 files changed

+321
-181
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.linkedin.avro.api;
2+
3+
import java.util.List;
4+
5+
/**
6+
* A {@link List} implementation with additional functions to prevent boxing.
7+
*/
8+
public interface PrimitiveFloatList extends List<Float> {
9+
/**
10+
* @param index index of the element to return
11+
* @return the element at the specified position in this list
12+
*/
13+
float getPrimitive(int index);
14+
15+
/**
16+
* @param e element whose presence in this collection is to be ensured
17+
* @return <tt>true</tt> if this collection changed as a result of the call
18+
*/
19+
boolean addPrimitive(float e);
20+
}

avro-fastserde/src/main/java/com/linkedin/avro/fastserde/PrimitiveFloatList.java avro-fastserde/src/main/java/com/linkedin/avro/fastserde/ByteBufferBackedPrimitiveFloatList.java

+17-16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.linkedin.avro.fastserde;
22

3+
import com.linkedin.avro.api.PrimitiveFloatList;
34
import java.io.IOException;
45
import java.nio.ByteBuffer;
56
import java.util.AbstractList;
@@ -33,8 +34,8 @@
3334
*
3435
* TODO: Provide arrays for other primitive types.
3536
*/
36-
public class PrimitiveFloatList extends AbstractList<Float>
37-
implements GenericArray<Float>, Comparable<GenericArray<Float>> {
37+
public class ByteBufferBackedPrimitiveFloatList extends AbstractList<Float>
38+
implements GenericArray<Float>, Comparable<GenericArray<Float>>, PrimitiveFloatList {
3839
private static final float[] EMPTY = new float[0];
3940
private static final int FLOAT_SIZE = Float.BYTES;
4041
private static final Schema FLOAT_SCHEMA = Schema.create(Schema.Type.FLOAT);
@@ -44,15 +45,15 @@ public class PrimitiveFloatList extends AbstractList<Float>
4445
private boolean isCached = false;
4546
private CompositeByteBuffer byteBuffer;
4647

47-
public PrimitiveFloatList(int capacity) {
48+
public ByteBufferBackedPrimitiveFloatList(int capacity) {
4849
if (capacity != 0) {
4950
elements = new float[capacity];
5051
}
5152
// create empty ByteBuffer if capacity != 0 ( List<Float> interface usage case)
5253
byteBuffer = new CompositeByteBuffer(capacity != 0);
5354
}
5455

55-
public PrimitiveFloatList(Collection<Float> c) {
56+
public ByteBufferBackedPrimitiveFloatList(Collection<Float> c) {
5657
if (c != null) {
5758
elements = new float[c.size()];
5859
addAll(c);
@@ -61,21 +62,21 @@ public PrimitiveFloatList(Collection<Float> c) {
6162
}
6263

6364
/**
64-
* Instantiate (or re-use) and populate a {@link PrimitiveFloatList} from a {@link org.apache.avro.io.Decoder}.
65+
* Instantiate (or re-use) and populate a {@link ByteBufferBackedPrimitiveFloatList} from a {@link org.apache.avro.io.Decoder}.
6566
*
6667
* N.B.: the caller must ensure the data is of the appropriate type by calling {@link #isFloatArray(Schema)}.
6768
*
68-
* @param old old {@link PrimitiveFloatList} to reuse
69+
* @param old old {@link ByteBufferBackedPrimitiveFloatList} to reuse
6970
* @param in {@link org.apache.avro.io.Decoder} to read new list from
70-
* @return a {@link PrimitiveFloatList} with data, possibly the old argument reused
71+
* @return a {@link ByteBufferBackedPrimitiveFloatList} with data, possibly the old argument reused
7172
* @throws IOException on io errors
7273
*/
7374
public static Object readPrimitiveFloatArray(Object old, Decoder in) throws IOException {
7475
long length = in.readArrayStart();
7576
long totalLength = 0;
7677

7778
if (length > 0) {
78-
PrimitiveFloatList array = (PrimitiveFloatList) newPrimitiveFloatArray(old);
79+
ByteBufferBackedPrimitiveFloatList array = (ByteBufferBackedPrimitiveFloatList) newPrimitiveFloatArray(old);
7980
int index = 0;
8081

8182
do {
@@ -90,11 +91,11 @@ public static Object readPrimitiveFloatArray(Object old, Decoder in) throws IOEx
9091
setupElements(array, (int)totalLength);
9192
return array;
9293
} else {
93-
return new PrimitiveFloatList(0);
94+
return new ByteBufferBackedPrimitiveFloatList(0);
9495
}
9596
}
9697

97-
private static void setupElements(PrimitiveFloatList list, int totalSize) {
98+
private static void setupElements(ByteBufferBackedPrimitiveFloatList list, int totalSize) {
9899
if (list.elements.length != 0) {
99100
if (totalSize <= list.getCapacity()) {
100101
// reuse the float array directly
@@ -111,7 +112,7 @@ private static void setupElements(PrimitiveFloatList list, int totalSize) {
111112

112113
/**
113114
* @param expected {@link Schema} to inspect
114-
* @return true if the {@code expected} SCHEMA is of the right type to decode as a {@link PrimitiveFloatList}
115+
* @return true if the {@code expected} SCHEMA is of the right type to decode as a {@link ByteBufferBackedPrimitiveFloatList}
115116
* false otherwise
116117
*/
117118
public static boolean isFloatArray(Schema expected) {
@@ -120,15 +121,15 @@ public static boolean isFloatArray(Schema expected) {
120121
}
121122

122123
private static Object newPrimitiveFloatArray(Object old) {
123-
if (old instanceof PrimitiveFloatList) {
124-
PrimitiveFloatList oldFloatList = (PrimitiveFloatList) old;
124+
if (old instanceof ByteBufferBackedPrimitiveFloatList) {
125+
ByteBufferBackedPrimitiveFloatList oldFloatList = (ByteBufferBackedPrimitiveFloatList) old;
125126
oldFloatList.byteBuffer.clear();
126127
oldFloatList.isCached = false;
127128
oldFloatList.size = 0;
128129
return oldFloatList;
129130
} else {
130131
// Just a place holder, will set up the elements later.
131-
return new PrimitiveFloatList(0);
132+
return new ByteBufferBackedPrimitiveFloatList(0);
132133
}
133134
}
134135

@@ -282,8 +283,8 @@ public Float peek() {
282283
@Override
283284
public int compareTo(GenericArray<Float> that) {
284285
cacheFromByteBuffer();
285-
if (that instanceof PrimitiveFloatList) {
286-
PrimitiveFloatList thatPrimitiveList = (PrimitiveFloatList) that;
286+
if (that instanceof ByteBufferBackedPrimitiveFloatList) {
287+
ByteBufferBackedPrimitiveFloatList thatPrimitiveList = (ByteBufferBackedPrimitiveFloatList) that;
287288
if (this.size == thatPrimitiveList.size) {
288289
for (int i = 0; i < this.size; i++) {
289290
int compare = Float.compare(this.elements[i], thatPrimitiveList.elements[i]);

avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -589,10 +589,10 @@ private void processArray(JVar arraySchemaVar, final String name, final Schema a
589589

590590
final JVar arrayVar = action.getShouldRead() ? declareValueVar(name, arraySchema, parentBody) : null;
591591
/**
592-
* Special optimization for float array by leveraging {@link PrimitiveFloatList}.
592+
* Special optimization for float array by leveraging {@link ByteBufferBackedPrimitiveFloatList}.
593593
*/
594594
if (action.getShouldRead() && arraySchema.getElementType().getType().equals(Schema.Type.FLOAT)) {
595-
JClass primitiveFloatList = codeModel.ref(PrimitiveFloatList.class);
595+
JClass primitiveFloatList = codeModel.ref(ByteBufferBackedPrimitiveFloatList.class);
596596
JExpression readPrimitiveFloatArrayInvocation = primitiveFloatList.staticInvoke("readPrimitiveFloatArray").
597597
arg(reuseSupplier.get()).arg(JExpr.direct(DECODER));
598598
JExpression castedResult =

avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastSerdeCache.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package com.linkedin.avro.fastserde;
22

3+
import org.apache.avro.generic.ColdGenericDatumReader;
4+
import org.apache.avro.generic.ColdSpecificDatumReader;
35
import java.io.File;
46
import java.io.IOException;
57
import java.lang.reflect.ParameterizedType;
@@ -478,7 +480,7 @@ public static class FastDeserializerWithAvroSpecificImpl<V> implements FastDeser
478480
private final SpecificDatumReader<V> datumReader;
479481

480482
public FastDeserializerWithAvroSpecificImpl(Schema writerSchema, Schema readerSchema) {
481-
this.datumReader = new SpecificDatumReader<>(writerSchema, readerSchema);
483+
this.datumReader = new ColdSpecificDatumReader<>(writerSchema, readerSchema);
482484
}
483485

484486
@Override
@@ -491,7 +493,7 @@ public static class FastDeserializerWithAvroGenericImpl<V> implements FastDeseri
491493
private final GenericDatumReader<V> datumReader;
492494

493495
public FastDeserializerWithAvroGenericImpl(Schema writerSchema, Schema readerSchema) {
494-
this.datumReader = new GenericDatumReader<>(writerSchema, readerSchema);
496+
this.datumReader = new ColdGenericDatumReader<>(writerSchema, readerSchema);
495497
}
496498

497499
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package com.linkedin.avro.fastserde.coldstart;
2+
3+
import com.linkedin.avro.api.PrimitiveFloatList;
4+
import org.apache.avro.Schema;
5+
import org.apache.avro.generic.GenericData;
6+
7+
/**
8+
* A {@link PrimitiveFloatList} implementation which is equivalent in all respect to the vanilla Avro
9+
* implementation, both in terms of functionality and (lack of) performance. It provides the primitive
10+
* API that the interface requires, but actually just returns an unboxed Float Object, thus providing
11+
* no GC benefit. This should be possible to improve upon in the future, however.
12+
*
13+
* The main motivation for this class is merely to provide a guarantee that the extended API is always
14+
* available, even when Fast-Avro isn't warmed up yet.
15+
*/
16+
public class ColdPrimitiveFloatList extends GenericData.Array<Float> implements PrimitiveFloatList {
17+
private static final Schema SCHEMA = Schema.createArray(Schema.create(Schema.Type.FLOAT));
18+
public ColdPrimitiveFloatList(int capacity) {
19+
super(capacity, SCHEMA);
20+
}
21+
22+
@Override
23+
public float getPrimitive(int index) {
24+
return get(index);
25+
}
26+
27+
@Override
28+
public boolean addPrimitive(float o) {
29+
return add(o);
30+
}
31+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package org.apache.avro.generic;
2+
3+
import com.linkedin.avro.fastserde.coldstart.ColdPrimitiveFloatList;
4+
import java.util.Collection;
5+
import org.apache.avro.Schema;
6+
7+
8+
/**
9+
* An interface with default implementation in order to defeat the lack of multiple inheritance.
10+
*/
11+
public interface ColdDatumReaderMixIn {
12+
default Object newArray(Object old, int size, Schema schema, NewArrayFunction fallBackFunction) {
13+
switch (schema.getElementType().getType()) {
14+
case FLOAT:
15+
if (null == old || !(old instanceof ColdPrimitiveFloatList)) {
16+
return new ColdPrimitiveFloatList(size);
17+
}
18+
((Collection) old).clear();
19+
return old;
20+
// TODO: Add more cases when we support more primitive array types
21+
default:
22+
return fallBackFunction.newArray(old, size, schema);
23+
}
24+
}
25+
26+
interface NewArrayFunction {
27+
Object newArray(Object old, int size, Schema schema);
28+
}
29+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package org.apache.avro.generic;
2+
3+
import org.apache.avro.Schema;
4+
5+
6+
/**
7+
* A light-weight extension of {@link GenericDatumReader} which merely ensures that the types of the
8+
* extended API are always returned.
9+
*
10+
* This class needs to be in the org.apache.avro.generic package in order to access protected methods.
11+
*/
12+
public class ColdGenericDatumReader<T> extends GenericDatumReader<T> implements ColdDatumReaderMixIn {
13+
public ColdGenericDatumReader(Schema writerSchema, Schema readerSchema) {
14+
super(writerSchema, readerSchema);
15+
}
16+
17+
@Override
18+
protected Object newArray(Object old, int size, Schema schema) {
19+
return newArray(old, size, schema, super::newArray);
20+
}
21+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package org.apache.avro.generic;
2+
3+
import org.apache.avro.Schema;
4+
import org.apache.avro.specific.SpecificDatumReader;
5+
6+
7+
/**
8+
* A light-weight extension of {@link SpecificDatumReader} which merely ensures that the types of
9+
* the extended API are always returned.
10+
*
11+
* This class needs to be in the org.apache.avro.generic package in order to access protected methods.
12+
*/
13+
public class ColdSpecificDatumReader<T> extends SpecificDatumReader<T> implements ColdDatumReaderMixIn {
14+
public ColdSpecificDatumReader(Schema writerSchema, Schema readerSchema) {
15+
super(writerSchema, readerSchema);
16+
}
17+
18+
@Override
19+
protected Object newArray(Object old, int size, Schema schema) {
20+
return newArray(old, size, schema, super::newArray);
21+
}
22+
}

avro-fastserde/src/test/java/com/linkedin/avro/fastserde/FastDeserializerDefaultsTest.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ public void testPrimitiveFloatListAddPrimitive() {
8787
long startTime = System.currentTimeMillis();
8888

8989
for (int i = 0; i < iteration; i++) {
90-
PrimitiveFloatList list = new PrimitiveFloatList(array_size);
90+
ByteBufferBackedPrimitiveFloatList list = new ByteBufferBackedPrimitiveFloatList(array_size);
9191

9292
for (int l = 0; l < array_size; l++) {
9393
list.addPrimitive((float) l);

0 commit comments

Comments
 (0)