-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dependency #10
Comments
Thanks for the heads up. I'll take a look. If the underlying library is buggy, it could be interesting from a backwards compatibility perspective. We'll see. It has been a while, but the line of code you asked about looks like it is just normalizing the endianess. But, I'll have to refresh on it too. |
@unwave There is a lot of material in your linked issue. Maybe you can point me to something specific. I'm interesting in if there is a value that isn't encoded correctly according to the "varints" used in gRPC. To help test, here is a playground of an alternative implementation: https://go.dev/play/p/WUBhGq1qQXQ Is there a value that is encoded one way with Go, but another way with |
VLQ and LEB128 differ only in endianness. Go, Google Protocol Buffers and My problem was that Gopackage main
import (
"encoding/binary"
"fmt"
)
// https://go.dev/src/encoding/binary/varint.go
func PutUvarint(buf []byte, x uint64) int {
i := 0
for x >= 0x80 {
buf[i] = byte(x) | 0x80
x >>= 7
i++
}
buf[i] = byte(x)
return i + 1
}
func to_bin_array(input []byte) []string {
array := make([]string, len(input))
for i, n := range input {
array[i] = fmt.Sprintf("%08b", n)
}
return array
}
func reverse(input []byte) []byte {
if len(input) == 0 {
return input
}
return append(reverse(input[1:]), input[0])
}
// https://en.wikipedia.org/wiki/Variable-length_quantity
func encode_vlq(buf []byte, x uint64) []byte {
i := 0
buf[i] = byte(x) & 0x7f
x >>= 7
i++
for x > 0 {
buf[i] = byte(x) | 0x80
x >>= 7
i++
}
return reverse(buf[:i])
}
func main() {
buf := make([]byte, binary.MaxVarintLen64)
a := []uint64{0, 127, 128, 8192, 16383, 16384, 2097151, 2097152, 134217728, 268435455, 106903}
var last_index int
for _, n := range a {
fmt.Println("LEB128")
last_index = PutUvarint(buf, n)
fmt.Println(buf[:last_index])
fmt.Println(to_bin_array(buf[:last_index]))
fmt.Println("VLQ")
arr := encode_vlq(buf, n)
fmt.Println(arr)
fmt.Println(to_bin_array(arr))
fmt.Println("---------------------")
}
} Pythondef _byte(b):
return bytes((b, ))
# https://github.com/fmoo/python-varint
def encode(number):
"""Pack `number` into varint bytes"""
buf = b''
while True:
towrite = number & 0x7f
number >>= 7
if number:
buf += _byte(towrite | 0x80)
else:
buf += _byte(towrite)
break
return buf
def encode_vlq(n):
""" https://en.wikipedia.org/wiki/Variable-length_quantity """
result = [n & 127]
n >>= 7
while n:
result.append(n & 127 | 128)
n >>= 7
return bytes(reversed(result))
def encode_leb128(n):
""" https://en.wikipedia.org/wiki/LEB128 """
result = []
while n >= 128:
result.append(n & 127 | 128)
n >>= 7
result.append(n)
return bytes(result)
a = [106903, 0, 127, 128, 8192, 16383, 16384, 2097151, 2097152, 134217728, 268435455]
for i in a:
print(list(encode(i)))
print(list(encode_leb128(i)))
print(list(encode_vlq(i)))
print() This will be the same: def encode_leb128(n):
""" https://en.wikipedia.org/wiki/LEB128 """
result = []
while n >= 128:
result.append(n & 127 | 128)
n >>= 7
result.append(n)
return bytes(result) timeitdef _byte(b):
return bytes((b, ))
def encode(number):
"""Pack `number` into varint bytes"""
buf = b''
while True:
towrite = number & 0x7f
number >>= 7
if number:
buf += _byte(towrite | 0x80)
else:
buf += _byte(towrite)
break
return buf
def encode_leb128(n):
""" https://en.wikipedia.org/wiki/LEB128 """
result = []
while n >= 128:
result.append(n & 127 | 128)
n >>= 7
result.append(n)
return bytes(result)
def encode_leb128_b(n):
""" https://en.wikipedia.org/wiki/LEB128 """
result = bytearray()
while n >= 128:
result.append(n & 127 | 128)
n >>= 7
result.append(n)
return result
def encode_leb128_b_2(n):
""" https://en.wikipedia.org/wiki/LEB128 """
result = b''
while n >= 128:
result += bytes((n & 127 | 128, ))
n >>= 7
result += bytes((n, ))
return result
def test(func):
for x in range(1000000):
func(x)
for x in range(1000000):
assert encode(x) == encode_leb128(x) == encode_leb128_b(x) == encode_leb128_b_2(x)
print('OK')
print()
import timeit
a = timeit.timeit("test(encode_leb128)", globals = globals(), number=10)
print("encode_leb128")
print(a)
a = timeit.timeit("test(encode)", globals = globals(), number=10)
print("fmoo/python-varint")
print(a)
a = timeit.timeit("test(encode_leb128_b)", globals = globals(), number=10)
print("encode_leb128_b")
print(a)
a = timeit.timeit("test(encode_leb128_b_2)", globals = globals(), number=10)
print("encode_leb128_b_2")
print(a)
|
Why would you normalize the endianness of a hash? Interesting:
Related to: Line 18 in 305b6fb
return SFTPAttributes.from_stat(os.fstat(self.readfile.fileno())) |
Check this: Different results fmoo/python-varint#9
You can use this. CC0 License.
py-imohash/imohash/imohash.py
Line 32 in 305b6fb
The text was updated successfully, but these errors were encountered: