Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dependency #10

Closed
unwave opened this issue Jan 12, 2022 · 4 comments
Closed

Dependency #10

unwave opened this issue Jan 12, 2022 · 4 comments

Comments

@unwave
Copy link

unwave commented Jan 12, 2022

def vlq_encode(n):
    """ https://en.wikipedia.org/wiki/Variable-length_quantity """
    result = [n & 127] # 127 -> 011111111
    n >>= 7
    while n:
        result.append(n & 127 | 128) # 128 -> 100000000
        n >>= 7
    return bytes(reversed(result))
@kalafut
Copy link
Owner

kalafut commented Jan 13, 2022

Thanks for the heads up. I'll take a look. If the underlying library is buggy, it could be interesting from a backwards compatibility perspective. We'll see.

It has been a while, but the line of code you asked about looks like it is just normalizing the endianess. But, I'll have to refresh on it too.

@kalafut
Copy link
Owner

kalafut commented Jan 13, 2022

@unwave There is a lot of material in your linked issue. Maybe you can point me to something specific. I'm interesting in if there is a value that isn't encoded correctly according to the "varints" used in gRPC. To help test, here is a playground of an alternative implementation: https://go.dev/play/p/WUBhGq1qQXQ

Is there a value that is encoded one way with Go, but another way with fmoo/python-varint?

@unwave
Copy link
Author

unwave commented Jan 13, 2022

VLQ and LEB128 differ only in endianness.

Go, Google Protocol Buffers and fmoo/python-varint use LEB128.

My problem was that fmoo/python-varint does not directly indicate which one is used. So not buggy but confusing.

Go
package main

import (
	"encoding/binary"
	"fmt"
)

// https://go.dev/src/encoding/binary/varint.go
func PutUvarint(buf []byte, x uint64) int {
	i := 0
	for x >= 0x80 {
		buf[i] = byte(x) | 0x80
		x >>= 7
		i++
	}
	buf[i] = byte(x)
	return i + 1
}

func to_bin_array(input []byte) []string {

	array := make([]string, len(input))

	for i, n := range input {
		array[i] = fmt.Sprintf("%08b", n)
	}

	return array
}

func reverse(input []byte) []byte {
	if len(input) == 0 {
		return input
	}
	return append(reverse(input[1:]), input[0])
}

// https://en.wikipedia.org/wiki/Variable-length_quantity
func encode_vlq(buf []byte, x uint64) []byte {
	i := 0

	buf[i] = byte(x) & 0x7f

	x >>= 7
	i++

	for x > 0 {
		buf[i] = byte(x) | 0x80
		x >>= 7
		i++
	}

	return reverse(buf[:i])
}

func main() {
	buf := make([]byte, binary.MaxVarintLen64)

	a := []uint64{0, 127, 128, 8192, 16383, 16384, 2097151, 2097152, 134217728, 268435455, 106903}

	var last_index int

	for _, n := range a {

		fmt.Println("LEB128")
		last_index = PutUvarint(buf, n)
		fmt.Println(buf[:last_index])
		fmt.Println(to_bin_array(buf[:last_index]))

		fmt.Println("VLQ")
		arr := encode_vlq(buf, n)
		fmt.Println(arr)
		fmt.Println(to_bin_array(arr))

		fmt.Println("---------------------")
	}
}
Python
def _byte(b):
    return bytes((b, ))

# https://github.com/fmoo/python-varint
def encode(number):
    """Pack `number` into varint bytes"""
    buf = b''
    while True:
        towrite = number & 0x7f
        number >>= 7
        if number:
            buf += _byte(towrite | 0x80)
        else:
            buf += _byte(towrite)
            break
    return buf
    

def encode_vlq(n):
    """ https://en.wikipedia.org/wiki/Variable-length_quantity """
    result = [n & 127]
    n >>= 7
    while n:
        result.append(n & 127 | 128)
        n >>= 7
    return bytes(reversed(result))


def encode_leb128(n):
    """ https://en.wikipedia.org/wiki/LEB128 """
    result = []
    while n >= 128:
        result.append(n & 127 | 128)
        n >>= 7
    result.append(n)
    return bytes(result)

a = [106903, 0, 127, 128, 8192, 16383, 16384, 2097151, 2097152, 134217728, 268435455]

for i in a:
    print(list(encode(i)))
    print(list(encode_leb128(i)))
    print(list(encode_vlq(i)))
    print()

This will be the same:

def encode_leb128(n):
    """ https://en.wikipedia.org/wiki/LEB128 """
    result = []
    while n >= 128:
        result.append(n & 127 | 128)
        n >>= 7
    result.append(n)
    return bytes(result)
timeit
def _byte(b):
    return bytes((b, ))

def encode(number):
    """Pack `number` into varint bytes"""
    buf = b''
    while True:
        towrite = number & 0x7f
        number >>= 7
        if number:
            buf += _byte(towrite | 0x80)
        else:
            buf += _byte(towrite)
            break
    return buf

def encode_leb128(n):
    """ https://en.wikipedia.org/wiki/LEB128 """
    result = []
    while n >= 128:
        result.append(n & 127 | 128)
        n >>= 7
    result.append(n)
    return bytes(result)
    
def encode_leb128_b(n):
    """ https://en.wikipedia.org/wiki/LEB128 """
    result = bytearray()
    while n >= 128:
        result.append(n & 127 | 128)
        n >>= 7
    result.append(n)
    return result
    
def encode_leb128_b_2(n):
    """ https://en.wikipedia.org/wiki/LEB128 """
    result = b''
    while n >= 128:
        result += bytes((n & 127 | 128, ))
        n >>= 7
    result += bytes((n, ))
    return result
    
 
def test(func):
    for x in range(1000000):
        func(x)


for x in range(1000000):
    assert encode(x) == encode_leb128(x) == encode_leb128_b(x) == encode_leb128_b_2(x)
print('OK')
print()

import timeit
   
a = timeit.timeit("test(encode_leb128)", globals = globals(), number=10)
print("encode_leb128")
print(a)

a = timeit.timeit("test(encode)", globals = globals(), number=10)
print("fmoo/python-varint")
print(a)

a = timeit.timeit("test(encode_leb128_b)", globals = globals(), number=10)
print("encode_leb128_b")
print(a)

a = timeit.timeit("test(encode_leb128_b_2)", globals = globals(), number=10)
print("encode_leb128_b_2")
print(a)
encode_leb128
9.1816136

fmoo/python-varint
16.764716399999998

encode_leb128_b
9.556593299999996

encode_leb128_b_2
13.369023799999994

@unwave unwave closed this as completed Jan 13, 2022
@unwave
Copy link
Author

unwave commented Apr 15, 2022

It has been a while, but the line of code you asked about looks like it is just normalizing the endianess. But, I'll have to refresh on it too.

Why would you normalize the endianness of a hash?

Interesting:

FIO19-C. Do not use fseek() and ftell() to compute the size of a regular file
https://wiki.sei.cmu.edu/confluence/display/c/FIO19-C.+Do+not+use+fseek%28%29+and+ftell%28%29+to+compute+the+size+of+a+regular+file

Related to:
f9fc302

f.seek(0, os.SEEK_END)

https://github.com/paramiko/paramiko/blob/609e01d550f62253c56c34fc5a49d01f1d49e6e2/tests/stub_sftp.py#L51

return SFTPAttributes.from_stat(os.fstat(self.readfile.fileno()))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants