Variable length quantity, unsigned/signed integer, base128, little-endian: Go parsing library

A variable-length unsigned/signed integer using base128 encoding. 1-byte groups consist of 1-bit flag of continuation and 7-bit value chunk, and are ordered "least significant group first", i.e. in "little-endian" manner.

This particular encoding is specified and used in:

DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128". https://dwarfstd.org/doc/dwarf-2.0.0.pdf - page 139
Google Protocol Buffers, where it's called "Base 128 Varints". https://protobuf.dev/programming-guides/encoding/#varints
Apache Lucene, where it's called "VInt" https://lucene.apache.org/core/3_5_0/fileformats.html#VInt
Apache Avro uses this as a basis for integer encoding, adding ZigZag on top of it for signed ints https://avro.apache.org/docs/1.12.0/specification/#primitive-types-1

More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128

This particular implementation supports integer values up to 64 bits (i.e. the maximum unsigned value supported is 2**64 - 1), which implies that serialized values can be up to 10 bytes in length.

If the most significant 10th byte (groups[9]) is present, its has_next must be false (otherwise we would have 11 or more bytes, which is not supported) and its value can be only 0 or 1 (because a 9-byte VLQ can represent 9 * 7 = 63 bits already, so the 10th byte can only add 1 bit, since only integers up to 64 bits are supported). These restrictions are enforced by this implementation. They were inspired by the Protoscope tool, see https://github.com/protocolbuffers/protoscope/blob/8e7a6aafa2c9958527b1e0747e66e1bfff045819/writer.go#L644-L648.

KS implementation details

License: CC0-1.0

Minimal Kaitai Struct required: 0.10

References

This page hosts a formal specification of Variable length quantity, unsigned/signed integer, base128, little-endian using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Go source code to parse Variable length quantity, unsigned/signed integer, base128, little-endian

vlq_base128_le.go

Download

// Code generated by kaitai-struct-compiler from a .ksy source file. DO NOT EDIT.

import "github.com/kaitai-io/kaitai_struct_go_runtime/kaitai"


/**
 * A variable-length unsigned/signed integer using base128 encoding. 1-byte groups
 * consist of 1-bit flag of continuation and 7-bit value chunk, and are ordered
 * "least significant group first", i.e. in "little-endian" manner.
 * 
 * This particular encoding is specified and used in:
 * 
 * * DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128".
 *   <https://dwarfstd.org/doc/dwarf-2.0.0.pdf> - page 139
 * * Google Protocol Buffers, where it's called "Base 128 Varints".
 *   <https://protobuf.dev/programming-guides/encoding/#varints>
 * * Apache Lucene, where it's called "VInt"
 *   <https://lucene.apache.org/core/3_5_0/fileformats.html#VInt>
 * * Apache Avro uses this as a basis for integer encoding, adding ZigZag on
 *   top of it for signed ints
 *   <https://avro.apache.org/docs/1.12.0/specification/#primitive-types-1>
 * 
 * More information on this encoding is available at <https://en.wikipedia.org/wiki/LEB128>
 * 
 * This particular implementation supports integer values up to 64 bits (i.e. the
 * maximum unsigned value supported is `2**64 - 1`), which implies that serialized
 * values can be up to 10 bytes in length.
 * 
 * If the most significant 10th byte (`groups[9]`) is present, its `has_next`
 * must be `false` (otherwise we would have 11 or more bytes, which is not
 * supported) and its `value` can be only `0` or `1` (because a 9-byte VLQ can
 * represent `9 * 7 = 63` bits already, so the 10th byte can only add 1 bit,
 * since only integers up to 64 bits are supported). These restrictions are
 * enforced by this implementation. They were inspired by the Protoscope tool,
 * see <https://github.com/protocolbuffers/protoscope/blob/8e7a6aafa2c9958527b1e0747e66e1bfff045819/writer.go#L644-L648>.
 */
type VlqBase128Le struct {
	// Groups holds the parsed byte groups in stream order; Read appends one
	// entry per byte consumed.
	Groups []*VlqBase128Le_Group
	// _io, _root and _parent are stored by Read from its arguments.
	_io *kaitai.Stream
	_root *VlqBase128Le
	_parent kaitai.Struct
	// Each _f_xxx flag records that the matching lazily computed value below
	// has already been calculated by its getter (Len, SignBit, Value,
	// ValueSigned) and can be returned from the cache.
	_f_len bool
	len int
	_f_signBit bool
	signBit uint64
	_f_value bool
	value uint64
	_f_valueSigned bool
	valueSigned int64
}
/**
 * NewVlqBase128Le allocates an empty, zero-valued VlqBase128Le.
 * Call Read on the result to populate it from a stream.
 */
func NewVlqBase128Le() *VlqBase128Le {
	return new(VlqBase128Le)
}

/**
 * IO_ returns the stream stored in this object by Read (nil if Read has not
 * been called yet). Note the value receiver: the method operates on a copy
 * of the struct.
 */
func (this VlqBase128Le) IO_() *kaitai.Stream {
	return this._io
}

/**
 * Read parses one variable-length quantity from `io`, appending one entry to
 * Groups per byte consumed, and stops after the first group whose
 * continuation flag (HasNext) is false.
 *
 * Each group is constructed with three parameters derived from its
 * predecessor:
 *
 * * its zero-based index,
 * * the value accumulated by all previous groups (0 for the first group),
 * * its multiplier, i.e. the weight of its 7-bit chunk: 1 for the first
 *   group and 128 times the previous multiplier afterwards.
 *
 * Indexing must start at 0: the first group has no predecessor to consult,
 * and the group type applies its special 10th-byte limits when its index is
 * exactly 9.
 *
 * Returns the first error reported by a group's Read (I/O failure or a failed
 * validation); groups parsed before the failure stay in Groups.
 */
func (this *VlqBase128Le) Read(io *kaitai.Stream, parent kaitai.Struct, root *VlqBase128Le) (err error) {
	this._io = io
	this._parent = parent
	this._root = root

	for i := 0; ; i++ {
		// Seed values for the very first group.
		var prevIntermValue uint64
		multiplier := uint64(1)
		if i != 0 {
			prev := this.Groups[i-1]
			prevIntermValue, err = prev.IntermValue()
			if err != nil {
				return err
			}
			if i == 9 {
				// Weight of the 10th group is 2**63, spelled out explicitly
				// as in the format specification.
				multiplier = uint64(9223372036854775808)
			} else {
				multiplier = prev.Multiplier * 128
			}
		}
		group := NewVlqBase128Le_Group(int32(i), prevIntermValue, multiplier)
		if err = group.Read(this._io, this, this._root); err != nil {
			return err
		}
		this.Groups = append(this.Groups, group)
		if !group.HasNext {
			break
		}
	}
	return err
}
/**
 * Len returns the number of parsed groups, i.e. the length of Groups at the
 * time of the first call. The result is cached, so later calls return the
 * same number even if Groups changes. The returned error is always nil.
 */
func (this *VlqBase128Le) Len() (v int, err error) {
	if !this._f_len {
		this._f_len = true
		this.len = len(this.Groups)
	}
	return this.len, nil
}
/**
 * SignBit returns the weight of the most significant bit of the parsed
 * quantity, which ValueSigned treats as the sign bit.
 *
 * For a full 10-group quantity this is 2**63; otherwise it is the last
 * group's Multiplier times 64 (the top bit of that group's 7-bit chunk).
 * The result is cached after the first call.
 *
 * Groups must be non-empty (Read must have succeeded); otherwise indexing
 * the last group panics.
 */
func (this *VlqBase128Le) SignBit() (v uint64, err error) {
	if this._f_signBit {
		return this.signBit, nil
	}
	this._f_signBit = true
	groupCount, err := this.Len()
	if err != nil {
		return 0, err
	}
	// The intermediate must be uint64: 2**63 does not fit a signed int and
	// Multiplier is itself a uint64.
	var signBit uint64
	if groupCount == 10 {
		// The 10th group's Multiplier is 2**63, so Multiplier * 64 would
		// wrap around to 0; use the constant instead.
		signBit = uint64(9223372036854775808)
	} else {
		signBit = this.Groups[len(this.Groups)-1].Multiplier * 64
	}
	this.signBit = signBit
	return this.signBit, nil
}

/**
 * Resulting unsigned value as normal integer.
 *
 * It is the accumulated intermediate value of the last group. The result is
 * cached after the first call. Groups must be non-empty, otherwise indexing
 * the last group panics.
 */
func (this *VlqBase128Le) Value() (v uint64, err error) {
	if this._f_value {
		return this.value, nil
	}
	this._f_value = true
	lastGroup := this.Groups[len(this.Groups)-1]
	total, err := lastGroup.IntermValue()
	if err != nil {
		return 0, err
	}
	this.value = total
	return this.value, nil
}
/**
 * ValueSigned returns the parsed quantity interpreted as a signed integer.
 *
 * If the sign bit (see SignBit) is non-zero and the unsigned value is at
 * least as large as the sign bit, the result is
 * `-(signBit - (value - signBit))`; otherwise it is the unsigned value
 * converted to int64. The result is cached after the first call.
 *
 * Any error from SignBit or Value is returned as-is with a zero result.
 */
func (this *VlqBase128Le) ValueSigned() (v int64, err error) {
	if this._f_valueSigned {
		return this.valueSigned, nil
	}
	this._f_valueSigned = true
	signBit, err := this.SignBit()
	if err != nil {
		return 0, err
	}
	unsigned, err := this.Value()
	if err != nil {
		return 0, err
	}
	if signBit > 0 && unsigned >= signBit {
		// The subtraction is done in uint64 and only then converted, exactly
		// as in the generated expression.
		this.valueSigned = -(int64(signBit - (unsigned - signBit)))
	} else {
		this.valueSigned = int64(unsigned)
	}
	return this.valueSigned, nil
}

/**
 * One byte group, clearly divided into 7-bit "value" chunk and 1-bit "continuation" flag.
 */
type VlqBase128Le_Group struct {
	// HasNext is the 1-bit continuation flag read first from the byte.
	HasNext bool
	// Value is the 7-bit value chunk read after the flag.
	Value uint64
	// Idx, PrevIntermValue and Multiplier are the parameters handed to
	// NewVlqBase128Le_Group; Read applies stricter validation when Idx is 9,
	// and IntermValue combines the other two with Value.
	Idx int32
	PrevIntermValue uint64
	Multiplier uint64
	// _io, _root and _parent are stored by Read from its arguments.
	_io *kaitai.Stream
	_root *VlqBase128Le
	_parent *VlqBase128Le
	// _f_intermValue records that intermValue has already been computed by
	// IntermValue and can be returned from the cache.
	_f_intermValue bool
	intermValue uint64
}
/**
 * NewVlqBase128Le_Group allocates a group and stores its three parameters
 * (Idx, PrevIntermValue, Multiplier); every other field is left at its zero
 * value until Read is called.
 */
func NewVlqBase128Le_Group(idx int32, prevIntermValue uint64, multiplier uint64) *VlqBase128Le_Group {
	group := new(VlqBase128Le_Group)
	group.Idx = idx
	group.PrevIntermValue = prevIntermValue
	group.Multiplier = multiplier
	return group
}

/**
 * IO_ returns the stream stored in this group by Read (nil if Read has not
 * been called yet). Note the value receiver: the method operates on a copy
 * of the struct.
 */
func (this VlqBase128Le_Group) IO_() *kaitai.Stream {
	return this._io
}

/**
 * Read parses one byte group from `io`: a 1-bit continuation flag followed
 * by a 7-bit value chunk, both read as big-endian bit fields.
 *
 * For the group with Idx 9 (the 10th byte) two extra restrictions are
 * validated:
 *
 * * HasNext must be false, otherwise a "not equal" validation error for
 *   "/types/group/seq/0" is returned before the value chunk is read;
 * * Value must not exceed 1, otherwise a "greater than" validation error
 *   for "/types/group/seq/1" is returned.
 *
 * For every other index the flag is unrestricted and the value limit is 127.
 * Stream read errors are returned unchanged.
 */
func (this *VlqBase128Le_Group) Read(io *kaitai.Stream, parent *VlqBase128Le, root *VlqBase128Le) (err error) {
	this._io = io
	this._parent = parent
	this._root = root

	isTenthGroup := this.Idx == 9

	flagBit, err := this._io.ReadBitsIntBe(1)
	if err != nil {
		return err
	}
	this.HasNext = flagBit != 0
	// Only the 10th group constrains the flag: it must terminate the value.
	if isTenthGroup && this.HasNext {
		return kaitai.NewValidationNotEqualError(false, this.HasNext, this._io, "/types/group/seq/0")
	}

	chunk, err := this._io.ReadBitsIntBe(7)
	if err != nil {
		return err
	}
	this.Value = chunk
	// The 10th group may contribute a single bit only.
	maxValue := uint64(127)
	if isTenthGroup {
		maxValue = 1
	}
	if this.Value > maxValue {
		return kaitai.NewValidationGreaterThanError(maxValue, this.Value, this._io, "/types/group/seq/1")
	}
	return nil
}
/**
 * IntermValue returns the value accumulated up to and including this group:
 * `PrevIntermValue + Value * Multiplier`, computed in uint64 arithmetic.
 * The result is cached after the first call; the returned error is always
 * nil.
 */
func (this *VlqBase128Le_Group) IntermValue() (v uint64, err error) {
	if !this._f_intermValue {
		this._f_intermValue = true
		this.intermValue = this.PrevIntermValue + this.Value*this.Multiplier
	}
	return this.intermValue, nil
}

/**
 * Documentation for the `HasNext` field of `VlqBase128Le_Group`.
 * 
 * If `true`, then we have more bytes to read.
 * 
 * Since this implementation only supports serialized values up to 10
 * bytes, this must be `false` in the 10th group (`groups[9]`).
 */

/**
 * Documentation for the `Value` field of `VlqBase128Le_Group`.
 * 
 * The 7-bit (base128) numeric value chunk of this group.
 * 
 * Since this implementation only supports integer values up to 64 bits,
 * the `value` in the 10th group (`groups[9]`) can only be `0` or `1`
 * (otherwise the width of the represented value would be 65 bits or
 * more, which is not supported).
 */