Variable length quantity, unsigned/signed integer, base128, little-endian: Perl parsing library

A variable-length unsigned/signed integer using base128 encoding. Each 1-byte group consists of a 1-bit continuation flag and a 7-bit value chunk, and the groups are ordered "least significant group first", i.e. in a "little-endian" manner.

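This particular encoding is specified and used in several formats, among them the DWARF debug file format (where it is called "LEB128" / "ULEB128") and Google Protocol Buffers (where it is called "Base 128 Varints").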

More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128

This particular implementation supports integer values up to 64 bits (i.e. the maximum unsigned value supported is 2**64 - 1), which implies that serialized values can be up to 10 bytes in length.

If the most significant 10th byte (groups[9]) is present, its has_next must be false (otherwise we would have 11 or more bytes, which is not supported) and its value can be only 0 or 1 (because a 9-byte VLQ can represent 9 * 7 = 63 bits already, so the 10th byte can only add 1 bit, since only integers up to 64 bits are supported). These restrictions are enforced by this implementation. They were inspired by the Protoscope tool, see https://github.com/protocolbuffers/protoscope/blob/8e7a6aafa2c9958527b1e0747e66e1bfff045819/writer.go#L644-L648.

KS implementation details

License: CC0-1.0
Minimal Kaitai Struct required: 0.10

References

This page hosts a formal specification of Variable length quantity, unsigned/signed integer, base128, little-endian using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Perl source code to parse Variable length quantity, unsigned/signed integer, base128, little-endian

VlqBase128Le.pm

# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

use strict;
use warnings;
use IO::KaitaiStruct 0.009_000;

########################################################################
# Top-level type: one variable-length quantity, stored as a list of
# VlqBase128Le::Group objects (see _read) with lazily computed accessors
# len, value, sign_bit and value_signed.
package VlqBase128Le;

# Inherit from the Kaitai Struct runtime's base class.
our @ISA = 'IO::KaitaiStruct::Struct';

# Convenience constructor: opens $filename for binary reading, wraps the
# handle in an IO::KaitaiStruct::Stream and parses it via new().
#
# Parameters:
#   $class    - class name to construct
#   $filename - path of the file to parse
#
# Returns the parsed object, or undef when the file cannot be opened.
sub from_file {
    my ($class, $filename) = @_;

    open(my $handle, '<', $filename)
        or return undef;
    binmode($handle);

    my $stream = IO::KaitaiStruct::Stream->new($handle);
    return new($class, $stream);
}

# Builds an object on top of IO::KaitaiStruct::Struct->new($_io), re-blesses
# it into $class, records the parent/root links and parses the stream
# immediately by calling _read().
#
# Parameters:
#   $class   - class name to bless the object into
#   $_io     - stream object handed to the base constructor
#   $_parent - parent object (stored as-is, may be undef)
#   $_root   - root object; when false, the new object is its own root
#
# Returns the parsed object.
sub new {
    my ($class, $_io, $_parent, $_root) = @_;

    my $self = bless(IO::KaitaiStruct::Struct->new($_io), $class);
    @{$self}{qw(_parent _root)} = ($_parent, $_root || $self);

    $self->_read();
    return $self;
}

# Reads groups from the stream until one arrives whose continuation flag
# (has_next) is false, and stores them as an array reference in
# $self->{groups}. At least one group is always read.
#
# VlqBase128Le::Group->new() in this file receives only
# ($_io, $_parent, $_root) and never assigns the idx / prev_interm_value /
# multiplier fields that the group's accessors return. They are therefore
# filled in here, right after each group is constructed, so that
# interm_value() (and, through it, value(), sign_bit() and value_signed())
# works on defined operands.
#
# NOTE(review): the restrictions on a 10th group described for this format
# (has_next must be false, value at most 1) are not checked here.
sub _read {
    my ($self) = @_;

    $self->{groups} = [];

    my $prev_interm_value = 0;    # sum contributed by all earlier groups
    my $multiplier        = 1;    # weight of the current group: 128 ** idx
    my $group;
    do {
        # A lexical is used instead of the global $_ so that parsing never
        # overwrites the caller's $_ (e.g. an aliased foreach element).
        $group = VlqBase128Le::Group->new($self->{_io}, $self, $self->{_root});

        $group->{idx}               = scalar(@{$self->{groups}});
        $group->{prev_interm_value} = $prev_interm_value;
        $group->{multiplier}        = $multiplier;
        push @{$self->{groups}}, $group;

        # Carry the running total and the next 7-bit weight forward.
        $prev_interm_value = $group->interm_value();
        $multiplier *= 128;
    } until (!($group->has_next()));
}

# Lazily computed number of groups (i.e. bytes) making up this quantity.
# The count is cached in $self->{len} once it is a true value.
sub len {
    my ($self) = @_;

    $self->{len} ||= scalar(@{$self->groups()});
    return $self->{len};
}

# Lazily computed unsigned value: the interm_value() of the last group.
# The result is cached in $self->{value} once it is a true value.
sub value {
    my ($self) = @_;

    $self->{value} ||= $self->groups()->[-1]->interm_value();
    return $self->{value};
}

# Lazily computed weight of the sign bit for the signed interpretation:
# 9223372036854775808 when the quantity has exactly 10 groups, otherwise
# 64 times the multiplier of the last group.
# The result is cached in $self->{sign_bit} once it is a true value.
sub sign_bit {
    my ($self) = @_;

    unless ($self->{sign_bit}) {
        if ($self->len() == 10) {
            $self->{sign_bit} = 9223372036854775808;
        }
        else {
            my $last_group = $self->groups()->[-1];
            $self->{sign_bit} = $last_group->multiplier() * 64;
        }
    }
    return $self->{sign_bit};
}

# Lazily computed signed interpretation of value(): when sign_bit() is
# positive and value() is at least sign_bit(), the result is
# -(sign_bit - (value - sign_bit)); otherwise it is value() unchanged.
# The result is cached in $self->{value_signed} once it is a true value.
sub value_signed {
    my ($self) = @_;

    unless ($self->{value_signed}) {
        my $sign_bit = $self->sign_bit();
        my $unsigned = $self->value();

        if (($sign_bit > 0) && ($unsigned >= $sign_bit)) {
            $self->{value_signed} = -($sign_bit - ($unsigned - $sign_bit));
        }
        else {
            $self->{value_signed} = $unsigned;
        }
    }
    return $self->{value_signed};
}

# Accessor: the array reference of VlqBase128Le::Group objects stored in
# $self->{groups} by _read().
sub groups {
    my $self = shift;
    return $self->{groups};
}

########################################################################
# One byte of the quantity: a 1-bit has_next flag followed by a 7-bit
# value chunk (see _read), plus the idx / prev_interm_value / multiplier
# fields used by interm_value().
package VlqBase128Le::Group;

# Inherit from the Kaitai Struct runtime's base class.
our @ISA = 'IO::KaitaiStruct::Struct';

# Convenience constructor: opens $filename for binary reading, wraps the
# handle in an IO::KaitaiStruct::Stream and parses a single group from it
# via new().
#
# Parameters:
#   $class    - class name to construct
#   $filename - path of the file to parse
#
# Returns the parsed group, or undef when the file cannot be opened.
sub from_file {
    my ($class, $filename) = @_;

    my $handle;
    unless (open($handle, '<', $filename)) {
        return undef;
    }
    binmode($handle);

    my $stream = IO::KaitaiStruct::Stream->new($handle);
    return new($class, $stream);
}

# Builds a group on top of IO::KaitaiStruct::Struct->new($_io), re-blesses
# it into $class, records the parent/root links and reads its fields
# immediately by calling _read().
#
# Parameters:
#   $class   - class name to bless the object into
#   $_io     - stream object handed to the base constructor
#   $_parent - parent object (stored as-is, may be undef)
#   $_root   - root object; when false, the new object is its own root
#
# Returns the parsed group.
sub new {
    my ($class, $_io, $_parent, $_root) = @_;

    my $base = IO::KaitaiStruct::Struct->new($_io);
    my $self = bless($base, $class);

    $self->{_root}   = $_root || $self;
    $self->{_parent} = $_parent;

    $self->_read();
    return $self;
}

# Reads one group from the stream in $self->{_io}: first a 1-bit integer
# into $self->{has_next}, then a 7-bit integer into $self->{value}
# (both via read_bits_int_be).
sub _read {
    my ($self) = @_;
    my $io = $self->{_io};

    $self->{has_next} = $io->read_bits_int_be(1);
    $self->{value} = $io->read_bits_int_be(7);
}

# Lazily computed running value up to and including this group:
# prev_interm_value() + value() * multiplier().
# The result is cached in $self->{interm_value} once it is a true value.
sub interm_value {
    my ($self) = @_;

    unless ($self->{interm_value}) {
        my $base   = $self->prev_interm_value();
        my $chunk  = $self->value();
        my $weight = $self->multiplier();
        $self->{interm_value} = ($base + ($chunk * $weight));
    }
    return $self->{interm_value};
}

# Accessor: the 1-bit continuation flag stored in $self->{has_next} by
# _read().
sub has_next {
    my $self = shift;
    return $self->{has_next};
}

# Accessor: the 7-bit value chunk stored in $self->{value} by _read().
sub value {
    my $self = shift;
    return $self->{value};
}

# Accessor: returns $self->{idx}. This package's own new() and _read() do
# not assign that field.
sub idx {
    my $self = shift;
    return $self->{idx};
}

# Accessor: returns $self->{prev_interm_value}, the term that
# interm_value() adds this group's contribution to. This package's own
# new() and _read() do not assign that field.
sub prev_interm_value {
    my $self = shift;
    return $self->{prev_interm_value};
}

# Accessor: returns $self->{multiplier}, the factor that interm_value()
# applies to this group's value chunk. This package's own new() and
# _read() do not assign that field.
sub multiplier {
    my $self = shift;
    return $self->{multiplier};
}

1;