Variable length quantity, unsigned integer, base128, little-endian: Perl parsing library

A variable-length unsigned integer using base128 encoding. Each 1-byte group consists of a 1-bit continuation flag and a 7-bit value, and the groups are ordered "least significant group first", i.e. in a "little-endian" manner.

This particular encoding is specified and used in:

  • DWARF debug file format, where it's dubbed "unsigned LEB128" or "ULEB128". http://dwarfstd.org/doc/dwarf-2.0.0.pdf - page 139
  • Google Protocol Buffers, where it's called "Base 128 Varints". https://developers.google.com/protocol-buffers/docs/encoding?csw=1#varints
  • Apache Lucene, where it's called "VInt" http://lucene.apache.org/core/3_5_0/fileformats.html#VInt
  • Apache Avro uses this as a basis for integer encoding, adding ZigZag on top of it for signed ints http://avro.apache.org/docs/current/spec.html#binary_encode_primitive

More information on this encoding is available at https://en.wikipedia.org/wiki/LEB128

This particular implementation supports serialized values up to 8 bytes long.

KS implementation details

License: CC0-1.0
Minimal Kaitai Struct required: 0.7

This page hosts a formal specification of Variable length quantity, unsigned integer, base128, little-endian using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Perl source code to parse Variable length quantity, unsigned integer, base128, little-endian

VlqBase128Le.pm

# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

use strict;
use warnings;
use IO::KaitaiStruct 0.007_000;

########################################################################
package VlqBase128Le;

our @ISA = 'IO::KaitaiStruct::Struct';

# Parses a VlqBase128Le structure from the file at $filename.
#
# Parameters:
#   $class    - class name the parsed object is constructed as
#   $filename - path of the file to read
#
# Returns the object produced by $class->new(), or undef when the file
# cannot be opened. The file is read in binary mode.
sub from_file {
    my ($class, $filename) = @_;

    # "return undef" (rather than a bare "return") is kept on purpose so that
    # list-context callers still receive exactly one undef on failure.
    open(my $fh, '<', $filename) or return undef;
    binmode($fh);

    # Dispatch through the class so that a subclass overriding new() has its
    # own constructor used, instead of always calling this package's new().
    return $class->new(IO::KaitaiStruct::Stream->new($fh));
}

# Constructs a VlqBase128Le object and immediately parses it from the stream.
#
# Parameters:
#   $class  - class to bless the new object into
#   $io     - stream object handed to IO::KaitaiStruct::Struct->new()
#   $parent - stored as-is under {_parent} (may be undef)
#   $root   - stored under {_root}; when false, the new object is its own root
#
# Returns the blessed object after _read() has been run on it.
sub new {
    my ($class, $io, $parent, $root) = @_;

    # Build the base struct first, then rebless it into the requested class.
    my $self = bless(IO::KaitaiStruct::Struct->new($io), $class);

    $self->{_parent} = $parent;
    $self->{_root}   = $root || $self;

    $self->_read();
    return $self;
}

# Reads groups from the stream until one reports that no further group
# follows, storing them in order in the array reference $self->{groups}.
#
# At least one group is always read. Each group is constructed with this
# object as its parent and this object's root as its root.
#
# Returns nothing.
sub _read {
    my ($self) = @_;

    $self->{groups} = [];

    # A lexical is used instead of the global $_ so that the caller's topic
    # variable (for example inside a for/map loop) is never overwritten.
    my $group;
    do {
        $group = VlqBase128Le::Group->new($self->{_io}, $self, $self->{_root});
        push @{$self->{groups}}, $group;
    } while ($group->has_next());

    return;
}

# Number of groups in this value, computed on first use and then cached
# in $self->{len}.
#
# Returns the element count of the array returned by groups().
sub len {
    my ($self) = @_;

    unless ($self->{len}) {
        $self->{len} = scalar(@{$self->groups()});
    }

    return $self->{len};
}

# Decoded integer value, computed on first use and then cached in
# $self->{value}.
#
# Group i contributes its 7-bit value shifted left by 7*i bits. Only the
# first eight groups are combined; any further groups are ignored.
#
# Returns the decoded unsigned integer.
sub value {
    my ($self) = @_;

    # Test definedness, not truth: a decoded value of 0 is a valid cached
    # result and must not trigger recomputation on every call.
    return $self->{value} if defined $self->{value};

    my $groups = $self->groups();
    my $count  = $self->len();
    my $last   = ($count < 8 ? $count : 8) - 1;

    my $value = 0;
    for my $i (0 .. $last) {
        $value += $groups->[$i]->value() << (7 * $i);
    }

    $self->{value} = $value;
    return $self->{value};
}

# Accessor for the parsed groups.
#
# Returns whatever is stored under $self->{groups}; _read() fills this with
# an array reference of VlqBase128Le::Group objects in stream order.
sub groups {
    my $self = shift;
    return $self->{groups};
}

########################################################################
package VlqBase128Le::Group;

our @ISA = 'IO::KaitaiStruct::Struct';

# Parses a VlqBase128Le::Group structure from the file at $filename.
#
# Parameters:
#   $class    - class name the parsed object is constructed as
#   $filename - path of the file to read
#
# Returns the object produced by $class->new(), or undef when the file
# cannot be opened. The file is read in binary mode.
sub from_file {
    my ($class, $filename) = @_;

    # "return undef" (rather than a bare "return") is kept on purpose so that
    # list-context callers still receive exactly one undef on failure.
    open(my $fh, '<', $filename) or return undef;
    binmode($fh);

    # Dispatch through the class so that a subclass overriding new() has its
    # own constructor used, instead of always calling this package's new().
    return $class->new(IO::KaitaiStruct::Stream->new($fh));
}

# Constructs a VlqBase128Le::Group object and immediately parses it from
# the stream.
#
# Parameters:
#   $class  - class to bless the new object into
#   $io     - stream object handed to IO::KaitaiStruct::Struct->new()
#   $parent - stored as-is under {_parent} (may be undef)
#   $root   - stored under {_root}; when false, the new object is its own root
#
# Returns the blessed object after _read() has been run on it.
sub new {
    my ($class, $io, $parent, $root) = @_;

    # Build the base struct first, then rebless it into the requested class.
    my $self = bless(IO::KaitaiStruct::Struct->new($io), $class);

    $self->{_parent} = $parent;
    $self->{_root}   = $root || $self;

    $self->_read();
    return $self;
}

# Reads this group's single byte from the stream via read_u1() and stores
# it in $self->{b}.
sub _read {
    my $self = shift;

    my $io = $self->{_io};
    $self->{b} = $io->read_u1();
}

# Continuation flag of this group, computed on first use and then cached
# in $self->{has_next}.
#
# Returns true when the top bit (mask 128) of the group's byte is set, which
# the parent's read loop treats as "another group follows"; false otherwise.
sub has_next {
    my ($self) = @_;

    # Test definedness, not truth: a false flag is a valid cached result
    # (it is the answer for the final group of every value).
    return $self->{has_next} if defined $self->{has_next};

    $self->{has_next} = ($self->b() & 128) != 0;
    return $self->{has_next};
}

# The 7-bit payload of this group, computed on first use and then cached
# in $self->{value}.
#
# Returns the group's byte with the top bit masked off (byte & 127).
sub value {
    my ($self) = @_;

    # Test definedness, not truth: a payload of 0 is a valid cached result
    # and must not trigger recomputation on every call.
    return $self->{value} if defined $self->{value};

    $self->{value} = $self->b() & 127;
    return $self->{value};
}

# Accessor for the raw byte of this group.
#
# Returns whatever is stored under $self->{b}; _read() sets this to the
# result of read_u1() on the stream.
sub b {
    my $self = shift;
    return $self->{b};
}

1;