ruby_marshal: PHP parsing library

Ruby's Marshal module allows serialization and deserialization of many standard and arbitrary Ruby objects in a compact binary format. It is relatively fast, available in the standard library and allows conservation of language-specific properties (such as symbols or encoding-aware strings).

Feature-wise, it is comparable to other language-specific implementations, such as:

* Java's Serializable
* .NET BinaryFormatter
* Python's marshal, pickle and shelve

Internally, a serialized stream consists of a simple magic header and a record.

KS implementation details

License: CC0-1.0

This page hosts a formal specification of ruby_marshal using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

PHP source code to parse ruby_marshal

RubyMarshal.php

<?php
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

/**
 * Ruby's Marshal module allows serialization and deserialization of
 * many standard and arbitrary Ruby objects in a compact binary
 * format. It is relatively fast, available in the standard library and
 * allows conservation of language-specific properties (such as symbols
 * or encoding-aware strings).
 * 
 * Feature-wise, it is comparable to other language-specific
 * implementations, such as:
 * 
 * * Java's
 *   [Serializable](https://docs.oracle.com/javase/8/docs/api/java/io/Serializable.html)
 * * .NET
 *   [BinaryFormatter](https://learn.microsoft.com/en-us/dotnet/api/system.runtime.serialization.formatters.binary.binaryformatter?view=net-7.0)
 * * Python's
 *   [marshal](https://docs.python.org/3/library/marshal.html),
 *   [pickle](https://docs.python.org/3/library/pickle.html) and
 *   [shelve](https://docs.python.org/3/library/shelve.html)
 * 
 * From internal perspective, serialized stream consists of a simple
 * magic header and a record.
 */

namespace {
    /**
     * Root object of a parsed Ruby Marshal stream: a two-byte version
     * marker followed by one record.
     */
    class RubyMarshal extends \Kaitai\Struct\Struct {
        protected $_m_version;
        protected $_m_records;

        public function __construct(\Kaitai\Struct\Stream $_io, \Kaitai\Struct\Struct $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the version marker, rejects anything other than the
         * bytes 0x04 0x08, then parses the record that follows.
         *
         * @throws \Kaitai\Struct\Error\ValidationNotEqualError when the
         *         version bytes differ from the expected marker.
         */
        private function _read() {
            $expected = "\x04\x08";
            $this->_m_version = $this->_io->readBytes(2);
            if ($this->version() != $expected) {
                throw new \Kaitai\Struct\Error\ValidationNotEqualError($expected, $this->version(), $this->_io(), "/seq/0");
            }
            $this->_m_records = new \RubyMarshal\Record($this->_io, $this, $this->_root);
        }

        /**
         * Raw two-byte version marker as read from the stream.
         */
        public function version() {
            return $this->_m_version;
        }

        /**
         * The record parsed immediately after the version marker.
         */
        public function records() {
            return $this->_m_records;
        }
    }
}

namespace RubyMarshal {
    /**
     * Array body: a packed-int element count followed by that many
     * records.
     */
    class RubyArray extends \Kaitai\Struct\Struct {
        protected $_m_numElements;
        protected $_m_elements;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the element count, then parses one record per element
         * in stream order.
         */
        private function _read() {
            $this->_m_numElements = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            $this->_m_elements = [];
            for ($remaining = $this->numElements()->value(); $remaining > 0; $remaining--) {
                $this->_m_elements[] = new \RubyMarshal\Record($this->_io, $this, $this->_root);
            }
        }

        /**
         * Element count as a PackedInt object (use `->value()` for the number).
         */
        public function numElements() {
            return $this->_m_numElements;
        }

        /**
         * Parsed element records, in the order they appear in the stream.
         */
        public function elements() {
            return $this->_m_elements;
        }
    }
}

namespace RubyMarshal {
    /**
     * Bignum body: a sign byte, a packed-int half-length, and the raw
     * magnitude bytes.
     */
    class Bignum extends \Kaitai\Struct\Struct {
        protected $_m_sign;
        protected $_m_lenDiv2;
        protected $_m_body;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the sign byte and the half-length, then consumes
         * twice that many bytes as the body.
         */
        private function _read() {
            $this->_m_sign = $this->_io->readU1();
            $this->_m_lenDiv2 = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            // The stream stores the body length divided by two.
            $bodySize = ($this->lenDiv2()->value() * 2);
            $this->_m_body = $this->_io->readBytes($bodySize);
        }

        /**
         * A single byte containing `+` for a positive value or `-` for a negative value.
         */
        public function sign() {
            return $this->_m_sign;
        }

        /**
         * Length of bignum body, divided by 2.
         */
        public function lenDiv2() {
            return $this->_m_lenDiv2;
        }

        /**
         * Bytes that represent the number, see ruby-lang.org docs for reconstruction algorithm.
         */
        public function body() {
            return $this->_m_body;
        }
    }
}

namespace RubyMarshal {
    /**
     * Struct body: a name record, a packed-int member count, and that
     * many key/value pairs.
     */
    class RubyStruct extends \Kaitai\Struct\Struct {
        protected $_m_name;
        protected $_m_numMembers;
        protected $_m_members;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the name record and the member count, then parses one
         * Pair per member in stream order.
         */
        private function _read() {
            $this->_m_name = new \RubyMarshal\Record($this->_io, $this, $this->_root);
            $this->_m_numMembers = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            $this->_m_members = [];
            for ($remaining = $this->numMembers()->value(); $remaining > 0; $remaining--) {
                $this->_m_members[] = new \RubyMarshal\Pair($this->_io, $this, $this->_root);
            }
        }

        /**
         * Symbol containing the name of the struct.
         */
        public function name() {
            return $this->_m_name;
        }

        /**
         * Number of members in a struct
         */
        public function numMembers() {
            return $this->_m_numMembers;
        }

        /**
         * Parsed member pairs, in the order they appear in the stream.
         */
        public function members() {
            return $this->_m_members;
        }
    }
}

namespace RubyMarshal {
    /**
     * Symbol body: a packed-int byte length followed by the symbol
     * name, decoded as UTF-8.
     */
    class RubySymbol extends \Kaitai\Struct\Struct {
        protected $_m_len;
        protected $_m_name;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the length, then that many bytes, and converts them to
         * a string using the UTF-8 encoding.
         */
        private function _read() {
            $this->_m_len = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            $rawName = $this->_io->readBytes($this->len()->value());
            $this->_m_name = \Kaitai\Struct\Stream::bytesToStr($rawName, "UTF-8");
        }

        /**
         * Name length in bytes, as a PackedInt object.
         */
        public function len() {
            return $this->_m_len;
        }

        /**
         * Decoded symbol name.
         */
        public function name() {
            return $this->_m_name;
        }
    }
}

/**
 * Ruby uses a sophisticated system to pack integers: the first `code`
 * byte either determines the packing scheme or carries an encoded
 * immediate value (thus allowing smaller values from -123 to 122
 * (inclusive) to take only one byte). There are 11 encoding schemes
 * in total:
 * 
 * * 0 is encoded specially (as 0)
 * * 1..122 are encoded as immediate value with a shift
 * * 123..255 are encoded with code of 0x01 and 1 extra byte
 * * 0x100..0xffff are encoded with code of 0x02 and 2 extra bytes
 * * 0x10000..0xffffff are encoded with code of 0x03 and 3 extra
 *   bytes
 * * 0x1000000..0xffffffff are encoded with code of 0x04 and 4
 *   extra bytes
 * * -123..-1 are encoded as immediate value with another shift
 * * -256..-124 are encoded with code of 0xff and 1 extra byte
 * * -0x10000..-257 are encoded with code of 0xfe and 2 extra bytes
 * * -0x1000000..-0x10001 are encoded with code of 0xfd and 3 extra
 *    bytes
 * * -0x40000000..-0x1000001 are encoded with code of 0xfc and 4
 *    extra bytes
 * 
 * Values beyond that are serialized as bignum (even if they
 * technically might be not Bignum class in Ruby implementation,
 * i.e. if they fit into 64 bits on a 64-bit platform).
 */

namespace RubyMarshal {
    /**
     * Variable-length integer: a `code` byte that is either an
     * immediate value or a selector for 1 to 4 extra little-endian
     * bytes, positive (codes 0x01..0x04) or negative (0xfc..0xff).
     *
     * NOTE: this file is generated from a .ksy specification; the
     * 0xfc handling in value() should be mirrored there so that it
     * survives regeneration.
     */
    class PackedInt extends \Kaitai\Struct\Struct {
        protected $_m_code;
        protected $_m_encoded;
        protected $_m_encoded2;
        protected $_m_isImmediate;
        protected $_m_value;

        public function __construct(\Kaitai\Struct\Stream $_io, \Kaitai\Struct\Struct $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the `code` byte and whatever extra bytes it calls for.
         *
         * `encoded` holds 1, 2 or 4 bytes depending on the code. The
         * 3-byte codes (0x03 and 0xfd) read their low 2 bytes into
         * `encoded` and the remaining high byte into `encoded2`.
         * Any other code carries no extra bytes.
         */
        private function _read() {
            $this->_m_code = $this->_io->readU1();
            switch ($this->code()) {
                case 1:
                case 255:
                    $this->_m_encoded = $this->_io->readU1();
                    break;
                case 2:
                case 3:
                case 253:
                case 254:
                    $this->_m_encoded = $this->_io->readU2le();
                    break;
                case 4:
                case 252:
                    $this->_m_encoded = $this->_io->readU4le();
                    break;
            }
            // Third (most significant) byte of the 3-byte forms.
            if ($this->code() == 3 || $this->code() == 253) {
                $this->_m_encoded2 = $this->_io->readU1();
            }
        }

        /**
         * Whether the `code` byte itself carries the value, i.e. it
         * lies strictly between 4 and 252. Computed once and cached.
         */
        public function isImmediate() {
            if ($this->_m_isImmediate !== null)
                return $this->_m_isImmediate;
            $code = $this->code();
            $this->_m_isImmediate = (($code > 4) && ($code < 252));
            return $this->_m_isImmediate;
        }

        /**
         * Decoded integer value. Computed once and cached.
         *
         * Negative multi-byte forms store the low bytes of the
         * two's-complement value, so the matching power of two is
         * subtracted to restore the sign. Code 0xfc (4 extra bytes)
         * previously fell through to the unsigned branch and produced
         * a large positive number instead of the negative value.
         */
        public function value() {
            if ($this->_m_value !== null)
                return $this->_m_value;

            $code = $this->code();
            if ($this->isImmediate()) {
                // Codes below 128 are positive (shifted by 5); the rest are negative.
                $this->_m_value = ($code < 128 ? ($code - 5) : (4 - (~$code & 127)));
                return $this->_m_value;
            }

            switch ($code) {
                case 0:
                    $this->_m_value = 0;
                    break;
                case 255:
                    $this->_m_value = ($this->encoded() - 256);
                    break;
                case 254:
                    $this->_m_value = ($this->encoded() - 65536);
                    break;
                case 253:
                    $this->_m_value = ((($this->encoded2() << 16) | $this->encoded()) - 16777216);
                    break;
                case 252:
                    // 4-byte negative: sign-extend by subtracting 2^32.
                    $this->_m_value = ($this->encoded() - 4294967296);
                    break;
                case 3:
                    $this->_m_value = (($this->encoded2() << 16) | $this->encoded());
                    break;
                default:
                    // Codes 1, 2 and 4: plain unsigned value.
                    $this->_m_value = $this->encoded();
                    break;
            }
            return $this->_m_value;
        }

        /**
         * First byte: an immediate value or the packing-scheme selector.
         */
        public function code() {
            return $this->_m_code;
        }

        /**
         * Extra bytes read as an unsigned little-endian integer, or
         * null when the code carries no extra bytes.
         */
        public function encoded() {
            return $this->_m_encoded;
        }

        /**
         * One extra byte for 3-byte integers (0x03 and 0xfd), as
         * there is no standard `u3` type in KS.
         */
        public function encoded2() {
            return $this->_m_encoded2;
        }
    }
}

namespace RubyMarshal {
    /**
     * Two consecutive records treated as a key and its value.
     */
    class Pair extends \Kaitai\Struct\Struct {
        protected $_m_key;
        protected $_m_value;

        public function __construct(\Kaitai\Struct\Stream $_io, \Kaitai\Struct\Struct $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Parses the key record first, then the value record.
         */
        private function _read() {
            $stream = $this->_io;
            $root = $this->_root;
            $this->_m_key = new \RubyMarshal\Record($stream, $this, $root);
            $this->_m_value = new \RubyMarshal\Record($stream, $this, $root);
        }

        /**
         * The first record of the pair.
         */
        public function key() {
            return $this->_m_key;
        }

        /**
         * The second record of the pair.
         */
        public function value() {
            return $this->_m_value;
        }
    }
}

namespace RubyMarshal {
    /**
     * Instance-variable wrapper body: an object record, a packed-int
     * variable count, and that many key/value pairs.
     */
    class InstanceVar extends \Kaitai\Struct\Struct {
        protected $_m_obj;
        protected $_m_numVars;
        protected $_m_vars;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the wrapped object record and the variable count,
         * then parses one Pair per variable in stream order.
         */
        private function _read() {
            $this->_m_obj = new \RubyMarshal\Record($this->_io, $this, $this->_root);
            $this->_m_numVars = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            $this->_m_vars = [];
            for ($remaining = $this->numVars()->value(); $remaining > 0; $remaining--) {
                $this->_m_vars[] = new \RubyMarshal\Pair($this->_io, $this, $this->_root);
            }
        }

        /**
         * The record the variables are attached to.
         */
        public function obj() {
            return $this->_m_obj;
        }

        /**
         * Variable count as a PackedInt object.
         */
        public function numVars() {
            return $this->_m_numVars;
        }

        /**
         * Parsed variable pairs, in the order they appear in the stream.
         */
        public function vars() {
            return $this->_m_vars;
        }
    }
}

/**
 * Each record starts with a single byte that determines its type
 * (`code`) and contents. If necessary, additional info is parsed
 * as `body`, to be determined by `code`.
 */

namespace RubyMarshal {
    /**
     * A single marshalled value: a type `code` byte, optionally
     * followed by a body whose class is selected by that code.
     */
    class Record extends \Kaitai\Struct\Struct {
        protected $_m_code;
        protected $_m_body;

        public function __construct(\Kaitai\Struct\Stream $_io, \Kaitai\Struct\Struct $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the type code and, when the code has an associated
         * body class, parses the body from the same stream.
         *
         * Codes without an entry in the table (including the nil,
         * true and false constants) leave the body as null.
         */
        private function _read() {
            $this->_m_code = $this->_io->readU1();

            // Type code => class that parses the body for that code.
            $bodyClasses = [
                \RubyMarshal\Codes::PACKED_INT => \RubyMarshal\PackedInt::class,
                \RubyMarshal\Codes::BIGNUM => \RubyMarshal\Bignum::class,
                \RubyMarshal\Codes::RUBY_ARRAY => \RubyMarshal\RubyArray::class,
                \RubyMarshal\Codes::RUBY_SYMBOL_LINK => \RubyMarshal\PackedInt::class,
                \RubyMarshal\Codes::RUBY_STRUCT => \RubyMarshal\RubyStruct::class,
                \RubyMarshal\Codes::RUBY_STRING => \RubyMarshal\RubyString::class,
                \RubyMarshal\Codes::INSTANCE_VAR => \RubyMarshal\InstanceVar::class,
                \RubyMarshal\Codes::RUBY_HASH => \RubyMarshal\RubyHash::class,
                \RubyMarshal\Codes::RUBY_SYMBOL => \RubyMarshal\RubySymbol::class,
                \RubyMarshal\Codes::RUBY_OBJECT_LINK => \RubyMarshal\PackedInt::class,
            ];

            $typeCode = $this->code();
            if (isset($bodyClasses[$typeCode])) {
                $bodyClass = $bodyClasses[$typeCode];
                $this->_m_body = new $bodyClass($this->_io, $this, $this->_root);
            }
        }

        /**
         * Type code byte; compare against the constants in Codes.
         */
        public function code() {
            return $this->_m_code;
        }

        /**
         * Parsed body object, or null when the code has no body.
         */
        public function body() {
            return $this->_m_body;
        }
    }
}

namespace RubyMarshal {
    /**
     * Hash body: a packed-int pair count followed by that many
     * key/value pairs.
     */
    class RubyHash extends \Kaitai\Struct\Struct {
        protected $_m_numPairs;
        protected $_m_pairs;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the pair count, then parses one Pair per entry in
         * stream order.
         */
        private function _read() {
            $this->_m_numPairs = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            $this->_m_pairs = [];
            for ($remaining = $this->numPairs()->value(); $remaining > 0; $remaining--) {
                $this->_m_pairs[] = new \RubyMarshal\Pair($this->_io, $this, $this->_root);
            }
        }

        /**
         * Pair count as a PackedInt object.
         */
        public function numPairs() {
            return $this->_m_numPairs;
        }

        /**
         * Parsed pairs, in the order they appear in the stream.
         */
        public function pairs() {
            return $this->_m_pairs;
        }
    }
}

namespace RubyMarshal {
    /**
     * String body: a packed-int byte length followed by the raw,
     * undecoded bytes.
     */
    class RubyString extends \Kaitai\Struct\Struct {
        protected $_m_len;
        protected $_m_body;

        public function __construct(\Kaitai\Struct\Stream $_io, \RubyMarshal\Record $_parent = null, \RubyMarshal $_root = null) {
            parent::__construct($_io, $_parent, $_root);
            $this->_read();
        }

        /**
         * Reads the length, then that many bytes as the body. No
         * character-set conversion is applied.
         */
        private function _read() {
            $this->_m_len = new \RubyMarshal\PackedInt($this->_io, $this, $this->_root);
            $byteCount = $this->len()->value();
            $this->_m_body = $this->_io->readBytes($byteCount);
        }

        /**
         * Body length in bytes, as a PackedInt object.
         */
        public function len() {
            return $this->_m_len;
        }

        /**
         * Raw string bytes.
         */
        public function body() {
            return $this->_m_body;
        }
    }
}

namespace RubyMarshal {
    /**
     * Record type codes. Each value is the byte value of the ASCII
     * character shown next to it.
     */
    class Codes {
        const RUBY_STRING = 0x22;      // '"'
        const CONST_NIL = 0x30;        // '0'
        const RUBY_SYMBOL = 0x3a;      // ':'
        const RUBY_SYMBOL_LINK = 0x3b; // ';'
        const RUBY_OBJECT_LINK = 0x40; // '@'
        const CONST_FALSE = 0x46;      // 'F'
        const INSTANCE_VAR = 0x49;     // 'I'
        const RUBY_STRUCT = 0x53;      // 'S'
        const CONST_TRUE = 0x54;       // 'T'
        const RUBY_ARRAY = 0x5b;       // '['
        const PACKED_INT = 0x69;       // 'i'
        const BIGNUM = 0x6c;           // 'l'
        const RUBY_HASH = 0x7b;        // '{'
    }
}