Ruby's Marshal module allows serialization and deserialization of many standard and arbitrary Ruby objects in a compact binary format. It is relatively fast, available in the standard library, and allows conservation of language-specific properties (such as symbols or encoding-aware strings).
Feature-wise, it is comparable to other language-specific implementations, such as Java's Serializable, .NET's BinaryFormatter, and Python's marshal, pickle and shelve.
Internally, a serialized stream consists of a simple magic header and a record.
This page hosts a formal specification of ruby_marshal using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
// Code generated by kaitai-struct-compiler from a .ksy source file. DO NOT EDIT.
import (
"github.com/kaitai-io/kaitai_struct_go_runtime/kaitai"
"bytes"
)
// Ruby's Marshal module allows serialization and deserialization of
// many standard and arbitrary Ruby objects in a compact binary
// format. It is relatively fast, available in the standard library, and
// allows conservation of language-specific properties (such as symbols
// or encoding-aware strings).
//
// Feature-wise, it is comparable to other language-specific
// implementations, such as:
//
//   - Java's
//     [Serializable](https://docs.oracle.com/javase/8/docs/api/java/io/Serializable.html)
//   - .NET
//     [BinaryFormatter](https://learn.microsoft.com/en-us/dotnet/api/system.runtime.serialization.formatters.binary.binaryformatter?view=net-7.0)
//   - Python's
//     [marshal](https://docs.python.org/3/library/marshal.html),
//     [pickle](https://docs.python.org/3/library/pickle.html) and
//     [shelve](https://docs.python.org/3/library/shelve.html)
//
// Internally, a serialized stream consists of a simple magic header
// and a record.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Stream+Format

// RubyMarshal_Codes is the integer type used for the one-byte tag that
// RubyMarshal_Record reads at the start of every record.
type RubyMarshal_Codes int

// Known record tags. Each value is the ASCII code of the character
// written below.
const (
	RubyMarshal_Codes__RubyString     RubyMarshal_Codes = '"' // 34
	RubyMarshal_Codes__ConstNil       RubyMarshal_Codes = '0' // 48
	RubyMarshal_Codes__RubySymbol     RubyMarshal_Codes = ':' // 58
	RubyMarshal_Codes__RubySymbolLink RubyMarshal_Codes = ';' // 59
	RubyMarshal_Codes__RubyObjectLink RubyMarshal_Codes = '@' // 64
	RubyMarshal_Codes__ConstFalse     RubyMarshal_Codes = 'F' // 70
	RubyMarshal_Codes__InstanceVar    RubyMarshal_Codes = 'I' // 73
	RubyMarshal_Codes__RubyStruct     RubyMarshal_Codes = 'S' // 83
	RubyMarshal_Codes__ConstTrue      RubyMarshal_Codes = 'T' // 84
	RubyMarshal_Codes__RubyArray      RubyMarshal_Codes = '[' // 91
	RubyMarshal_Codes__PackedInt      RubyMarshal_Codes = 'i' // 105
	RubyMarshal_Codes__Bignum         RubyMarshal_Codes = 'l' // 108
	RubyMarshal_Codes__RubyHash       RubyMarshal_Codes = '{' // 123
)

// values_RubyMarshal_Codes is the set of every constant declared above.
var values_RubyMarshal_Codes = map[RubyMarshal_Codes]struct{}{
	RubyMarshal_Codes__RubyString:     {},
	RubyMarshal_Codes__ConstNil:       {},
	RubyMarshal_Codes__RubySymbol:     {},
	RubyMarshal_Codes__RubySymbolLink: {},
	RubyMarshal_Codes__RubyObjectLink: {},
	RubyMarshal_Codes__ConstFalse:     {},
	RubyMarshal_Codes__InstanceVar:    {},
	RubyMarshal_Codes__RubyStruct:     {},
	RubyMarshal_Codes__ConstTrue:      {},
	RubyMarshal_Codes__RubyArray:      {},
	RubyMarshal_Codes__PackedInt:      {},
	RubyMarshal_Codes__Bignum:         {},
	RubyMarshal_Codes__RubyHash:       {},
}

// isDefined reports whether v equals one of the named RubyMarshal_Codes
// constants.
func (v RubyMarshal_Codes) isDefined() bool {
	_, known := values_RubyMarshal_Codes[v]
	return known
}
// RubyMarshal is the root object of a parsed stream: a two-byte version
// header followed by one top-level record.
type RubyMarshal struct {
	// Version holds the two header bytes; Read rejects anything other
	// than {4, 8}.
	Version []byte
	// Records is the record parsed immediately after the header.
	Records *RubyMarshal_Record
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent kaitai.Struct
}

// NewRubyMarshal returns a zero-valued RubyMarshal to be filled in by Read.
func NewRubyMarshal() *RubyMarshal {
	return new(RubyMarshal)
}

// IO_ returns the stream stored by the most recent Read call.
func (m RubyMarshal) IO_() *kaitai.Stream {
	return m._io
}

// Read parses the version header and the top-level record from io.
//
// parent and root are stored as given. An I/O error from the stream is
// returned unchanged; a header other than {4, 8} yields a
// kaitai validation-not-equal error for path "/seq/0". Version is already
// populated when that validation error is returned.
func (m *RubyMarshal) Read(io *kaitai.Stream, parent kaitai.Struct, root *RubyMarshal) (err error) {
	m._io, m._parent, m._root = io, parent, root

	header, err := m._io.ReadBytes(2)
	if err != nil {
		return err
	}
	m.Version = header
	if expected := []uint8{4, 8}; !bytes.Equal(m.Version, expected) {
		return kaitai.NewValidationNotEqualError(expected, m.Version, m._io, "/seq/0")
	}

	rec := NewRubyMarshal_Record()
	if err = rec.Read(m._io, m, m._root); err != nil {
		return err
	}
	m.Records = rec
	return nil
}
// RubyMarshal_Bignum is the record body parsed for the
// RubyMarshal_Codes__Bignum tag: a sign byte, a packed length and the raw
// magnitude bytes.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Bignum
type RubyMarshal_Bignum struct {
	// Sign is a single byte containing `+` for a positive value or `-`
	// for a negative value.
	Sign uint8
	// LenDiv2 is the length of Body, divided by 2.
	LenDiv2 *RubyMarshal_PackedInt
	// Body holds the bytes that represent the number; see the
	// ruby-lang.org docs for the reconstruction algorithm.
	Body    []byte
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent *RubyMarshal_Record
}

// NewRubyMarshal_Bignum returns a zero-valued RubyMarshal_Bignum to be
// filled in by Read.
func NewRubyMarshal_Bignum() *RubyMarshal_Bignum {
	return new(RubyMarshal_Bignum)
}

// IO_ returns the stream stored by the most recent Read call.
func (b RubyMarshal_Bignum) IO_() *kaitai.Stream {
	return b._io
}

// Read parses Sign, LenDiv2 and then 2*LenDiv2 bytes of Body from io.
// The first error from the stream or from a nested read is returned;
// fields parsed before the failure keep their new values.
func (b *RubyMarshal_Bignum) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	b._io, b._parent, b._root = io, parent, root

	sign, err := b._io.ReadU1()
	if err != nil {
		return err
	}
	b.Sign = sign

	halfLen := NewRubyMarshal_PackedInt()
	if err = halfLen.Read(b._io, b, b._root); err != nil {
		return err
	}
	b.LenDiv2 = halfLen

	words, err := b.LenDiv2.Value()
	if err != nil {
		return err
	}
	// The stream stores the body length in 2-byte units.
	raw, err := b._io.ReadBytes(int(words * 2))
	if err != nil {
		return err
	}
	b.Body = raw
	return nil
}
// The three notes below appear to describe the Sign, LenDiv2 and Body
// fields of RubyMarshal_Bignum, in that order:
//
//   - A single byte containing `+` for a positive value or `-` for a
//     negative value.
//   - Length of bignum body, divided by 2.
//   - Bytes that represent the number, see ruby-lang.org docs for
//     reconstruction algorithm.

// RubyMarshal_InstanceVar is the record body parsed for the
// RubyMarshal_Codes__InstanceVar tag: a nested record followed by a
// counted list of key/value pairs.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Instance+Variables
type RubyMarshal_InstanceVar struct {
	// Obj is the nested record read first.
	Obj *RubyMarshal_Record
	// NumVars is the packed count that bounds the Vars loop in Read.
	NumVars *RubyMarshal_PackedInt
	// Vars collects the pairs read after NumVars.
	Vars    []*RubyMarshal_Pair
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent *RubyMarshal_Record
}

// NewRubyMarshal_InstanceVar returns a zero-valued RubyMarshal_InstanceVar
// to be filled in by Read.
func NewRubyMarshal_InstanceVar() *RubyMarshal_InstanceVar {
	return new(RubyMarshal_InstanceVar)
}

// IO_ returns the stream stored by the most recent Read call.
func (iv RubyMarshal_InstanceVar) IO_() *kaitai.Stream {
	return iv._io
}

// Read parses Obj, NumVars and then NumVars pairs from io, appending each
// pair to Vars. A non-positive count reads no pairs. The first error is
// returned; pairs appended before the failure stay in Vars.
func (iv *RubyMarshal_InstanceVar) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	iv._io, iv._parent, iv._root = io, parent, root

	target := NewRubyMarshal_Record()
	if err = target.Read(iv._io, iv, iv._root); err != nil {
		return err
	}
	iv.Obj = target

	count := NewRubyMarshal_PackedInt()
	if err = count.Read(iv._io, iv, iv._root); err != nil {
		return err
	}
	iv.NumVars = count

	total, err := iv.NumVars.Value()
	if err != nil {
		return err
	}
	for remaining := int(total); remaining > 0; remaining-- {
		pair := NewRubyMarshal_Pair()
		if err = pair.Read(iv._io, iv, iv._root); err != nil {
			return err
		}
		iv.Vars = append(iv.Vars, pair)
	}
	return nil
}
// RubyMarshal_PackedInt is a variable-length integer.
//
// Ruby uses a sophisticated system to pack integers: the first `code`
// byte either determines the packing scheme or carries an encoded
// immediate value (thus allowing smaller values from -123 to 122
// (inclusive) to take only one byte). There are 11 encoding schemes
// in total:
//
//   - 0 is encoded specially (as 0)
//   - 1..122 are encoded as immediate value with a shift
//   - 123..255 are encoded with code of 0x01 and 1 extra byte
//   - 0x100..0xffff are encoded with code of 0x02 and 2 extra bytes
//   - 0x10000..0xffffff are encoded with code of 0x03 and 3 extra
//     bytes
//   - 0x1000000..0xffffffff are encoded with code of 0x04 and 4
//     extra bytes
//   - -123..-1 are encoded as immediate value with another shift
//   - -256..-124 are encoded with code of 0xff and 1 extra byte
//   - -0x10000..-257 are encoded with code of 0xfe and 2 extra bytes
//   - -0x1000000..-0x10001 are encoded with code of 0xfd and 3 extra
//     bytes
//   - -0x40000000..-0x1000001 are encoded with code of 0xfc and 4
//     extra bytes
//
// Values beyond that are serialized as bignum (even if they
// technically might be not Bignum class in Ruby implementation,
// i.e. if they fit into 64 bits on a 64-bit platform).
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Fixnum+and+long
type RubyMarshal_PackedInt struct {
	// Code is the leading byte; it is either an immediate value or a
	// selector for how many payload bytes follow.
	Code uint8
	// Encoded holds the little-endian payload read for codes 1, 2, 4,
	// 252, 254 and 255, or the low 16 bits of the payload for codes 3
	// and 253. Read leaves it untouched for every other code.
	Encoded uint32
	// Encoded2 is one extra byte for 3-byte integers (0x03 and 0xfd), as
	// there is no standard `u3` type in KS. It supplies bits 16..23.
	Encoded2       uint8
	_io            *kaitai.Stream
	_root          *RubyMarshal
	_parent        kaitai.Struct
	_f_isImmediate bool
	isImmediate    bool
	_f_value       bool
	value          int
}

// NewRubyMarshal_PackedInt returns a zero-valued RubyMarshal_PackedInt to
// be filled in by Read.
func NewRubyMarshal_PackedInt() *RubyMarshal_PackedInt {
	return new(RubyMarshal_PackedInt)
}

// IO_ returns the stream stored by the most recent Read call.
func (p RubyMarshal_PackedInt) IO_() *kaitai.Stream {
	return p._io
}

// Read parses Code and, depending on Code, the payload bytes into Encoded
// and Encoded2. The first stream error is returned. Read does not clear
// the results cached by IsImmediate and Value.
func (p *RubyMarshal_PackedInt) Read(io *kaitai.Stream, parent kaitai.Struct, root *RubyMarshal) (err error) {
	p._io, p._parent, p._root = io, parent, root

	code, err := p._io.ReadU1()
	if err != nil {
		return err
	}
	p.Code = code

	switch p.Code {
	case 1, 255:
		b, err := p._io.ReadU1()
		if err != nil {
			return err
		}
		p.Encoded = uint32(b)
	case 2, 3, 253, 254:
		// For the 3-byte codes (3 and 253) this is only the low half;
		// the third byte is read into Encoded2 below.
		w, err := p._io.ReadU2le()
		if err != nil {
			return err
		}
		p.Encoded = uint32(w)
	case 4, 252:
		d, err := p._io.ReadU4le()
		if err != nil {
			return err
		}
		p.Encoded = uint32(d)
	}

	if p.Code == 3 || p.Code == 253 {
		hi, err := p._io.ReadU1()
		if err != nil {
			return err
		}
		p.Encoded2 = hi
	}
	return nil
}

// IsImmediate reports whether Code itself carries the value, i.e. whether
// 4 < Code < 252. The result is computed once and cached; err is always
// nil.
func (p *RubyMarshal_PackedInt) IsImmediate() (v bool, err error) {
	if !p._f_isImmediate {
		p.isImmediate = p.Code > 4 && p.Code < 252
		p._f_isImmediate = true
	}
	return p.isImmediate, nil
}

// Value decodes the integer represented by Code, Encoded and Encoded2.
// The result is computed once and cached.
//
// Every operand is widened to int before any arithmetic. The fields are
// uint8/uint32, so subtracting or shifting in their own type would wrap
// (and `Encoded2 << 16` on a uint8 would always be zero). Code 0xfc is
// sign-extended from 32 bits, matching the documented range
// -0x40000000..-0x1000001 for that code.
//
// On platforms where int is 32 bits wide, a code-4 payload of 0x80000000
// or more does not fit and wraps.
func (p *RubyMarshal_PackedInt) Value() (v int, err error) {
	if p._f_value {
		return p.value, nil
	}
	immediate, err := p.IsImmediate()
	if err != nil {
		return 0, err
	}

	if immediate {
		if p.Code < 0x80 {
			// 5..127 map to 0..122.
			v = int(p.Code) - 5
		} else {
			// 128..251 map to -123..0.
			v = 4 - int(^p.Code&0x7f)
		}
	} else {
		// 24-bit payload assembled from the u2 low half and the extra byte.
		wide24 := int(p.Encoded2)<<16 | int(p.Encoded)
		switch p.Code {
		case 0:
			v = 0
		case 0xff:
			v = int(p.Encoded) - 0x100
		case 0xfe:
			v = int(p.Encoded) - 0x10000
		case 0xfd:
			v = wide24 - 0x1000000
		case 0xfc:
			// Four-byte negative: reinterpret as two's complement
			// (equivalent to encoded - 2^32, without a constant that
			// overflows a 32-bit int).
			v = int(int32(p.Encoded))
		case 0x03:
			v = wide24
		default:
			// Codes 1, 2 and 4: plain unsigned payload.
			v = int(p.Encoded)
		}
	}

	p.value = v
	p._f_value = true
	return p.value, nil
}
// Note on RubyMarshal_PackedInt.Encoded2: one extra byte for 3-byte
// integers (0x03 and 0xfd), as there is no standard `u3` type in KS.

// RubyMarshal_Pair is two consecutive records, read as a key followed by
// a value.
type RubyMarshal_Pair struct {
	// Key is the first record of the pair.
	Key *RubyMarshal_Record
	// Value is the second record of the pair.
	Value   *RubyMarshal_Record
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent kaitai.Struct
}

// NewRubyMarshal_Pair returns a zero-valued RubyMarshal_Pair to be filled
// in by Read.
func NewRubyMarshal_Pair() *RubyMarshal_Pair {
	return new(RubyMarshal_Pair)
}

// IO_ returns the stream stored by the most recent Read call.
func (p RubyMarshal_Pair) IO_() *kaitai.Stream {
	return p._io
}

// Read parses Key and then Value from io. The first error is returned; if
// Value fails, Key keeps its newly parsed record.
func (p *RubyMarshal_Pair) Read(io *kaitai.Stream, parent kaitai.Struct, root *RubyMarshal) (err error) {
	p._io, p._parent, p._root = io, parent, root

	k := NewRubyMarshal_Record()
	if err = k.Read(p._io, p, p._root); err != nil {
		return err
	}
	p.Key = k

	val := NewRubyMarshal_Record()
	if err = val.Read(p._io, p, p._root); err != nil {
		return err
	}
	p.Value = val
	return nil
}
// RubyMarshal_Record is one tagged value in the stream.
//
// Each record starts with a single byte that determines its type
// (`code`) and contents. If necessary, additional info is parsed
// as `body`, to be determined by `code`.
type RubyMarshal_Record struct {
	// Code is the tag byte converted to RubyMarshal_Codes.
	Code RubyMarshal_Codes
	// Body is set by Read according to Code:
	//
	//   - Bignum: *RubyMarshal_Bignum
	//   - InstanceVar: *RubyMarshal_InstanceVar
	//   - PackedInt, RubyObjectLink, RubySymbolLink: *RubyMarshal_PackedInt
	//   - RubyArray: *RubyMarshal_RubyArray
	//   - RubyHash: *RubyMarshal_RubyHash
	//   - RubyString: *RubyMarshal_RubyString
	//   - RubyStruct: *RubyMarshal_RubyStruct
	//   - RubySymbol: *RubyMarshal_RubySymbol
	//
	// For any other code Read does not assign Body.
	Body    kaitai.Struct
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent kaitai.Struct
}

// NewRubyMarshal_Record returns a zero-valued RubyMarshal_Record to be
// filled in by Read.
func NewRubyMarshal_Record() *RubyMarshal_Record {
	return new(RubyMarshal_Record)
}

// IO_ returns the stream stored by the most recent Read call.
func (r RubyMarshal_Record) IO_() *kaitai.Stream {
	return r._io
}

// Read parses the tag byte and, for the tags listed on Body, the matching
// body. Tags without a case here (including values that are not a named
// RubyMarshal_Codes constant) consume no further bytes and return nil.
// Body is only assigned after its own Read succeeded.
func (r *RubyMarshal_Record) Read(io *kaitai.Stream, parent kaitai.Struct, root *RubyMarshal) (err error) {
	r._io, r._parent, r._root = io, parent, root

	tag, err := r._io.ReadU1()
	if err != nil {
		return err
	}
	r.Code = RubyMarshal_Codes(tag)

	switch r.Code {
	case RubyMarshal_Codes__Bignum:
		body := NewRubyMarshal_Bignum()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__InstanceVar:
		body := NewRubyMarshal_InstanceVar()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__PackedInt,
		RubyMarshal_Codes__RubyObjectLink,
		RubyMarshal_Codes__RubySymbolLink:
		// All three tags are followed by a single packed integer.
		body := NewRubyMarshal_PackedInt()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__RubyArray:
		body := NewRubyMarshal_RubyArray()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__RubyHash:
		body := NewRubyMarshal_RubyHash()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__RubyString:
		body := NewRubyMarshal_RubyString()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__RubyStruct:
		body := NewRubyMarshal_RubyStruct()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	case RubyMarshal_Codes__RubySymbol:
		body := NewRubyMarshal_RubySymbol()
		if err = body.Read(r._io, r, r._root); err != nil {
			return err
		}
		r.Body = body
	}
	return nil
}
// RubyMarshal_RubyArray is the record body parsed for the
// RubyMarshal_Codes__RubyArray tag: a packed element count followed by
// that many records.
type RubyMarshal_RubyArray struct {
	// NumElements is the packed count that bounds the Elements loop in Read.
	NumElements *RubyMarshal_PackedInt
	// Elements collects the records read after NumElements.
	Elements []*RubyMarshal_Record
	_io      *kaitai.Stream
	_root    *RubyMarshal
	_parent  *RubyMarshal_Record
}

// NewRubyMarshal_RubyArray returns a zero-valued RubyMarshal_RubyArray to
// be filled in by Read.
func NewRubyMarshal_RubyArray() *RubyMarshal_RubyArray {
	return new(RubyMarshal_RubyArray)
}

// IO_ returns the stream stored by the most recent Read call.
func (a RubyMarshal_RubyArray) IO_() *kaitai.Stream {
	return a._io
}

// Read parses NumElements and then that many records from io, appending
// each to Elements. A non-positive count reads no records. The first
// error is returned; records appended before the failure stay in Elements.
func (a *RubyMarshal_RubyArray) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	a._io, a._parent, a._root = io, parent, root

	count := NewRubyMarshal_PackedInt()
	if err = count.Read(a._io, a, a._root); err != nil {
		return err
	}
	a.NumElements = count

	total, err := a.NumElements.Value()
	if err != nil {
		return err
	}
	for remaining := int(total); remaining > 0; remaining-- {
		elem := NewRubyMarshal_Record()
		if err = elem.Read(a._io, a, a._root); err != nil {
			return err
		}
		a.Elements = append(a.Elements, elem)
	}
	return nil
}
// RubyMarshal_RubyHash is the record body parsed for the
// RubyMarshal_Codes__RubyHash tag: a packed pair count followed by that
// many key/value pairs.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Hash+and+Hash+with+Default+Value
type RubyMarshal_RubyHash struct {
	// NumPairs is the packed count that bounds the Pairs loop in Read.
	NumPairs *RubyMarshal_PackedInt
	// Pairs collects the pairs read after NumPairs.
	Pairs   []*RubyMarshal_Pair
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent *RubyMarshal_Record
}

// NewRubyMarshal_RubyHash returns a zero-valued RubyMarshal_RubyHash to be
// filled in by Read.
func NewRubyMarshal_RubyHash() *RubyMarshal_RubyHash {
	return new(RubyMarshal_RubyHash)
}

// IO_ returns the stream stored by the most recent Read call.
func (h RubyMarshal_RubyHash) IO_() *kaitai.Stream {
	return h._io
}

// Read parses NumPairs and then that many pairs from io, appending each
// to Pairs. A non-positive count reads no pairs. The first error is
// returned; pairs appended before the failure stay in Pairs.
func (h *RubyMarshal_RubyHash) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	h._io, h._parent, h._root = io, parent, root

	count := NewRubyMarshal_PackedInt()
	if err = count.Read(h._io, h, h._root); err != nil {
		return err
	}
	h.NumPairs = count

	total, err := h.NumPairs.Value()
	if err != nil {
		return err
	}
	for remaining := int(total); remaining > 0; remaining-- {
		entry := NewRubyMarshal_Pair()
		if err = entry.Read(h._io, h, h._root); err != nil {
			return err
		}
		h.Pairs = append(h.Pairs, entry)
	}
	return nil
}
// RubyMarshal_RubyString is the record body parsed for the
// RubyMarshal_Codes__RubyString tag: a packed byte length followed by
// that many raw bytes.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-String
type RubyMarshal_RubyString struct {
	// Len is the packed number of bytes in Body.
	Len *RubyMarshal_PackedInt
	// Body holds the bytes exactly as read from the stream.
	Body    []byte
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent *RubyMarshal_Record
}

// NewRubyMarshal_RubyString returns a zero-valued RubyMarshal_RubyString
// to be filled in by Read.
func NewRubyMarshal_RubyString() *RubyMarshal_RubyString {
	return new(RubyMarshal_RubyString)
}

// IO_ returns the stream stored by the most recent Read call.
func (s RubyMarshal_RubyString) IO_() *kaitai.Stream {
	return s._io
}

// Read parses Len and then Len bytes of Body from io. The first error is
// returned; Len keeps its newly parsed value if reading Body fails.
func (s *RubyMarshal_RubyString) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	s._io, s._parent, s._root = io, parent, root

	length := NewRubyMarshal_PackedInt()
	if err = length.Read(s._io, s, s._root); err != nil {
		return err
	}
	s.Len = length

	size, err := s.Len.Value()
	if err != nil {
		return err
	}
	raw, err := s._io.ReadBytes(int(size))
	if err != nil {
		return err
	}
	s.Body = raw
	return nil
}
// RubyMarshal_RubyStruct is the record body parsed for the
// RubyMarshal_Codes__RubyStruct tag: a name record, a packed member count
// and that many key/value pairs.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Struct
type RubyMarshal_RubyStruct struct {
	// Name is the symbol containing the name of the struct.
	Name *RubyMarshal_Record
	// NumMembers is the number of members in a struct.
	NumMembers *RubyMarshal_PackedInt
	// Members collects the pairs read after NumMembers.
	Members []*RubyMarshal_Pair
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent *RubyMarshal_Record
}

// NewRubyMarshal_RubyStruct returns a zero-valued RubyMarshal_RubyStruct
// to be filled in by Read.
func NewRubyMarshal_RubyStruct() *RubyMarshal_RubyStruct {
	return new(RubyMarshal_RubyStruct)
}

// IO_ returns the stream stored by the most recent Read call.
func (st RubyMarshal_RubyStruct) IO_() *kaitai.Stream {
	return st._io
}

// Read parses Name, NumMembers and then that many pairs from io,
// appending each to Members. A non-positive count reads no pairs. The
// first error is returned; pairs appended before the failure stay in
// Members.
func (st *RubyMarshal_RubyStruct) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	st._io, st._parent, st._root = io, parent, root

	name := NewRubyMarshal_Record()
	if err = name.Read(st._io, st, st._root); err != nil {
		return err
	}
	st.Name = name

	count := NewRubyMarshal_PackedInt()
	if err = count.Read(st._io, st, st._root); err != nil {
		return err
	}
	st.NumMembers = count

	total, err := st.NumMembers.Value()
	if err != nil {
		return err
	}
	for remaining := int(total); remaining > 0; remaining-- {
		member := NewRubyMarshal_Pair()
		if err = member.Read(st._io, st, st._root); err != nil {
			return err
		}
		st.Members = append(st.Members, member)
	}
	return nil
}
// The two notes below appear to describe the Name and NumMembers fields
// of RubyMarshal_RubyStruct, in that order:
//
//   - Symbol containing the name of the struct.
//   - Number of members in a struct

// RubyMarshal_RubySymbol is the record body parsed for the
// RubyMarshal_Codes__RubySymbol tag: a packed byte length followed by the
// symbol's name.
//
// See https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Symbols+and+Byte+Sequence
type RubyMarshal_RubySymbol struct {
	// Len is the packed number of bytes occupied by Name in the stream.
	Len *RubyMarshal_PackedInt
	// Name is the raw name bytes converted directly to a Go string.
	Name    string
	_io     *kaitai.Stream
	_root   *RubyMarshal
	_parent *RubyMarshal_Record
}

// NewRubyMarshal_RubySymbol returns a zero-valued RubyMarshal_RubySymbol
// to be filled in by Read.
func NewRubyMarshal_RubySymbol() *RubyMarshal_RubySymbol {
	return new(RubyMarshal_RubySymbol)
}

// IO_ returns the stream stored by the most recent Read call.
func (sym RubyMarshal_RubySymbol) IO_() *kaitai.Stream {
	return sym._io
}

// Read parses Len and then Len bytes from io, storing them in Name. The
// first error is returned; Len keeps its newly parsed value if reading
// the name fails.
func (sym *RubyMarshal_RubySymbol) Read(io *kaitai.Stream, parent *RubyMarshal_Record, root *RubyMarshal) (err error) {
	sym._io, sym._parent, sym._root = io, parent, root

	length := NewRubyMarshal_PackedInt()
	if err = length.Read(sym._io, sym, sym._root); err != nil {
		return err
	}
	sym.Len = length

	size, err := sym.Len.Value()
	if err != nil {
		return err
	}
	raw, err := sym._io.ReadBytes(int(size))
	if err != nil {
		return err
	}
	sym.Name = string(raw)
	return nil
}