ruby_marshal: Nim parsing library

Ruby's Marshal module allows serialization and deserialization of many standard and arbitrary Ruby objects in a compact binary format. It is relatively fast, available in stdlibs standard and allows conservation of language-specific properties (such as symbols or encoding-aware strings).

Feature-wise, it is comparable to other language-specific implementations, such as:

From internal perspective, serialized stream consists of a simple magic header and a record.

KS implementation details

License: CC0-1.0

This page hosts a formal specification of ruby_marshal using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Nim source code to parse ruby_marshal

ruby_marshal.nim

import kaitai_struct_nim_runtime
import options

type
  RubyMarshal* = ref object of KaitaiStruct
    `version`*: seq[byte]
    `records`*: RubyMarshal_Record
    `parent`*: KaitaiStruct
  RubyMarshal_Codes* = enum
    ruby_string = 34
    const_nil = 48
    ruby_symbol = 58
    ruby_symbol_link = 59
    ruby_object_link = 64
    const_false = 70
    instance_var = 73
    ruby_struct = 83
    const_true = 84
    ruby_array = 91
    packed_int = 105
    bignum = 108
    ruby_hash = 123
  RubyMarshal_RubyArray* = ref object of KaitaiStruct
    `numElements`*: RubyMarshal_PackedInt
    `elements`*: seq[RubyMarshal_Record]
    `parent`*: RubyMarshal_Record
  RubyMarshal_Bignum* = ref object of KaitaiStruct
    `sign`*: uint8
    `lenDiv2`*: RubyMarshal_PackedInt
    `body`*: seq[byte]
    `parent`*: RubyMarshal_Record
  RubyMarshal_RubyStruct* = ref object of KaitaiStruct
    `name`*: RubyMarshal_Record
    `numMembers`*: RubyMarshal_PackedInt
    `members`*: seq[RubyMarshal_Pair]
    `parent`*: RubyMarshal_Record
  RubyMarshal_RubySymbol* = ref object of KaitaiStruct
    `len`*: RubyMarshal_PackedInt
    `name`*: string
    `parent`*: RubyMarshal_Record
  RubyMarshal_PackedInt* = ref object of KaitaiStruct
    `code`*: uint8
    `encoded`*: uint32
    `encoded2`*: uint8
    `parent`*: KaitaiStruct
    `isImmediateInst`: bool
    `isImmediateInstFlag`: bool
    `valueInst`: int
    `valueInstFlag`: bool
  RubyMarshal_Pair* = ref object of KaitaiStruct
    `key`*: RubyMarshal_Record
    `value`*: RubyMarshal_Record
    `parent`*: KaitaiStruct
  RubyMarshal_InstanceVar* = ref object of KaitaiStruct
    `obj`*: RubyMarshal_Record
    `numVars`*: RubyMarshal_PackedInt
    `vars`*: seq[RubyMarshal_Pair]
    `parent`*: RubyMarshal_Record
  RubyMarshal_Record* = ref object of KaitaiStruct
    `code`*: RubyMarshal_Codes
    `body`*: KaitaiStruct
    `parent`*: KaitaiStruct
  RubyMarshal_RubyHash* = ref object of KaitaiStruct
    `numPairs`*: RubyMarshal_PackedInt
    `pairs`*: seq[RubyMarshal_Pair]
    `parent`*: RubyMarshal_Record
  RubyMarshal_RubyString* = ref object of KaitaiStruct
    `len`*: RubyMarshal_PackedInt
    `body`*: seq[byte]
    `parent`*: RubyMarshal_Record

proc read*(_: typedesc[RubyMarshal], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal
proc read*(_: typedesc[RubyMarshal_RubyArray], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyArray
proc read*(_: typedesc[RubyMarshal_Bignum], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_Bignum
proc read*(_: typedesc[RubyMarshal_RubyStruct], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyStruct
proc read*(_: typedesc[RubyMarshal_RubySymbol], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubySymbol
proc read*(_: typedesc[RubyMarshal_PackedInt], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal_PackedInt
proc read*(_: typedesc[RubyMarshal_Pair], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal_Pair
proc read*(_: typedesc[RubyMarshal_InstanceVar], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_InstanceVar
proc read*(_: typedesc[RubyMarshal_Record], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal_Record
proc read*(_: typedesc[RubyMarshal_RubyHash], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyHash
proc read*(_: typedesc[RubyMarshal_RubyString], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyString

proc isImmediate*(this: RubyMarshal_PackedInt): bool
proc value*(this: RubyMarshal_PackedInt): int


##[
Ruby's Marshal module allows serialization and deserialization of
many standard and arbitrary Ruby objects in a compact binary
format. It is relatively fast, available in stdlibs standard and
allows conservation of language-specific properties (such as symbols
or encoding-aware strings).

Feature-wise, it is comparable to other language-specific
implementations, such as:

* Java's
  [Serializable](https://docs.oracle.com/javase/8/docs/api/java/io/Serializable.html)
* .NET
  [BinaryFormatter](https://learn.microsoft.com/en-us/dotnet/api/system.runtime.serialization.formatters.binary.binaryformatter?view=net-7.0)
* Python's
  [marshal](https://docs.python.org/3/library/marshal.html),
  [pickle](https://docs.python.org/3/library/pickle.html) and
  [shelve](https://docs.python.org/3/library/shelve.html)

From internal perspective, serialized stream consists of a simple
magic header and a record.

@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Stream+Format">Source</a>
]##
proc read*(_: typedesc[RubyMarshal], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal =
  template this: untyped = result
  this = new(RubyMarshal)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let versionExpr = this.io.readBytes(int(2))
  this.version = versionExpr
  let recordsExpr = RubyMarshal_Record.read(this.io, this.root, this)
  this.records = recordsExpr

proc fromFile*(_: typedesc[RubyMarshal], filename: string): RubyMarshal =
  RubyMarshal.read(newKaitaiFileStream(filename), nil, nil)

proc read*(_: typedesc[RubyMarshal_RubyArray], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyArray =
  template this: untyped = result
  this = new(RubyMarshal_RubyArray)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let numElementsExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.numElements = numElementsExpr
  for i in 0 ..< int(this.numElements.value):
    let it = RubyMarshal_Record.read(this.io, this.root, this)
    this.elements.add(it)

proc fromFile*(_: typedesc[RubyMarshal_RubyArray], filename: string): RubyMarshal_RubyArray =
  RubyMarshal_RubyArray.read(newKaitaiFileStream(filename), nil, nil)


##[
@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Bignum">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_Bignum], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_Bignum =
  template this: untyped = result
  this = new(RubyMarshal_Bignum)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent


  ##[
  A single byte containing `+` for a positive value or `-` for a negative value.
  ]##
  let signExpr = this.io.readU1()
  this.sign = signExpr

  ##[
  Length of bignum body, divided by 2.
  ]##
  let lenDiv2Expr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.lenDiv2 = lenDiv2Expr

  ##[
  Bytes that represent the number, see ruby-lang.org docs for reconstruction algorithm.
  ]##
  let bodyExpr = this.io.readBytes(int((this.lenDiv2.value * 2)))
  this.body = bodyExpr

proc fromFile*(_: typedesc[RubyMarshal_Bignum], filename: string): RubyMarshal_Bignum =
  RubyMarshal_Bignum.read(newKaitaiFileStream(filename), nil, nil)


##[
@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Struct">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_RubyStruct], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyStruct =
  template this: untyped = result
  this = new(RubyMarshal_RubyStruct)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent


  ##[
  Symbol containing the name of the struct.
  ]##
  let nameExpr = RubyMarshal_Record.read(this.io, this.root, this)
  this.name = nameExpr

  ##[
  Number of members in a struct
  ]##
  let numMembersExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.numMembers = numMembersExpr
  for i in 0 ..< int(this.numMembers.value):
    let it = RubyMarshal_Pair.read(this.io, this.root, this)
    this.members.add(it)

proc fromFile*(_: typedesc[RubyMarshal_RubyStruct], filename: string): RubyMarshal_RubyStruct =
  RubyMarshal_RubyStruct.read(newKaitaiFileStream(filename), nil, nil)


##[
@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Symbols+and+Byte+Sequence">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_RubySymbol], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubySymbol =
  template this: untyped = result
  this = new(RubyMarshal_RubySymbol)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let lenExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.len = lenExpr
  let nameExpr = encode(this.io.readBytes(int(this.len.value)), "UTF-8")
  this.name = nameExpr

proc fromFile*(_: typedesc[RubyMarshal_RubySymbol], filename: string): RubyMarshal_RubySymbol =
  RubyMarshal_RubySymbol.read(newKaitaiFileStream(filename), nil, nil)


##[
Ruby uses sophisticated system to pack integers: first `code`
byte either determines packing scheme or carries encoded
immediate value (thus allowing smaller values from -123 to 122
(inclusive) to take only one byte. There are 11 encoding schemes
in total:

* 0 is encoded specially (as 0)
* 1..122 are encoded as immediate value with a shift
* 123..255 are encoded with code of 0x01 and 1 extra byte
* 0x100..0xffff are encoded with code of 0x02 and 2 extra bytes
* 0x10000..0xffffff are encoded with code of 0x03 and 3 extra
  bytes
* 0x1000000..0xffffffff are encoded with code of 0x04 and 4
  extra bytes
* -123..-1 are encoded as immediate value with another shift
* -256..-124 are encoded with code of 0xff and 1 extra byte
* -0x10000..-257 are encoded with code of 0xfe and 2 extra bytes
* -0x1000000..0x10001 are encoded with code of 0xfd and 3 extra
   bytes
* -0x40000000..-0x1000001 are encoded with code of 0xfc and 4
   extra bytes

Values beyond that are serialized as bignum (even if they
technically might be not Bignum class in Ruby implementation,
i.e. if they fit into 64 bits on a 64-bit platform).

@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Fixnum+and+long">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_PackedInt], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal_PackedInt =
  template this: untyped = result
  this = new(RubyMarshal_PackedInt)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let codeExpr = this.io.readU1()
  this.code = codeExpr
  block:
    let on = this.code
    if on == 4:
      let encodedExpr = this.io.readU4le()
      this.encoded = encodedExpr
    elif on == 1:
      let encodedExpr = uint32(this.io.readU1())
      this.encoded = encodedExpr
    elif on == 252:
      let encodedExpr = this.io.readU4le()
      this.encoded = encodedExpr
    elif on == 253:
      let encodedExpr = uint32(this.io.readU2le())
      this.encoded = encodedExpr
    elif on == 3:
      let encodedExpr = uint32(this.io.readU2le())
      this.encoded = encodedExpr
    elif on == 2:
      let encodedExpr = uint32(this.io.readU2le())
      this.encoded = encodedExpr
    elif on == 255:
      let encodedExpr = uint32(this.io.readU1())
      this.encoded = encodedExpr
    elif on == 254:
      let encodedExpr = uint32(this.io.readU2le())
      this.encoded = encodedExpr

  ##[
  One extra byte for 3-byte integers (0x03 and 0xfd), as
there is no standard `u3` type in KS.

  ]##
  block:
    let on = this.code
    if on == 3:
      let encoded2Expr = this.io.readU1()
      this.encoded2 = encoded2Expr
    elif on == 253:
      let encoded2Expr = this.io.readU1()
      this.encoded2 = encoded2Expr

proc isImmediate(this: RubyMarshal_PackedInt): bool = 
  if this.isImmediateInstFlag:
    return this.isImmediateInst
  let isImmediateInstExpr = bool( ((this.code > 4) and (this.code < 252)) )
  this.isImmediateInst = isImmediateInstExpr
  this.isImmediateInstFlag = true
  return this.isImmediateInst

proc value(this: RubyMarshal_PackedInt): int = 
  if this.valueInstFlag:
    return this.valueInst
  let valueInstExpr = int((if this.isImmediate: (if this.code < 128: (this.code - 5) else: (4 - (not(this.code) and 127))) else: (if this.code == 0: 0 else: (if this.code == 255: (this.encoded - 256) else: (if this.code == 254: (this.encoded - 65536) else: (if this.code == 253: (((this.encoded2 shl 16) or this.encoded) - 16777216) else: (if this.code == 3: ((this.encoded2 shl 16) or this.encoded) else: this.encoded)))))))
  this.valueInst = valueInstExpr
  this.valueInstFlag = true
  return this.valueInst

proc fromFile*(_: typedesc[RubyMarshal_PackedInt], filename: string): RubyMarshal_PackedInt =
  RubyMarshal_PackedInt.read(newKaitaiFileStream(filename), nil, nil)

proc read*(_: typedesc[RubyMarshal_Pair], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal_Pair =
  template this: untyped = result
  this = new(RubyMarshal_Pair)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let keyExpr = RubyMarshal_Record.read(this.io, this.root, this)
  this.key = keyExpr
  let valueExpr = RubyMarshal_Record.read(this.io, this.root, this)
  this.value = valueExpr

proc fromFile*(_: typedesc[RubyMarshal_Pair], filename: string): RubyMarshal_Pair =
  RubyMarshal_Pair.read(newKaitaiFileStream(filename), nil, nil)


##[
@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Instance+Variables">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_InstanceVar], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_InstanceVar =
  template this: untyped = result
  this = new(RubyMarshal_InstanceVar)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let objExpr = RubyMarshal_Record.read(this.io, this.root, this)
  this.obj = objExpr
  let numVarsExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.numVars = numVarsExpr
  for i in 0 ..< int(this.numVars.value):
    let it = RubyMarshal_Pair.read(this.io, this.root, this)
    this.vars.add(it)

proc fromFile*(_: typedesc[RubyMarshal_InstanceVar], filename: string): RubyMarshal_InstanceVar =
  RubyMarshal_InstanceVar.read(newKaitaiFileStream(filename), nil, nil)


##[
Each record starts with a single byte that determines its type
(`code`) and contents. If necessary, additional info as parsed
as `body`, to be determined by `code`.

]##
proc read*(_: typedesc[RubyMarshal_Record], io: KaitaiStream, root: KaitaiStruct, parent: KaitaiStruct): RubyMarshal_Record =
  template this: untyped = result
  this = new(RubyMarshal_Record)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let codeExpr = RubyMarshal_Codes(this.io.readU1())
  this.code = codeExpr
  block:
    let on = this.code
    if on == ruby_marshal.packed_int:
      let bodyExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.bignum:
      let bodyExpr = RubyMarshal_Bignum.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_array:
      let bodyExpr = RubyMarshal_RubyArray.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_symbol_link:
      let bodyExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_struct:
      let bodyExpr = RubyMarshal_RubyStruct.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_string:
      let bodyExpr = RubyMarshal_RubyString.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.instance_var:
      let bodyExpr = RubyMarshal_InstanceVar.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_hash:
      let bodyExpr = RubyMarshal_RubyHash.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_symbol:
      let bodyExpr = RubyMarshal_RubySymbol.read(this.io, this.root, this)
      this.body = bodyExpr
    elif on == ruby_marshal.ruby_object_link:
      let bodyExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
      this.body = bodyExpr

proc fromFile*(_: typedesc[RubyMarshal_Record], filename: string): RubyMarshal_Record =
  RubyMarshal_Record.read(newKaitaiFileStream(filename), nil, nil)


##[
@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-Hash+and+Hash+with+Default+Value">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_RubyHash], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyHash =
  template this: untyped = result
  this = new(RubyMarshal_RubyHash)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let numPairsExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.numPairs = numPairsExpr
  for i in 0 ..< int(this.numPairs.value):
    let it = RubyMarshal_Pair.read(this.io, this.root, this)
    this.pairs.add(it)

proc fromFile*(_: typedesc[RubyMarshal_RubyHash], filename: string): RubyMarshal_RubyHash =
  RubyMarshal_RubyHash.read(newKaitaiFileStream(filename), nil, nil)


##[
@see <a href="https://docs.ruby-lang.org/en/2.4.0/marshal_rdoc.html#label-String">Source</a>
]##
proc read*(_: typedesc[RubyMarshal_RubyString], io: KaitaiStream, root: KaitaiStruct, parent: RubyMarshal_Record): RubyMarshal_RubyString =
  template this: untyped = result
  this = new(RubyMarshal_RubyString)
  let root = if root == nil: cast[RubyMarshal](this) else: cast[RubyMarshal](root)
  this.io = io
  this.root = root
  this.parent = parent

  let lenExpr = RubyMarshal_PackedInt.read(this.io, this.root, this)
  this.len = lenExpr
  let bodyExpr = this.io.readBytes(int(this.len.value))
  this.body = bodyExpr

proc fromFile*(_: typedesc[RubyMarshal_RubyString], filename: string): RubyMarshal_RubyString =
  RubyMarshal_RubyString.read(newKaitaiFileStream(filename), nil, nil)