RAR is an archive format used by the popular proprietary RAR archiver, created by Eugene Roshal. There are two major versions of the format (RAR v1.5-4.0 and RAR v5+).
The file format essentially consists of a linear sequence of blocks. Each block has a fixed header and a custom body (that depends on the block type), so it's possible to skip a block even if one doesn't know how to process that block type.
This page hosts a formal specification of RAR (Roshal ARchive) archive files using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
All parsing code for Ruby generated by Kaitai Struct depends on the Ruby runtime library. You have to install it before you can parse data.
The Ruby runtime library can be installed from RubyGems:
gem install kaitai-struct
Parse a local file and get structure in memory:
data = Rar.from_file("path/to/local/file.rar")
Or parse structure from a string of bytes:
bytes = "\x00\x01\x02..."
data = Rar.new(Kaitai::Struct::Stream.new(bytes))
After that, one can get various attributes from the structure by invoking getter methods like:
data.magic # => File format signature to validate that it is indeed a RAR archive
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
require 'kaitai/struct/struct'
# Refuse to load when the installed Kaitai Struct runtime is older than 0.9,
# the minimum API version this file requires.
if Gem::Version.new(Kaitai::Struct::VERSION) < Gem::Version.new('0.9')
  raise "Incompatible Kaitai Struct Ruby API: 0.9 or later is required, but you have #{Kaitai::Struct::VERSION}"
end
##
# RAR is an archive format used by the popular proprietary RAR archiver,
# created by Eugene Roshal. There are two major versions of the format
# (RAR v1.5-4.0 and RAR v5+).
#
# The file format essentially consists of a linear sequence of
# blocks. Each block has a fixed header and a custom body (that depends
# on the block type), so it's possible to skip a block even if one
# doesn't know how to process that block type.
# @see http://acritum.com/winrar/rar-format Source
class Rar < Kaitai::Struct::Struct
# Enum lookup tables. Each forward table maps the raw integer stored in the
# file to its symbolic name; the matching I__* table is the reverse mapping
# (symbol => raw integer). All tables are frozen so that shared lookup data
# cannot be mutated by accident at runtime.

# Values of the one-byte block type field read by Block.
BLOCK_TYPES = {
  114 => :block_types_marker,
  115 => :block_types_archive_header,
  116 => :block_types_file_header,
  117 => :block_types_old_style_comment_header,
  118 => :block_types_old_style_authenticity_info_76,
  119 => :block_types_old_style_subblock,
  120 => :block_types_old_style_recovery_record,
  121 => :block_types_old_style_authenticity_info_79,
  122 => :block_types_subblock,
  123 => :block_types_terminator,
}.freeze
I__BLOCK_TYPES = BLOCK_TYPES.invert.freeze

# Values of the one-byte host OS field read by BlockFileHeader.
OSES = {
  0 => :oses_ms_dos,
  1 => :oses_os_2,
  2 => :oses_windows,
  3 => :oses_unix,
  4 => :oses_mac_os,
  5 => :oses_beos,
}.freeze
I__OSES = OSES.invert.freeze

# Values of the one-byte compression method field read by BlockFileHeader.
METHODS = {
  48 => :methods_store,
  49 => :methods_fastest,
  50 => :methods_fast,
  51 => :methods_normal,
  52 => :methods_good,
  53 => :methods_best,
}.freeze
I__METHODS = METHODS.invert.freeze
# Creates the root object and parses the stream right away.
#
# All three arguments are forwarded unchanged to the Kaitai::Struct::Struct
# constructor (bare `super` passes the current parameter values).
def initialize(_io, _parent = nil, _root = self)
  super
  _read
end
# Parses the magic signature and then reads blocks until the end of the
# stream. The block class is chosen from `magic.version`:
# 0 => Block, 1 => BlockV5.
#
# Parsing stops early if an iteration consumes no bytes. BlockV5 currently
# reads nothing, and an unrecognised version selects no block type at all;
# in both cases the stream would never reach EOF and the loop would
# otherwise run forever. The remainder of the stream is then left unparsed.
#
# @return [Rar] self
def _read
  @magic = MagicSignature.new(@_io, self, @_root)
  @blocks = []
  until @_io.eof?
    start_pos = @_io.pos
    case magic.version
    when 0
      @blocks << Block.new(@_io, self, @_root)
    when 1
      @blocks << BlockV5.new(@_io, self, @_root)
    end
    # No progress means every further iteration would be identical.
    break if @_io.pos == start_pos
  end
  self
end
##
# Magic signature found at the start of a RAR file. RAR versions 1.5 to
# 4.0 use a 7-byte magic, while v5+ uses an 8-byte magic (and a rather
# different block format). This type parses and validates both variants.
# Note that the signature is itself a valid RAR "block": in theory one
# could skip reading it as a signature and read it as a normal block,
# provided the exact RAR version is known (so the correct block format
# can be chosen).
class MagicSignature < Kaitai::Struct::Struct
  def initialize(_io, _parent = nil, _root = self)
    super
    _read
  end

  # Reads the 6-byte fixed prefix, the version byte and, when the version
  # is 1, one extra byte that must be zero.
  #
  # Raises Kaitai::Struct::ValidationNotEqualError when the prefix or the
  # extra byte does not match the expected value. Returns self.
  def _read
    expected_prefix = [82, 97, 114, 33, 26, 7].pack('C*') # "Rar!" 0x1A 0x07
    @magic1 = @_io.read_bytes(6)
    unless magic1 == expected_prefix
      raise Kaitai::Struct::ValidationNotEqualError.new(expected_prefix, magic1, _io, "/types/magic_signature/seq/0")
    end

    @version = @_io.read_u1
    if version == 1
      expected_trailer = [0].pack('C*')
      @magic3 = @_io.read_bytes(1)
      unless magic3 == expected_trailer
        raise Kaitai::Struct::ValidationNotEqualError.new(expected_trailer, magic3, _io, "/types/magic_signature/seq/2")
      end
    end
    self
  end

  ##
  # Fixed part of the file's magic signature; it does not change with
  # the RAR version
  attr_reader :magic1

  ##
  # Variable part of the magic signature: 0 means the old (RAR 1.5-4.0)
  # format, 1 means the new (RAR 5+) format
  attr_reader :version

  ##
  # Extra byte present only in the new format (RAR 5+) magic; nil when
  # `version` is not 1
  attr_reader :magic3
end
##
# Basic building block of an old-format RAR file. There are several
# block types (see `block_type`), and they differ in their `body` and
# `add_body`.
class Block < Kaitai::Struct::Struct
  def initialize(_io, _parent = nil, _root = self)
    super
    _read
  end

  # Reads the fixed header (CRC, type, flags, size and, if flagged, the
  # additional-content size), then the body and the optional additional
  # content. A file header block gets its body parsed into a
  # BlockFileHeader from a substream; every other type keeps the body as
  # raw bytes. Returns self.
  def _read
    @crc16 = @_io.read_u2le
    @block_type = Kaitai::Struct::Stream::resolve_enum(Rar::BLOCK_TYPES, @_io.read_u1)
    @flags = @_io.read_u2le
    @block_size = @_io.read_u2le
    @add_size = @_io.read_u4le if has_add

    if block_type == :block_types_file_header
      @_raw_body = @_io.read_bytes(body_size)
      @body = BlockFileHeader.new(Kaitai::Struct::Stream.new(@_raw_body), self, @_root)
    else
      @body = @_io.read_bytes(body_size)
    end

    @add_body = @_io.read_bytes(add_size) if has_add
    self
  end

  ##
  # True if block has additional content attached to it
  # (bit 0x8000 of `flags`). The result is cached after the first call.
  def has_add
    # Explicit nil check: a cached `false` must not be recomputed.
    @has_add = (flags & 0x8000) != 0 if @has_add.nil?
    @has_add
  end

  # Number of header bytes counted in `block_size`: 7 for the fixed fields
  # (2 + 1 + 2 + 2), or 11 when the 4-byte `add_size` is present.
  def header_size
    @header_size ||= has_add ? 11 : 7
  end

  # Number of body bytes: `block_size` minus `header_size`.
  def body_size
    @body_size ||= block_size - header_size
  end

  ##
  # CRC16 of whole block or some part of it (depends on block type)
  attr_reader :crc16

  # Block type as a Rar::BLOCK_TYPES symbol (result of resolve_enum on the
  # raw type byte).
  attr_reader :block_type

  # Raw 16-bit flags field of the block header.
  attr_reader :flags

  ##
  # Size of block (header + body, but without additional content)
  attr_reader :block_size

  ##
  # Size of additional content in this block; nil unless `has_add`
  attr_reader :add_size

  # Parsed BlockFileHeader for file header blocks, raw bytes otherwise.
  attr_reader :body

  ##
  # Additional content in this block; nil unless `has_add`
  attr_reader :add_body

  # Raw bytes backing `body`; only set for file header blocks.
  attr_reader :_raw_body
end
# Body of a block whose type is `:block_types_file_header`. It is parsed
# from a substream holding exactly the block body, and it consults the
# enclosing block's `flags` (via `_parent`) to decide which optional
# fields are present.
class BlockFileHeader < Kaitai::Struct::Struct
  def initialize(_io, _parent = nil, _root = self)
    super
    _read
  end

  # Reads the file header fields in stream order and returns self.
  def _read
    @low_unp_size = @_io.read_u4le
    @host_os = Kaitai::Struct::Stream::resolve_enum(Rar::OSES, @_io.read_u1)
    @file_crc32 = @_io.read_u4le
    # The timestamp is parsed from its own 4-byte substream by DosDatetime,
    # which is defined outside this file.
    @_raw_file_time = @_io.read_bytes(4)
    @file_time = DosDatetime.new(Kaitai::Struct::Stream.new(@_raw_file_time))
    @rar_version = @_io.read_u1
    @method = Kaitai::Struct::Stream::resolve_enum(Rar::METHODS, @_io.read_u1)
    @name_size = @_io.read_u2le
    @attr = @_io.read_u4le

    block_flags = _parent.flags
    # Bit 0x0100: the 64-bit size extension is present.
    # NOTE(review): the RAR 4.x header layout appears to list HIGH_UNP_SIZE
    # right after HIGH_PACK_SIZE; only one 32-bit field is read here.
    # Confirm against the .ksy source before relying on the fields below.
    @high_pack_size = @_io.read_u4le unless (block_flags & 0x0100).zero?
    @file_name = @_io.read_bytes(name_size)
    # Bit 0x0400: an 8-byte salt follows the file name.
    @salt = @_io.read_u8le unless (block_flags & 0x0400).zero?
    self
  end

  ##
  # Uncompressed file size (lower 32 bits, if 64-bit header flag is present)
  attr_reader :low_unp_size

  ##
  # Operating system used for archiving
  attr_reader :host_os

  # 32-bit value read from the file CRC field.
  attr_reader :file_crc32

  ##
  # Date and time in standard MS DOS format
  attr_reader :file_time

  ##
  # RAR version needed to extract file (version number is encoded as
  # 10 * major version + minor version)
  attr_reader :rar_version

  ##
  # Compression method.
  # Note: this reader replaces Object#method on instances of this class.
  attr_reader :method

  ##
  # File name size
  attr_reader :name_size

  ##
  # File attributes
  attr_reader :attr

  ##
  # Compressed file size, high 32 bits, only if 64-bit header flag is present
  attr_reader :high_pack_size

  # File name as raw bytes (`name_size` bytes, no decoding applied here).
  attr_reader :file_name

  # 8-byte little-endian salt value; nil unless flag bit 0x0400 is set.
  attr_reader :salt

  # Raw 4 bytes backing `file_time`.
  attr_reader :_raw_file_time
end
# Placeholder for a block of the new format (used when the magic signature
# version is 1). It declares no fields: parsing consumes nothing from the
# stream.
class BlockV5 < Kaitai::Struct::Struct
  def initialize(_io, _parent = nil, _root = self)
    super
    _read
  end

  # Reads no data and returns self.
  def _read
    self
  end
end
##
# File format signature to validate that it is indeed a RAR archive.
# Holds the MagicSignature instance parsed first by `_read`.
attr_reader :magic
##
# Sequence of blocks that constitute the RAR file, in the order they
# were read from the stream. Elements are Block objects when
# `magic.version` is 0 and BlockV5 objects when it is 1.
attr_reader :blocks
end