RAR (Roshal ARchiver) archive files: C++11/STL parsing library

RAR is an archive format used by the popular proprietary RAR archiver, created by Eugene Roshal. There are two major versions of the format (v1.5-4.0 and v5+).

The file format essentially consists of a linear sequence of blocks. Each block has a fixed header and a custom body (which depends on the block type), so it's possible to skip a block even if one doesn't know how to process that block type.

Application

RAR archiver

File extension

rar

KS implementation details

License: CC0-1.0
Minimal Kaitai Struct required: 0.7

References

This page hosts a formal specification of RAR (Roshal ARchiver) archive files using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
    #include <fstream>
    
    std::ifstream is("path/to/local/file.rar", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into a Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    rar_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.magic() // => File format signature to validate that it is indeed a RAR archive

C++11/STL source code to parse RAR (Roshal ARchiver) archive files

rar.h

#pragma once

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include "dos_datetime.h"
#include <vector>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif
class dos_datetime_t;

/**
 * RAR is an archive format used by the popular proprietary RAR archiver,
 * created by Eugene Roshal. There are two major versions of format
 * (v1.5-4.0 and RAR v5+).
 * 
 * File format essentially consists of a linear sequence of
 * blocks. Each block has fixed header and custom body (that depends on
 * block type), so it's possible to skip block even if one doesn't know
 * how to process a certain block type.
 * \sa http://acritum.com/winrar/rar-format Source
 */

class rar_t : public kaitai::kstruct {

public:
    class magic_signature_t;
    class block_t;
    class block_file_header_t;
    class block_v5_t;

    /**
     * Block type codes. The parser obtains one by casting the single byte
     * that follows the CRC16 in a block header. Values are written in hex;
     * the two "authenticity info" names carry their own hex code as suffix.
     */
    enum block_types_t {
        BLOCK_TYPES_MARKER                         = 0x72,
        BLOCK_TYPES_ARCHIVE_HEADER                 = 0x73,
        BLOCK_TYPES_FILE_HEADER                    = 0x74,
        BLOCK_TYPES_OLD_STYLE_COMMENT_HEADER       = 0x75,
        BLOCK_TYPES_OLD_STYLE_AUTHENTICITY_INFO_76 = 0x76,
        BLOCK_TYPES_OLD_STYLE_SUBBLOCK             = 0x77,
        BLOCK_TYPES_OLD_STYLE_RECOVERY_RECORD      = 0x78,
        BLOCK_TYPES_OLD_STYLE_AUTHENTICITY_INFO_79 = 0x79,
        BLOCK_TYPES_SUBBLOCK                       = 0x7a,
        BLOCK_TYPES_TERMINATOR                     = 0x7b
    };

    /**
     * Host operating system codes. The parser obtains one by casting the
     * single `host_os` byte of a file header.
     */
    enum oses_t {
        OSES_MS_DOS  = 0x00,
        OSES_OS_2    = 0x01,
        OSES_WINDOWS = 0x02,
        OSES_UNIX    = 0x03,
        OSES_MAC_OS  = 0x04,
        OSES_BEOS    = 0x05
    };

    /**
     * Compression method codes. The parser obtains one by casting the single
     * `method` byte of a file header. The values 0x30..0x35 are the ASCII
     * codes of the digits '0'..'5'.
     */
    enum methods_t {
        METHODS_STORE   = 0x30,
        METHODS_FASTEST = 0x31,
        METHODS_FAST    = 0x32,
        METHODS_NORMAL  = 0x33,
        METHODS_GOOD    = 0x34,
        METHODS_BEST    = 0x35
    };

    rar_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, rar_t* p__root = nullptr);

private:
    void _read();
    void _clean_up();

public:
    ~rar_t();

    /**
     * RAR uses either a 7-byte magic for RAR versions 1.5 to 4.0, or an
     * 8-byte magic (and pretty different block format) for v5+. This
     * type would parse and validate both versions of signature. Note
     * that actually this signature is a valid RAR "block": in theory,
     * one can omit signature reading at all, and read this normally,
     * as a block, if exact RAR version is known (and thus it's
     * possible to choose correct block format).
     */

    class magic_signature_t : public kaitai::kstruct {

    public:

        /**
         * Parses the signature from `p__io` right away: the constructor
         * calls `_read()`, which throws `kaitai::validation_not_equal_error`
         * when a fixed part of the signature does not match.
         */
        magic_signature_t(kaitai::kstream* p__io, rar_t* p__parent = nullptr, rar_t* p__root = nullptr);

    private:
        // _read() does the actual parsing; _clean_up() is invoked by the destructor.
        void _read();
        void _clean_up();

    public:
        ~magic_signature_t();

    private:
        std::string m_magic1;   // first 6 bytes of the signature
        uint8_t m_version;      // 1 byte following magic1
        std::string m_magic3;   // 1 extra byte, read only when version == 1
        bool n_magic3;          // true when magic3 was NOT read

    public:
        /** True when `magic3` is absent, i.e. `version()` was not 1. */
        bool _is_null_magic3() { magic3(); return n_magic3; };

    private:
        // Non-owning pointers handed in through the constructor.
        rar_t* m__root;
        rar_t* m__parent;

    public:

        /**
         * Fixed part of file's magic signature that doesn't change with RAR version
         */
        std::string magic1() const { return m_magic1; }

        /**
         * Variable part of magic signature: 0 means old (RAR 1.5-4.0)
         * format, 1 means new (RAR 5+) format
         */
        uint8_t version() const { return m_version; }

        /**
         * New format (RAR 5+) magic contains extra byte.
         * Left empty when `_is_null_magic3()` is true.
         */
        std::string magic3() const { return m_magic3; }
        /** Root pointer passed to the constructor. */
        rar_t* _root() const { return m__root; }
        /** Parent pointer passed to the constructor. */
        rar_t* _parent() const { return m__parent; }
    };

    /**
     * Basic block that RAR files consist of. There are several block
     * types (see `block_type`), which have different `body` and
     * `add_body`.
     */

    class block_t : public kaitai::kstruct {

    public:

        /**
         * Parses one block from `p__io` immediately (the constructor calls
         * `_read()`).
         */
        block_t(kaitai::kstream* p__io, rar_t* p__parent = nullptr, rar_t* p__root = nullptr);

    private:
        // _read() does the actual parsing; _clean_up() is invoked by the destructor.
        void _read();
        void _clean_up();

    public:
        ~block_t();

    private:
        // For each computed value: f_* records that it has been computed,
        // m_* holds the cached result.
        bool f_has_add;
        bool m_has_add;

    public:

        /**
         * True if block has additional content attached to it
         * (bit 0x8000 of `flags` is set). Computed on first call, then cached.
         */
        bool has_add();

    private:
        bool f_header_size;
        int8_t m_header_size;

    public:
        /**
         * Header length in bytes: 11 when `has_add()` is true (the 4-byte
         * `add_size` field is present), otherwise 7. Cached after first call.
         */
        int8_t header_size();

    private:
        bool f_body_size;
        int32_t m_body_size;

    public:
        /**
         * `block_size() - header_size()`, i.e. the number of body bytes that
         * follow the header. Cached after first call.
         */
        int32_t body_size();

    private:
        uint16_t m_crc16;
        block_types_t m_block_type;
        uint16_t m_flags;
        uint16_t m_block_size;
        uint32_t m_add_size;    // assigned only when has_add() is true
        bool n_add_size;        // true when add_size was NOT read

    public:
        /** True when `add_size` is absent (`has_add()` was false). */
        bool _is_null_add_size() { add_size(); return n_add_size; };

    private:
        std::unique_ptr<block_file_header_t> m_body;   // set only for FILE_HEADER blocks
        bool n_body;                                   // true when body was NOT parsed

    public:
        /** True when no structured body was parsed (block type is not FILE_HEADER). */
        bool _is_null_body() { body(); return n_body; };

    private:
        std::string m_add_body;
        bool n_add_body;        // true when add_body was NOT read

    public:
        /** True when `add_body` is absent (`has_add()` was false). */
        bool _is_null_add_body() { add_body(); return n_add_body; };

    private:
        // Non-owning pointers handed in through the constructor.
        rar_t* m__root;
        rar_t* m__parent;
        // Raw body bytes (read for every block type) and the substream over
        // them that the FILE_HEADER body is parsed from.
        std::string m__raw_body;
        std::unique_ptr<kaitai::kstream> m__io__raw_body;

    public:

        /**
         * CRC16 of whole block or some part of it (depends on block type)
         */
        uint16_t crc16() const { return m_crc16; }
        /** Block type, cast from the single byte following the CRC16. */
        block_types_t block_type() const { return m_block_type; }
        /** 16-bit flags; bit 0x8000 signals additional content (see `has_add`). */
        uint16_t flags() const { return m_flags; }

        /**
         * Size of block (header + body, but without additional content)
         */
        uint16_t block_size() const { return m_block_size; }

        /**
         * Size of additional content in this block.
         * Only assigned when `has_add()` is true; check `_is_null_add_size()` first.
         */
        uint32_t add_size() const { return m_add_size; }
        /**
         * Parsed body; non-null only when `block_type()` is
         * BLOCK_TYPES_FILE_HEADER. The pointer is owned by this block.
         */
        block_file_header_t* body() const { return m_body.get(); }

        /**
         * Additional content in this block
         */
        std::string add_body() const { return m_add_body; }
        /** Root pointer passed to the constructor. */
        rar_t* _root() const { return m__root; }
        /** Parent pointer passed to the constructor. */
        rar_t* _parent() const { return m__parent; }
        /** Unparsed body bytes (`body_size()` bytes), available for every block type. */
        std::string _raw_body() const { return m__raw_body; }
        /** Substream over `_raw_body()`; null unless the block is a FILE_HEADER. */
        kaitai::kstream* _io__raw_body() const { return m__io__raw_body.get(); }
    };

    /**
     * Body of a block whose type is BLOCK_TYPES_FILE_HEADER. It is parsed
     * from a substream over the parent block's raw body, and consults the
     * parent block's `flags` to decide which optional fields are present.
     */
    class block_file_header_t : public kaitai::kstruct {

    public:

        /**
         * Parses the file header from `p__io` immediately (the constructor
         * calls `_read()`). `_read()` dereferences `p__parent` to inspect
         * its flags.
         */
        block_file_header_t(kaitai::kstream* p__io, rar_t::block_t* p__parent = nullptr, rar_t* p__root = nullptr);

    private:
        // _read() does the actual parsing; _clean_up() is invoked by the destructor.
        void _read();
        void _clean_up();

    public:
        ~block_file_header_t();

    private:
        uint32_t m_low_unp_size;
        oses_t m_host_os;
        uint32_t m_file_crc32;
        std::unique_ptr<dos_datetime_t> m_file_time;
        uint8_t m_rar_version;
        methods_t m_method;
        uint16_t m_name_size;
        uint32_t m_attr;
        uint32_t m_high_pack_size;  // assigned only when parent flag 0x100 is set
        bool n_high_pack_size;      // true when high_pack_size was NOT read

    public:
        /** True when `high_pack_size` is absent (parent flag 0x100 not set). */
        bool _is_null_high_pack_size() { high_pack_size(); return n_high_pack_size; };

    private:
        std::string m_file_name;
        uint64_t m_salt;            // assigned only when parent flag 0x400 is set
        bool n_salt;                // true when salt was NOT read

    public:
        /** True when `salt` is absent (parent flag 0x400 not set). */
        bool _is_null_salt() { salt(); return n_salt; };

    private:
        // Non-owning pointers handed in through the constructor.
        rar_t* m__root;
        rar_t::block_t* m__parent;
        // The 4 raw timestamp bytes and the substream file_time is parsed from.
        std::string m__raw_file_time;
        std::unique_ptr<kaitai::kstream> m__io__raw_file_time;

    public:

        /**
         * Uncompressed file size (lower 32 bits, if 64-bit header flag is present)
         */
        uint32_t low_unp_size() const { return m_low_unp_size; }

        /**
         * Operating system used for archiving
         */
        oses_t host_os() const { return m_host_os; }
        /** 32-bit little-endian value read right after `host_os`. */
        uint32_t file_crc32() const { return m_file_crc32; }

        /**
         * Date and time in standard MS DOS format
         */
        dos_datetime_t* file_time() const { return m_file_time.get(); }

        /**
         * RAR version needed to extract file (Version number is encoded as 10 * Major version + minor version.)
         */
        uint8_t rar_version() const { return m_rar_version; }

        /**
         * Compression method
         */
        methods_t method() const { return m_method; }

        /**
         * File name size
         */
        uint16_t name_size() const { return m_name_size; }

        /**
         * File attributes
         */
        uint32_t attr() const { return m_attr; }

        /**
         * Compressed file size, high 32 bits, only if 64-bit header flag is present.
         * Only assigned when the parent block's flag 0x100 is set; check
         * `_is_null_high_pack_size()` first.
         */
        uint32_t high_pack_size() const { return m_high_pack_size; }
        /** Raw file name bytes (`name_size()` bytes, not decoded). */
        std::string file_name() const { return m_file_name; }
        /**
         * 8-byte little-endian value, only assigned when the parent block's
         * flag 0x400 is set; check `_is_null_salt()` first.
         */
        uint64_t salt() const { return m_salt; }
        /** Root pointer passed to the constructor. */
        rar_t* _root() const { return m__root; }
        /** Parent block passed to the constructor. */
        rar_t::block_t* _parent() const { return m__parent; }
        /** The 4 raw bytes that `file_time()` was parsed from. */
        std::string _raw_file_time() const { return m__raw_file_time; }
        /** Substream over `_raw_file_time()`. */
        kaitai::kstream* _io__raw_file_time() const { return m__io__raw_file_time.get(); }
    };

    /**
     * Placeholder for blocks of the new (RAR 5+) format. Its `_read()` is
     * empty, so constructing one consumes no bytes from the stream and the
     * object exposes no fields beyond `_root()` / `_parent()`.
     */
    class block_v5_t : public kaitai::kstruct {

    public:

        /** Stores the parent/root pointers and calls the (empty) `_read()`. */
        block_v5_t(kaitai::kstream* p__io, rar_t* p__parent = nullptr, rar_t* p__root = nullptr);

    private:
        // Both are empty in the implementation.
        void _read();
        void _clean_up();

    public:
        ~block_v5_t();

    private:
        // Non-owning pointers handed in through the constructor.
        rar_t* m__root;
        rar_t* m__parent;

    public:
        /** Root pointer passed to the constructor. */
        rar_t* _root() const { return m__root; }
        /** Parent pointer passed to the constructor. */
        rar_t* _parent() const { return m__parent; }
    };

private:
    std::unique_ptr<magic_signature_t> m_magic;
    std::unique_ptr<std::vector<std::unique_ptr<kaitai::kstruct>>> m_blocks;
    rar_t* m__root;
    kaitai::kstruct* m__parent;

public:

    /**
     * File format signature to validate that it is indeed a RAR archive
     */
    magic_signature_t* magic() const { return m_magic.get(); }

    /**
     * Sequence of blocks that constitute the RAR file
     */
    std::vector<std::unique_ptr<kaitai::kstruct>>* blocks() const { return m_blocks.get(); }
    rar_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
};

rar.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "rar.h"
#include "kaitai/exceptions.h"

/**
 * Builds the top-level RAR object and parses the stream immediately.
 *
 * @param p__io     stream to read from
 * @param p__parent stored as-is and returned by _parent()
 * @param p__root   accepted but not consulted: this object always records
 *                  itself as the root
 */
rar_t::rar_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, rar_t* p__root)
    : kaitai::kstruct(p__io),
      m_magic(nullptr),
      m_blocks(nullptr),
      m__root(this),
      m__parent(p__parent) {
    _read();
}

/**
 * Parses the magic signature and then blocks until the end of the stream.
 *
 * The block type is chosen by the signature's version byte: 0 selects
 * block_t (RAR 1.5-4.0), 1 selects block_v5_t (RAR 5+). Any other version
 * matches no case and produces no block.
 *
 * block_v5_t does not consume any bytes, and an unknown version reads
 * nothing at all, so without a guard the loop would never reach EOF (and,
 * for version 1, would append placeholder blocks until memory runs out).
 * The loop therefore stops as soon as an iteration fails to advance the
 * stream position. For version 0 every block_t reads at least its 7-byte
 * header (or throws), so parsing of old-format archives is unaffected.
 */
void rar_t::_read() {
    m_magic = std::unique_ptr<magic_signature_t>(new magic_signature_t(m__io, this, m__root));
    m_blocks = std::unique_ptr<std::vector<std::unique_ptr<kaitai::kstruct>>>(new std::vector<std::unique_ptr<kaitai::kstruct>>());
    while (!m__io->is_eof()) {
        const auto pos_before = m__io->pos();
        switch (magic()->version()) {
        case 0: {
            m_blocks->push_back(std::unique_ptr<block_t>(new block_t(m__io, this, m__root)));
            break;
        }
        case 1: {
            m_blocks->push_back(std::unique_ptr<block_v5_t>(new block_v5_t(m__io, this, m__root)));
            break;
        }
        }
        if (m__io->pos() == pos_before) {
            // Nothing was consumed: further iterations could not make progress either.
            break;
        }
    }
}

// Destructor: delegates to _clean_up(). The parsed signature and block list
// are held in std::unique_ptr members, which release them on destruction.
rar_t::~rar_t() {
    _clean_up();
}

// Intentionally empty: there is nothing to release by hand.
void rar_t::_clean_up() {
}

/**
 * Records the parent/root pointers and parses the signature from p__io
 * immediately.
 */
rar_t::magic_signature_t::magic_signature_t(kaitai::kstream* p__io, rar_t* p__parent, rar_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Reads the signature: six fixed bytes, a one-byte version and, only when
 * the version is 1, one extra byte that must be zero.
 *
 * Throws kaitai::validation_not_equal_error when either fixed part differs
 * from the expected bytes.
 */
void rar_t::magic_signature_t::_read() {
    const std::string expected_magic1("\x52\x61\x72\x21\x1A\x07", 6);
    m_magic1 = m__io->read_bytes(6);
    if (magic1() != expected_magic1) {
        throw kaitai::validation_not_equal_error<std::string>(expected_magic1, magic1(), _io(), std::string("/types/magic_signature/seq/0"));
    }

    m_version = m__io->read_u1();

    // magic3 exists only in the new-format signature.
    n_magic3 = (version() != 1);
    if (!n_magic3) {
        const std::string expected_magic3("\x00", 1);
        m_magic3 = m__io->read_bytes(1);
        if (magic3() != expected_magic3) {
            throw kaitai::validation_not_equal_error<std::string>(expected_magic3, magic3(), _io(), std::string("/types/magic_signature/seq/2"));
        }
    }
}

// Destructor: delegates to _clean_up().
rar_t::magic_signature_t::~magic_signature_t() {
    this->_clean_up();
}

// No manual cleanup is required, whether or not magic3 was read.
void rar_t::magic_signature_t::_clean_up() {
}

/**
 * Records the parent/root pointers, marks all lazily computed values
 * (has_add, header_size, body_size) as not yet computed, and parses one
 * block from p__io immediately.
 */
rar_t::block_t::block_t(kaitai::kstream* p__io, rar_t* p__parent, rar_t* p__root)
    : kaitai::kstruct(p__io),
      f_has_add(false),
      f_header_size(false),
      f_body_size(false),
      m__root(p__root),
      m__parent(p__parent),
      m__io__raw_body(nullptr) {
    _read();
}

/**
 * Reads one old-format block: the fixed header (CRC16, type, flags, size),
 * the optional 4-byte add_size, body_size() bytes of body and, when the
 * block has additional content, add_size() further bytes.
 *
 * The body is always kept as raw bytes; for FILE_HEADER blocks it is also
 * parsed into a block_file_header_t through a substream over those bytes.
 */
void rar_t::block_t::_read() {
    m_crc16 = m__io->read_u2le();
    m_block_type = static_cast<rar_t::block_types_t>(m__io->read_u1());
    m_flags = m__io->read_u2le();
    m_block_size = m__io->read_u2le();

    const bool with_add = has_add();

    n_add_size = !with_add;
    if (with_add) {
        m_add_size = m__io->read_u4le();
    }

    // Every block type consumes its body bytes, parsed or not.
    m__raw_body = m__io->read_bytes(body_size());
    n_body = (block_type() != rar_t::BLOCK_TYPES_FILE_HEADER);
    if (!n_body) {
        m__io__raw_body.reset(new kaitai::kstream(m__raw_body));
        m_body.reset(new block_file_header_t(m__io__raw_body.get(), this, m__root));
    }

    n_add_body = !with_add;
    if (with_add) {
        m_add_body = m__io->read_bytes(add_size());
    }
}

// Destructor: delegates to _clean_up().
rar_t::block_t::~block_t() {
    this->_clean_up();
}

// No manual cleanup is required for add_size, body or add_body, whether or
// not they were read; the owning members release themselves.
void rar_t::block_t::_clean_up() {
}

/**
 * Tells whether bit 0x8000 of the block flags is set. The result is
 * computed on the first call and served from the cache afterwards.
 */
bool rar_t::block_t::has_add() {
    if (!f_has_add) {
        m_has_add = (flags() & 0x8000) != 0;
        f_has_add = true;
    }
    return m_has_add;
}

/**
 * Header length in bytes: 11 when the block has additional content,
 * otherwise 7. Computed on the first call and cached afterwards.
 */
int8_t rar_t::block_t::header_size() {
    if (!f_header_size) {
        if (has_add()) {
            m_header_size = 11;
        } else {
            m_header_size = 7;
        }
        f_header_size = true;
    }
    return m_header_size;
}

/**
 * Number of body bytes: the declared block size minus the header length.
 * Computed on the first call and cached afterwards.
 */
int32_t rar_t::block_t::body_size() {
    if (!f_body_size) {
        m_body_size = block_size() - header_size();
        f_body_size = true;
    }
    return m_body_size;
}

/**
 * Records the parent/root pointers and parses the file header from p__io
 * immediately.
 */
rar_t::block_file_header_t::block_file_header_t(kaitai::kstream* p__io, rar_t::block_t* p__parent, rar_t* p__root)
    : kaitai::kstruct(p__io),
      m_file_time(nullptr),
      m__root(p__root),
      m__parent(p__parent),
      m__io__raw_file_time(nullptr) {
    _read();
}

/**
 * Reads the file header fields in stream order. Two fields are optional and
 * depend on the parent block's flags: high_pack_size (flag 0x100) and the
 * 8-byte salt (flag 0x400). The 4 timestamp bytes are kept raw and also
 * parsed into a dos_datetime_t through a substream.
 */
void rar_t::block_file_header_t::_read() {
    m_low_unp_size = m__io->read_u4le();
    m_host_os = static_cast<rar_t::oses_t>(m__io->read_u1());
    m_file_crc32 = m__io->read_u4le();

    m__raw_file_time = m__io->read_bytes(4);
    m__io__raw_file_time.reset(new kaitai::kstream(m__raw_file_time));
    m_file_time.reset(new dos_datetime_t(m__io__raw_file_time.get()));

    m_rar_version = m__io->read_u1();
    m_method = static_cast<rar_t::methods_t>(m__io->read_u1());
    m_name_size = m__io->read_u2le();
    m_attr = m__io->read_u4le();

    // The parent's flags are fixed by now; fetch them once for both checks.
    const uint16_t block_flags = _parent()->flags();

    n_high_pack_size = ((block_flags & 0x100) == 0);
    if (!n_high_pack_size) {
        m_high_pack_size = m__io->read_u4le();
    }

    m_file_name = m__io->read_bytes(name_size());

    n_salt = ((block_flags & 0x400) == 0);
    if (!n_salt) {
        m_salt = m__io->read_u8le();
    }
}

// Destructor: delegates to _clean_up().
rar_t::block_file_header_t::~block_file_header_t() {
    this->_clean_up();
}

// No manual cleanup is required for high_pack_size or salt, whether or not
// they were read.
void rar_t::block_file_header_t::_clean_up() {
}

/**
 * Records the parent/root pointers and invokes _read(), which is empty for
 * this type, so no bytes are consumed from p__io.
 */
rar_t::block_v5_t::block_v5_t(kaitai::kstream* p__io, rar_t* p__parent, rar_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

// Intentionally empty: no fields are defined for RAR 5+ blocks, so nothing
// is read from the stream and the stream position does not move.
void rar_t::block_v5_t::_read() {
}

// Destructor: delegates to _clean_up().
rar_t::block_v5_t::~block_v5_t() {
    _clean_up();
}

// Intentionally empty: there is nothing to release.
void rar_t::block_v5_t::_clean_up() {
}