XAR (eXtensible ARchive): C++98/STL parsing library

From Wikipedia:

"XAR (short for eXtensible ARchive format) is an open source file archiver and the archiver's file format. It was created within the OpenDarwin project and is used in macOS X 10.5 and up for software installation routines, as well as browser extensions in Safari 5.0 and up."

File extension

["xar", "pkg", "xip"]

KS implementation details

License: CC0-1.0
Minimal Kaitai Struct required: 0.9

References

This page hosts a formal specification of XAR (eXtensible ARchive) using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++98/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
    #include <fstream>
    
    std::ifstream is("path/to/local/file.xar", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into a Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    xar_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.header_prefix() // => internal; access `_root.header` instead

C++98/STL source code to parse XAR (eXtensible ARchive)

xar.h

#ifndef XAR_H_
#define XAR_H_

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif

/**
 * From [Wikipedia](https://en.wikipedia.org/wiki/Xar_(archiver)):
 * 
 * "XAR (short for eXtensible ARchive format) is an open source file archiver
 * and the archiver's file format. It was created within the OpenDarwin project
 * and is used in macOS X 10.5 and up for software installation routines, as
 * well as browser extensions in Safari 5.0 and up."
 * \sa https://github.com/mackyle/xar/wiki/xarformat Source
 */

class xar_t : public kaitai::kstruct {

public:
    class file_header_prefix_t;
    class file_header_t;
    class toc_type_t;

    enum checksum_algorithms_apple_t {
        CHECKSUM_ALGORITHMS_APPLE_NONE = 0,
        CHECKSUM_ALGORITHMS_APPLE_SHA1 = 1,
        CHECKSUM_ALGORITHMS_APPLE_MD5 = 2,
        CHECKSUM_ALGORITHMS_APPLE_SHA256 = 3,
        CHECKSUM_ALGORITHMS_APPLE_SHA512 = 4
    };

    xar_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, xar_t* p__root = 0);

private:
    void _read();
    void _clean_up();

public:
    ~xar_t();

    class file_header_prefix_t : public kaitai::kstruct {

    public:

        file_header_prefix_t(kaitai::kstream* p__io, xar_t* p__parent = 0, xar_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~file_header_prefix_t();

    private:
        std::string m_magic;
        uint16_t m_len_header;
        xar_t* m__root;
        xar_t* m__parent;

    public:
        std::string magic() const { return m_magic; }

        /**
         * internal; access `_root.header.len_header` instead
         */
        uint16_t len_header() const { return m_len_header; }
        xar_t* _root() const { return m__root; }
        xar_t* _parent() const { return m__parent; }
    };

    class file_header_t : public kaitai::kstruct {

    public:

        file_header_t(kaitai::kstream* p__io, xar_t* p__parent = 0, xar_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~file_header_t();

    private:
        bool f_checksum_algorithm_name;
        std::string m_checksum_algorithm_name;

    public:

        /**
         * If it is not
         * 
         * * `""` (empty string), indicating an unknown integer value (access
         *   `checksum_algorithm_int` for debugging purposes to find out
         *   what that value is), or
         * * `"none"`, indicating that the TOC checksum is not provided (in that
         *   case, the `<checksum>` property or its `style` attribute should be
         *   missing, or the `style` attribute must be set to `"none"`),
         * 
         * it must exactly match the `style` attribute value of the
         * `<checksum>` property in the root node `<toc>`. See
         * <https://github.com/mackyle/xar/blob/66d451d/xar/lib/archive.c#L345-L371>
         * for reference.
         * 
         * The `xar` (eXtensible ARchiver) program [uses OpenSSL's function
         * `EVP_get_digestbyname`](
         *   https://github.com/mackyle/xar/blob/66d451d/xar/lib/archive.c#L328
         * ) to verify this value (if it's not `""` or `"none"`, of course).
         * So it's reasonable to assume that this can only have one of the values
         * that OpenSSL recognizes.
         */
        std::string checksum_algorithm_name();

    private:
        bool f_has_checksum_alg_name;
        bool m_has_checksum_alg_name;

    public:
        bool has_checksum_alg_name();

    private:
        bool f_len_header;
        uint16_t m_len_header;

    public:
        uint16_t len_header();

    private:
        uint16_t m_version;
        uint64_t m_len_toc_compressed;
        uint64_t m_toc_length_uncompressed;
        uint32_t m_checksum_algorithm_int;
        std::string m_checksum_alg_name;
        bool n_checksum_alg_name;

    public:
        bool _is_null_checksum_alg_name() { checksum_alg_name(); return n_checksum_alg_name; };

    private:
        xar_t* m__root;
        xar_t* m__parent;

    public:
        uint16_t version() const { return m_version; }
        uint64_t len_toc_compressed() const { return m_len_toc_compressed; }
        uint64_t toc_length_uncompressed() const { return m_toc_length_uncompressed; }

        /**
         * internal; access `checksum_algorithm_name` instead
         */
        uint32_t checksum_algorithm_int() const { return m_checksum_algorithm_int; }

        /**
         * internal; access `checksum_algorithm_name` instead
         */
        std::string checksum_alg_name() const { return m_checksum_alg_name; }
        xar_t* _root() const { return m__root; }
        xar_t* _parent() const { return m__parent; }
    };

    class toc_type_t : public kaitai::kstruct {

    public:

        toc_type_t(kaitai::kstream* p__io, xar_t* p__parent = 0, xar_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~toc_type_t();

    private:
        std::string m_xml_string;
        xar_t* m__root;
        xar_t* m__parent;

    public:
        std::string xml_string() const { return m_xml_string; }
        xar_t* _root() const { return m__root; }
        xar_t* _parent() const { return m__parent; }
    };

private:
    bool f_checksum_algorithm_other;
    int8_t m_checksum_algorithm_other;

public:

    /**
     * \sa https://github.com/mackyle/xar/blob/66d451d/xar/include/xar.h.in#L85 Source
     */
    int8_t checksum_algorithm_other();

private:
    file_header_prefix_t* m_header_prefix;
    file_header_t* m_header;
    toc_type_t* m_toc;
    xar_t* m__root;
    kaitai::kstruct* m__parent;
    std::string m__raw_header;
    kaitai::kstream* m__io__raw_header;
    std::string m__raw_toc;
    kaitai::kstream* m__io__raw_toc;
    std::string m__raw__raw_toc;

public:

    /**
     * internal; access `_root.header` instead
     */
    file_header_prefix_t* header_prefix() const { return m_header_prefix; }
    file_header_t* header() const { return m_header; }

    /**
     * zlib compressed XML further describing the content of the archive
     */
    toc_type_t* toc() const { return m_toc; }
    xar_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
    std::string _raw_header() const { return m__raw_header; }
    kaitai::kstream* _io__raw_header() const { return m__io__raw_header; }
    std::string _raw_toc() const { return m__raw_toc; }
    kaitai::kstream* _io__raw_toc() const { return m__io__raw_toc; }
    std::string _raw__raw_toc() const { return m__raw__raw_toc; }
};

#endif  // XAR_H_

xar.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "xar.h"
#include "kaitai/exceptions.h"

xar_t::xar_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, xar_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = this;
    m_header_prefix = 0;
    m_header = 0;
    m__io__raw_header = 0;
    m_toc = 0;
    m__io__raw_toc = 0;
    f_checksum_algorithm_other = false;

    try {
        _read();
    } catch(...) {
        _clean_up();
        throw;
    }
}

void xar_t::_read() {
    m_header_prefix = new file_header_prefix_t(m__io, this, m__root);
    m__raw_header = m__io->read_bytes((header_prefix()->len_header() - 6));
    m__io__raw_header = new kaitai::kstream(m__raw_header);
    m_header = new file_header_t(m__io__raw_header, this, m__root);
    m__raw__raw_toc = m__io->read_bytes(header()->len_toc_compressed());
    m__raw_toc = kaitai::kstream::process_zlib(m__raw__raw_toc);
    m__io__raw_toc = new kaitai::kstream(m__raw_toc);
    m_toc = new toc_type_t(m__io__raw_toc, this, m__root);
}

xar_t::~xar_t() {
    _clean_up();
}

void xar_t::_clean_up() {
    if (m_header_prefix) {
        delete m_header_prefix; m_header_prefix = 0;
    }
    if (m__io__raw_header) {
        delete m__io__raw_header; m__io__raw_header = 0;
    }
    if (m_header) {
        delete m_header; m_header = 0;
    }
    if (m__io__raw_toc) {
        delete m__io__raw_toc; m__io__raw_toc = 0;
    }
    if (m_toc) {
        delete m_toc; m_toc = 0;
    }
}

xar_t::file_header_prefix_t::file_header_prefix_t(kaitai::kstream* p__io, xar_t* p__parent, xar_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;

    try {
        _read();
    } catch(...) {
        _clean_up();
        throw;
    }
}

void xar_t::file_header_prefix_t::_read() {
    m_magic = m__io->read_bytes(4);
    if (!(magic() == std::string("\x78\x61\x72\x21", 4))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x78\x61\x72\x21", 4), magic(), _io(), std::string("/types/file_header_prefix/seq/0"));
    }
    m_len_header = m__io->read_u2be();
}

xar_t::file_header_prefix_t::~file_header_prefix_t() {
    _clean_up();
}

void xar_t::file_header_prefix_t::_clean_up() {
}

xar_t::file_header_t::file_header_t(kaitai::kstream* p__io, xar_t* p__parent, xar_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    f_checksum_algorithm_name = false;
    f_has_checksum_alg_name = false;
    f_len_header = false;

    try {
        _read();
    } catch(...) {
        _clean_up();
        throw;
    }
}

void xar_t::file_header_t::_read() {
    m_version = m__io->read_u2be();
    if (!(version() == 1)) {
        throw kaitai::validation_not_equal_error<uint16_t>(1, version(), _io(), std::string("/types/file_header/seq/0"));
    }
    m_len_toc_compressed = m__io->read_u8be();
    m_toc_length_uncompressed = m__io->read_u8be();
    m_checksum_algorithm_int = m__io->read_u4be();
    n_checksum_alg_name = true;
    if (has_checksum_alg_name()) {
        n_checksum_alg_name = false;
        m_checksum_alg_name = kaitai::kstream::bytes_to_str(kaitai::kstream::bytes_terminate(m__io->read_bytes_full(), 0, false), std::string("UTF-8"));
        {
            std::string _ = checksum_alg_name();
            if (!( ((_ != std::string("")) && (_ != std::string("none"))) )) {
                throw kaitai::validation_expr_error<std::string>(checksum_alg_name(), _io(), std::string("/types/file_header/seq/4"));
            }
        }
    }
}

xar_t::file_header_t::~file_header_t() {
    _clean_up();
}

void xar_t::file_header_t::_clean_up() {
    if (!n_checksum_alg_name) {
    }
}

std::string xar_t::file_header_t::checksum_algorithm_name() {
    if (f_checksum_algorithm_name)
        return m_checksum_algorithm_name;
    m_checksum_algorithm_name = ((has_checksum_alg_name()) ? (checksum_alg_name()) : (((checksum_algorithm_int() == xar_t::CHECKSUM_ALGORITHMS_APPLE_NONE) ? (std::string("none")) : (((checksum_algorithm_int() == xar_t::CHECKSUM_ALGORITHMS_APPLE_SHA1) ? (std::string("sha1")) : (((checksum_algorithm_int() == xar_t::CHECKSUM_ALGORITHMS_APPLE_MD5) ? (std::string("md5")) : (((checksum_algorithm_int() == xar_t::CHECKSUM_ALGORITHMS_APPLE_SHA256) ? (std::string("sha256")) : (((checksum_algorithm_int() == xar_t::CHECKSUM_ALGORITHMS_APPLE_SHA512) ? (std::string("sha512")) : (std::string("")))))))))))));
    f_checksum_algorithm_name = true;
    return m_checksum_algorithm_name;
}

bool xar_t::file_header_t::has_checksum_alg_name() {
    if (f_has_checksum_alg_name)
        return m_has_checksum_alg_name;
    m_has_checksum_alg_name =  ((checksum_algorithm_int() == _root()->checksum_algorithm_other()) && (len_header() >= 32) && (kaitai::kstream::mod(len_header(), 4) == 0)) ;
    f_has_checksum_alg_name = true;
    return m_has_checksum_alg_name;
}

uint16_t xar_t::file_header_t::len_header() {
    if (f_len_header)
        return m_len_header;
    m_len_header = _root()->header_prefix()->len_header();
    f_len_header = true;
    return m_len_header;
}

xar_t::toc_type_t::toc_type_t(kaitai::kstream* p__io, xar_t* p__parent, xar_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;

    try {
        _read();
    } catch(...) {
        _clean_up();
        throw;
    }
}

void xar_t::toc_type_t::_read() {
    m_xml_string = kaitai::kstream::bytes_to_str(m__io->read_bytes_full(), std::string("UTF-8"));
}

xar_t::toc_type_t::~toc_type_t() {
    _clean_up();
}

void xar_t::toc_type_t::_clean_up() {
}

int8_t xar_t::checksum_algorithm_other() {
    if (f_checksum_algorithm_other)
        return m_checksum_algorithm_other;
    m_checksum_algorithm_other = 3;
    f_checksum_algorithm_other = true;
    return m_checksum_algorithm_other;
}