Microsoft Compound File Binary (CFB), AKA OLE (Object Linking and Embedding) file format: C++/STL parsing library

This page hosts a formal specification of the Microsoft Compound File Binary (CFB), AKA OLE (Object Linking and Embedding), file format, written using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream).
    • One can open a stream for reading from a local file:
      #include <fstream>
      
      std::ifstream is("path/to/local/file.microsoft_cfb", std::ifstream::binary);
    • Or one can prepare a stream for reading from existing std::string str:
      #include <sstream>
      
      std::istringstream is(str);
    • Or one can parse an arbitrary char* buffer in memory, given that we know its size:
      #include <sstream>
      
      const char buf[] = { ... };
      std::string str(buf, sizeof buf);
      std::istringstream is(str);
  2. We need to wrap our input stream in a Kaitai stream:
    #include <kaitai/kaitaistream.h>
    
    kaitai::kstream ks(&is);
  3. And finally, we can invoke the parsing:
    microsoft_cfb_t data(&ks);

After that, one can get various attributes from the structure by invoking getter methods like:

data.header() // => get header

C++/STL source code to parse Microsoft Compound File Binary (CFB), AKA OLE (Object Linking and Embedding) file format

microsoft_cfb.h

#ifndef MICROSOFT_CFB_H_
#define MICROSOFT_CFB_H_

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"

#include <stdint.h>
#include <vector>

#if KAITAI_STRUCT_VERSION < 7000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.7 or later is required"
#endif

class microsoft_cfb_t : public kaitai::kstruct {

public:
    class cfb_header_t;
    class fat_entries_t;
    class dir_entry_t;

    microsoft_cfb_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, microsoft_cfb_t* p__root = 0);

private:
    void _read();

public:
    ~microsoft_cfb_t();

    class cfb_header_t : public kaitai::kstruct {

    public:

        cfb_header_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent = 0, microsoft_cfb_t* p__root = 0);

    private:
        void _read();

    public:
        ~cfb_header_t();

    private:
        std::string m_signature;
        std::string m_clsid;
        uint16_t m_version_minor;
        uint16_t m_version_major;
        std::string m_byte_order;
        uint16_t m_sector_shift;
        uint16_t m_mini_sector_shift;
        std::string m_reserved1;
        int32_t m_size_dir;
        int32_t m_size_fat;
        int32_t m_ofs_dir;
        int32_t m_transaction_seq;
        int32_t m_mini_stream_cutoff_size;
        int32_t m_ofs_mini_fat;
        int32_t m_size_mini_fat;
        int32_t m_ofs_difat;
        int32_t m_size_difat;
        std::vector<int32_t>* m_difat;
        microsoft_cfb_t* m__root;
        microsoft_cfb_t* m__parent;

    public:

        /**
         * Magic bytes that confirm that this is a CFB file
         */
        std::string signature() const { return m_signature; }

        /**
         * Reserved class ID field, must be all 0
         */
        std::string clsid() const { return m_clsid; }
        uint16_t version_minor() const { return m_version_minor; }
        uint16_t version_major() const { return m_version_major; }

        /**
         * In theory, specifies a byte order. In practice, no other values besides FE FF (which imply little endian order) are used.
         */
        std::string byte_order() const { return m_byte_order; }

        /**
         * For major version 3, must be 0x9 (sector size = 512 bytes). For major version 4, must be 0xc (sector size = 4096 bytes).
         */
        uint16_t sector_shift() const { return m_sector_shift; }
        uint16_t mini_sector_shift() const { return m_mini_sector_shift; }
        std::string reserved1() const { return m_reserved1; }

        /**
         * Number of directory sectors in this file. For major version 3, must be 0.
         */
        int32_t size_dir() const { return m_size_dir; }

        /**
         * Number of FAT sectors in this file.
         */
        int32_t size_fat() const { return m_size_fat; }

        /**
         * Starting sector number for directory stream.
         */
        int32_t ofs_dir() const { return m_ofs_dir; }

        /**
         * A transaction sequence number, which is incremented each time the file is saved if transactions are implemented, 0 otherwise.
         */
        int32_t transaction_seq() const { return m_transaction_seq; }
        int32_t mini_stream_cutoff_size() const { return m_mini_stream_cutoff_size; }

        /**
         * Starting sector number for mini FAT.
         */
        int32_t ofs_mini_fat() const { return m_ofs_mini_fat; }

        /**
         * Number of mini FAT sectors in this file.
         */
        int32_t size_mini_fat() const { return m_size_mini_fat; }

        /**
         * Starting sector number for DIFAT.
         */
        int32_t ofs_difat() const { return m_ofs_difat; }

        /**
         * Number of DIFAT sectors in this file.
         */
        int32_t size_difat() const { return m_size_difat; }
        std::vector<int32_t>* difat() const { return m_difat; }
        microsoft_cfb_t* _root() const { return m__root; }
        microsoft_cfb_t* _parent() const { return m__parent; }
    };

    class fat_entries_t : public kaitai::kstruct {

    public:

        fat_entries_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent = 0, microsoft_cfb_t* p__root = 0);

    private:
        void _read();

    public:
        ~fat_entries_t();

    private:
        std::vector<int32_t>* m_entries;
        microsoft_cfb_t* m__root;
        microsoft_cfb_t* m__parent;

    public:
        std::vector<int32_t>* entries() const { return m_entries; }
        microsoft_cfb_t* _root() const { return m__root; }
        microsoft_cfb_t* _parent() const { return m__parent; }
    };

    class dir_entry_t : public kaitai::kstruct {

    public:

        enum obj_type_t {
            OBJ_TYPE_UNKNOWN = 0,
            OBJ_TYPE_STORAGE = 1,
            OBJ_TYPE_STREAM = 2,
            OBJ_TYPE_ROOT_STORAGE = 5
        };

        enum rb_color_t {
            RB_COLOR_RED = 0,
            RB_COLOR_BLACK = 1
        };

        dir_entry_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, microsoft_cfb_t* p__root = 0);

    private:
        void _read();

    public:
        ~dir_entry_t();

    private:
        bool f_mini_stream;
        std::string m_mini_stream;
        bool n_mini_stream;

    public:
        bool _is_null_mini_stream() { mini_stream(); return n_mini_stream; };

    private:

    public:
        std::string mini_stream();

    private:
        bool f_child;
        dir_entry_t* m_child;
        bool n_child;

    public:
        bool _is_null_child() { child(); return n_child; };

    private:

    public:
        dir_entry_t* child();

    private:
        bool f_left_sibling;
        dir_entry_t* m_left_sibling;
        bool n_left_sibling;

    public:
        bool _is_null_left_sibling() { left_sibling(); return n_left_sibling; };

    private:

    public:
        dir_entry_t* left_sibling();

    private:
        bool f_right_sibling;
        dir_entry_t* m_right_sibling;
        bool n_right_sibling;

    public:
        bool _is_null_right_sibling() { right_sibling(); return n_right_sibling; };

    private:

    public:
        dir_entry_t* right_sibling();

    private:
        std::string m_name;
        uint16_t m_name_len;
        obj_type_t m_object_type;
        rb_color_t m_color_flag;
        int32_t m_left_sibling_id;
        int32_t m_right_sibling_id;
        int32_t m_child_id;
        std::string m_clsid;
        uint32_t m_state;
        uint64_t m_time_create;
        uint64_t m_time_mod;
        int32_t m_ofs;
        uint64_t m_size;
        microsoft_cfb_t* m__root;
        kaitai::kstruct* m__parent;

    public:
        std::string name() const { return m_name; }
        uint16_t name_len() const { return m_name_len; }
        obj_type_t object_type() const { return m_object_type; }
        rb_color_t color_flag() const { return m_color_flag; }
        int32_t left_sibling_id() const { return m_left_sibling_id; }
        int32_t right_sibling_id() const { return m_right_sibling_id; }
        int32_t child_id() const { return m_child_id; }
        std::string clsid() const { return m_clsid; }

        /**
         * User-defined flags for storage or root storage objects
         */
        uint32_t state() const { return m_state; }

        /**
         * Creation time, in Windows FILETIME format (number of 100-nanosecond intervals since January 1, 1601, UTC)
         */
        uint64_t time_create() const { return m_time_create; }

        /**
         * Modification time, in Windows FILETIME format (number of 100-nanosecond intervals since January 1, 1601, UTC).
         */
        uint64_t time_mod() const { return m_time_mod; }

        /**
         * For stream object, number of starting sector. For a root storage object, first sector of the mini stream, if the mini stream exists.
         */
        int32_t ofs() const { return m_ofs; }

        /**
         * For stream object, size of user-defined data in bytes. For a root storage object, size of the mini stream.
         */
        uint64_t size() const { return m_size; }
        microsoft_cfb_t* _root() const { return m__root; }
        kaitai::kstruct* _parent() const { return m__parent; }
    };

private:
    bool f_sector_size;
    int32_t m_sector_size;

public:
    int32_t sector_size();

private:
    bool f_fat;
    fat_entries_t* m_fat;

public:
    fat_entries_t* fat();

private:
    bool f_dir;
    dir_entry_t* m_dir;

public:
    dir_entry_t* dir();

private:
    cfb_header_t* m_header;
    microsoft_cfb_t* m__root;
    kaitai::kstruct* m__parent;
    std::string m__raw_fat;
    kaitai::kstream* m__io__raw_fat;

public:
    cfb_header_t* header() const { return m_header; }
    microsoft_cfb_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
    std::string _raw_fat() const { return m__raw_fat; }
    kaitai::kstream* _io__raw_fat() const { return m__io__raw_fat; }
};

#endif  // MICROSOFT_CFB_H_

microsoft_cfb.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "microsoft_cfb.h"

#include <stdexcept>



/**
 * Builds the top-level parser and immediately parses the file header.
 *
 * @param p__io     stream the container is read from
 * @param p__parent enclosing structure, stored as given (may be 0)
 * @param p__root   not used: the top-level object always registers itself as root
 */
microsoft_cfb_t::microsoft_cfb_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = this;

    // Give every owning pointer and cached value a defined state up front so
    // that nothing is ever observed holding an indeterminate value.
    m_header = 0;
    m_fat = 0;
    m_dir = 0;
    m__io__raw_fat = 0;
    m_sector_size = 0;

    // Lazy instances start out unevaluated.
    f_sector_size = false;
    f_fat = false;
    f_dir = false;

    _read();
}

// Parses the only eagerly read field: the file header, taken from the
// current position of this object's stream.
void microsoft_cfb_t::_read() {
    m_header = new cfb_header_t(m__io, this, m__root);
}

// Releases the header and whichever lazy instances were actually evaluated.
microsoft_cfb_t::~microsoft_cfb_t() {
    delete m_header;

    // The FAT and its backing substream only exist once fat() has been called.
    if (f_fat) {
        delete m__io__raw_fat;
        delete m_fat;
    }

    // Likewise, the directory entry only exists once dir() has been called.
    if (f_dir)
        delete m_dir;
}

/**
 * Parses the file header from p__io.
 *
 * @param p__io     stream positioned at the start of the header
 * @param p__parent owning top-level structure
 * @param p__root   root of the parsed tree
 */
microsoft_cfb_t::cfb_header_t::cfb_header_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    m_difat = 0;
    try {
        _read();
    } catch (...) {
        // The destructor does not run for an object whose constructor throws,
        // so whatever _read() allocated before failing must be released here.
        delete m_difat;
        throw;
    }
}

void microsoft_cfb_t::cfb_header_t::_read() {
    m_signature = m__io->ensure_fixed_contents(std::string("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8));
    m_clsid = m__io->ensure_fixed_contents(std::string("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 16));
    m_version_minor = m__io->read_u2le();
    m_version_major = m__io->read_u2le();
    m_byte_order = m__io->ensure_fixed_contents(std::string("\xFE\xFF", 2));
    m_sector_shift = m__io->read_u2le();
    m_mini_sector_shift = m__io->read_u2le();
    m_reserved1 = m__io->read_bytes(6);
    m_size_dir = m__io->read_s4le();
    m_size_fat = m__io->read_s4le();
    m_ofs_dir = m__io->read_s4le();
    m_transaction_seq = m__io->read_s4le();
    m_mini_stream_cutoff_size = m__io->read_s4le();
    m_ofs_mini_fat = m__io->read_s4le();
    m_size_mini_fat = m__io->read_s4le();
    m_ofs_difat = m__io->read_s4le();
    m_size_difat = m__io->read_s4le();
    int l_difat = 109;
    m_difat = new std::vector<int32_t>();
    m_difat->reserve(l_difat);
    for (int i = 0; i < l_difat; i++) {
        m_difat->push_back(m__io->read_s4le());
    }
}

// Releases the DIFAT vector allocated by _read().
microsoft_cfb_t::cfb_header_t::~cfb_header_t() {
    delete m_difat;
}

/**
 * Reads FAT entries from p__io until the stream reports its end.
 *
 * @param p__io     stream holding the FAT bytes
 * @param p__parent owning top-level structure
 * @param p__root   root of the parsed tree
 */
microsoft_cfb_t::fat_entries_t::fat_entries_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    m_entries = 0;
    try {
        _read();
    } catch (...) {
        // The destructor does not run for an object whose constructor throws,
        // so the vector allocated by _read() must be released here.
        delete m_entries;
        throw;
    }
}

// Collects signed 32-bit little-endian entries until the stream is exhausted.
void microsoft_cfb_t::fat_entries_t::_read() {
    m_entries = new std::vector<int32_t>();
    while (!m__io->is_eof())
        m_entries->push_back(m__io->read_s4le());
}

// Releases the entry vector allocated by _read().
microsoft_cfb_t::fat_entries_t::~fat_entries_t() {
    delete m_entries;
}

/**
 * Parses one directory entry from the current position of p__io.
 *
 * @param p__io     stream positioned at the start of the entry
 * @param p__parent structure that referenced this entry (top-level object or another entry)
 * @param p__root   root of the parsed tree
 */
microsoft_cfb_t::dir_entry_t::dir_entry_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;

    // Lazy instances start out unevaluated, flagged as absent and with null
    // pointers, so a getter that finds nothing to load hands back 0 rather
    // than an indeterminate pointer.
    f_mini_stream = false;
    n_mini_stream = true;

    f_child = false;
    n_child = true;
    m_child = 0;

    f_left_sibling = false;
    n_left_sibling = true;
    m_left_sibling = 0;

    f_right_sibling = false;
    n_right_sibling = true;
    m_right_sibling = 0;

    _read();
}

void microsoft_cfb_t::dir_entry_t::_read() {
    m_name = kaitai::kstream::bytes_to_str(m__io->read_bytes(64), std::string("UTF-16LE"));
    m_name_len = m__io->read_u2le();
    m_object_type = static_cast<microsoft_cfb_t::dir_entry_t::obj_type_t>(m__io->read_u1());
    m_color_flag = static_cast<microsoft_cfb_t::dir_entry_t::rb_color_t>(m__io->read_u1());
    m_left_sibling_id = m__io->read_s4le();
    m_right_sibling_id = m__io->read_s4le();
    m_child_id = m__io->read_s4le();
    m_clsid = m__io->read_bytes(16);
    m_state = m__io->read_u4le();
    m_time_create = m__io->read_u8le();
    m_time_mod = m__io->read_u8le();
    m_ofs = m__io->read_s4le();
    m_size = m__io->read_u8le();
}

// Releases the related entries that were actually loaded. The mini stream is
// held by value and needs no explicit cleanup.
microsoft_cfb_t::dir_entry_t::~dir_entry_t() {
    const bool owns_child = f_child && !n_child;
    if (owns_child)
        delete m_child;

    const bool owns_left = f_left_sibling && !n_left_sibling;
    if (owns_left)
        delete m_left_sibling;

    const bool owns_right = f_right_sibling && !n_right_sibling;
    if (owns_right)
        delete m_right_sibling;
}

/**
 * Lazily reads the mini stream of a root storage entry.
 *
 * For an entry whose object type is OBJ_TYPE_ROOT_STORAGE, size() bytes are
 * read from the root stream at offset (ofs() + 1) * sector size, and the
 * stream position is restored afterwards. For any other object type the
 * result is empty and flagged null. The outcome is cached.
 *
 * @return the mini stream bytes, or an empty string when absent
 */
std::string microsoft_cfb_t::dir_entry_t::mini_stream() {
    if (f_mini_stream)
        return m_mini_stream;
    n_mini_stream = true;
    if (object_type() == OBJ_TYPE_ROOT_STORAGE) {
        n_mini_stream = false;
        kaitai::kstream *io = _root()->_io();
        std::streampos _pos = io->pos();
        // Compute the byte offset in 64 bits: the 32-bit product overflows
        // (undefined behaviour) for large sector numbers taken from the file.
        const int64_t start = (static_cast<int64_t>(ofs()) + 1) * _root()->sector_size();
        io->seek(start);
        m_mini_stream = io->read_bytes(size());
        io->seek(_pos);
    }
    f_mini_stream = true;
    return m_mini_stream;
}

/**
 * Lazily loads the child directory entry.
 *
 * When child_id() is not -1, the entry at index child_id() of the directory
 * stream (128 bytes per entry, starting at (ofs_dir + 1) * sector size) is
 * parsed from the root stream and the stream position is restored. The
 * outcome is cached.
 *
 * @return the child entry (owned by this object), or 0 when there is none
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir_entry_t::child() {
    if (f_child)
        return m_child;
    // Start from a defined "no child" state so the null case returns 0
    // instead of whatever the uninitialised member happened to contain.
    m_child = 0;
    n_child = true;
    if (child_id() != -1) {
        n_child = false;
        kaitai::kstream *io = _root()->_io();
        std::streampos _pos = io->pos();
        // Compute the byte offset in 64 bits: the 32-bit expression overflows
        // (undefined behaviour) for large values taken from the file.
        const int64_t dir_start = (static_cast<int64_t>(_root()->header()->ofs_dir()) + 1) * _root()->sector_size();
        io->seek(dir_start + static_cast<int64_t>(child_id()) * 128);
        m_child = new dir_entry_t(io, this, m__root);
        io->seek(_pos);
    }
    f_child = true;
    return m_child;
}

/**
 * Lazily loads the left sibling directory entry.
 *
 * When left_sibling_id() is not -1, the entry at that index of the directory
 * stream (128 bytes per entry, starting at (ofs_dir + 1) * sector size) is
 * parsed from the root stream and the stream position is restored. The
 * outcome is cached.
 *
 * @return the left sibling (owned by this object), or 0 when there is none
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir_entry_t::left_sibling() {
    if (f_left_sibling)
        return m_left_sibling;
    // Start from a defined "no sibling" state so the null case returns 0
    // instead of whatever the uninitialised member happened to contain.
    m_left_sibling = 0;
    n_left_sibling = true;
    if (left_sibling_id() != -1) {
        n_left_sibling = false;
        kaitai::kstream *io = _root()->_io();
        std::streampos _pos = io->pos();
        // Compute the byte offset in 64 bits: the 32-bit expression overflows
        // (undefined behaviour) for large values taken from the file.
        const int64_t dir_start = (static_cast<int64_t>(_root()->header()->ofs_dir()) + 1) * _root()->sector_size();
        io->seek(dir_start + static_cast<int64_t>(left_sibling_id()) * 128);
        m_left_sibling = new dir_entry_t(io, this, m__root);
        io->seek(_pos);
    }
    f_left_sibling = true;
    return m_left_sibling;
}

/**
 * Lazily loads the right sibling directory entry.
 *
 * When right_sibling_id() is not -1, the entry at that index of the directory
 * stream (128 bytes per entry, starting at (ofs_dir + 1) * sector size) is
 * parsed from the root stream and the stream position is restored. The
 * outcome is cached.
 *
 * @return the right sibling (owned by this object), or 0 when there is none
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir_entry_t::right_sibling() {
    if (f_right_sibling)
        return m_right_sibling;
    // Start from a defined "no sibling" state so the null case returns 0
    // instead of whatever the uninitialised member happened to contain.
    m_right_sibling = 0;
    n_right_sibling = true;
    if (right_sibling_id() != -1) {
        n_right_sibling = false;
        kaitai::kstream *io = _root()->_io();
        std::streampos _pos = io->pos();
        // Compute the byte offset in 64 bits: the 32-bit expression overflows
        // (undefined behaviour) for large values taken from the file.
        const int64_t dir_start = (static_cast<int64_t>(_root()->header()->ofs_dir()) + 1) * _root()->sector_size();
        io->seek(dir_start + static_cast<int64_t>(right_sibling_id()) * 128);
        m_right_sibling = new dir_entry_t(io, this, m__root);
        io->seek(_pos);
    }
    f_right_sibling = true;
    return m_right_sibling;
}

/**
 * Sector size in bytes, computed as 1 << header()->sector_shift() and cached.
 *
 * @return the sector size
 * @throws std::runtime_error if the shift stored in the file cannot be
 *         represented in the 32-bit signed result
 */
int32_t microsoft_cfb_t::sector_size() {
    if (f_sector_size)
        return m_sector_size;
    const uint16_t shift = header()->sector_shift();
    // The shift comes straight from the file; shifting a 32-bit signed value
    // by 31 or more is undefined behaviour, so reject such input explicitly.
    if (shift > 30)
        throw std::runtime_error("microsoft_cfb: sector_shift is out of range");
    m_sector_size = static_cast<int32_t>(1) << shift;
    f_sector_size = true;
    return m_sector_size;
}

/**
 * Lazily parses the FAT.
 *
 * Reads header()->size_fat() sectors starting at byte offset sector_size()
 * into a raw buffer, parses that buffer through a dedicated substream and
 * restores the position of the main stream. The outcome is cached.
 *
 * @return the parsed FAT entries (owned by this object)
 */
microsoft_cfb_t::fat_entries_t* microsoft_cfb_t::fat() {
    if (f_fat)
        return m_fat;
    std::streampos _pos = m__io->pos();
    m__io->seek(sector_size());
    // Compute the byte count in 64 bits: the 32-bit product overflows
    // (undefined behaviour) for large sector counts taken from the file.
    const int64_t fat_bytes = static_cast<int64_t>(header()->size_fat()) * sector_size();
    m__raw_fat = m__io->read_bytes(fat_bytes);
    m__io__raw_fat = new kaitai::kstream(m__raw_fat);
    try {
        m_fat = new fat_entries_t(m__io__raw_fat, this, m__root);
    } catch (...) {
        // f_fat is still false, so the destructor would not release the
        // substream; free it here to avoid leaking it when parsing fails.
        delete m__io__raw_fat;
        m__io__raw_fat = 0;
        throw;
    }
    m__io->seek(_pos);
    f_fat = true;
    return m_fat;
}

/**
 * Lazily parses the directory entry located at byte offset
 * (header()->ofs_dir() + 1) * sector_size(), restoring the stream position
 * afterwards. The outcome is cached.
 *
 * @return the parsed directory entry (owned by this object)
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir() {
    if (f_dir)
        return m_dir;
    std::streampos _pos = m__io->pos();
    // Compute the byte offset in 64 bits: the 32-bit product overflows
    // (undefined behaviour) for large sector numbers taken from the file.
    const int64_t dir_start = (static_cast<int64_t>(header()->ofs_dir()) + 1) * sector_size();
    m__io->seek(dir_start);
    m_dir = new dir_entry_t(m__io, this, m__root);
    m__io->seek(_pos);
    f_dir = true;
    return m_dir;
}