This page hosts a formal specification of the Microsoft Compound File Binary (CFB) file format, also known as the OLE (Object Linking and Embedding) file format, written in Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
All parsing code for C++98/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.
For C++, the easiest way is to clone the runtime library sources and build them along with your project.
Using Kaitai Struct in C++/STL usually consists of 3 steps.
First, we need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
#include <fstream>
std::ifstream is("path/to/local/file.bin", std::ifstream::binary);
#include <sstream>
std::istringstream is(str);
#include <sstream>
const char buf[] = { ... };
std::string str(buf, sizeof buf);
std::istringstream is(str);
#include "kaitai/kaitaistream.h"
kaitai::kstream ks(&is);
microsoft_cfb_t data(&ks);
After that, one can get various attributes from the structure by invoking getter methods like:
data.header() // => get header
#ifndef MICROSOFT_CFB_H_
#define MICROSOFT_CFB_H_
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <vector>
#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif
/**
* Parser object for a Microsoft Compound File Binary (CFB) container.
*
* Construction parses only the header; sector_size(), fat() and dir() are
* computed on first call and cached afterwards. The object holds raw
* pointers to the header, FAT, root directory entry and the FAT substream,
* all of which are released by _clean_up().
*
* NOTE(review): no copy constructor / assignment operator is declared here,
* so unless the kaitai::kstruct base forbids it, a compiler-generated copy
* would duplicate those owning raw pointers - avoid copying instances.
*/
class microsoft_cfb_t : public kaitai::kstruct {
public:
class cfb_header_t;
class fat_entries_t;
class dir_entry_t;
/**
* Parses the header from p__io. p__parent is stored as-is; p__root is
* ignored because this object always registers itself as the root.
*/
microsoft_cfb_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, microsoft_cfb_t* p__root = 0);
private:
void _read();
void _clean_up();
public:
~microsoft_cfb_t();
/**
* Fixed-layout file header: validated magic fields followed by sizes and
* starting sector numbers, and the first 109 DIFAT entries.
*/
class cfb_header_t : public kaitai::kstruct {
public:
cfb_header_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent = 0, microsoft_cfb_t* p__root = 0);
private:
void _read();
void _clean_up();
public:
~cfb_header_t();
private:
std::string m_signature;
std::string m_clsid;
uint16_t m_version_minor;
uint16_t m_version_major;
std::string m_byte_order;
uint16_t m_sector_shift;
uint16_t m_mini_sector_shift;
std::string m_reserved1;
int32_t m_size_dir;
int32_t m_size_fat;
int32_t m_ofs_dir;
int32_t m_transaction_seq;
int32_t m_mini_stream_cutoff_size;
int32_t m_ofs_mini_fat;
int32_t m_size_mini_fat;
int32_t m_ofs_difat;
int32_t m_size_difat;
std::vector<int32_t>* m_difat;
microsoft_cfb_t* m__root;
microsoft_cfb_t* m__parent;
public:
/**
* Magic bytes that confirm that this is a CFB file
*/
std::string signature() const { return m_signature; }
/**
* Reserved class ID field, must be all 0
*/
std::string clsid() const { return m_clsid; }
uint16_t version_minor() const { return m_version_minor; }
uint16_t version_major() const { return m_version_major; }
/**
* In theory, specifies a byte order. In practice, no other values besides FE FF (which imply little endian order) are used.
*/
std::string byte_order() const { return m_byte_order; }
/**
* For major version 3, must be 0x9 (sector size = 512 bytes). For major version 4, must be 0xc (sector size = 4096 bytes).
*/
uint16_t sector_shift() const { return m_sector_shift; }
uint16_t mini_sector_shift() const { return m_mini_sector_shift; }
/**
* Six bytes read verbatim after the mini sector shift; not validated by the parser.
*/
std::string reserved1() const { return m_reserved1; }
/**
* Number of directory sectors in this file. For major version 3, must be 0.
*/
int32_t size_dir() const { return m_size_dir; }
/**
* Number of FAT sectors in this file.
*/
int32_t size_fat() const { return m_size_fat; }
/**
* Starting sector number for directory stream.
*/
int32_t ofs_dir() const { return m_ofs_dir; }
/**
* A transaction sequence number, which is incremented each time the file is saved if transactions are implemented, 0 otherwise.
*/
int32_t transaction_seq() const { return m_transaction_seq; }
int32_t mini_stream_cutoff_size() const { return m_mini_stream_cutoff_size; }
/**
* Starting sector number for mini FAT.
*/
int32_t ofs_mini_fat() const { return m_ofs_mini_fat; }
/**
* Number of mini FAT sectors in this file.
*/
int32_t size_mini_fat() const { return m_size_mini_fat; }
/**
* Starting sector number for DIFAT.
*/
int32_t ofs_difat() const { return m_ofs_difat; }
/**
* Number of DIFAT sectors in this file.
*/
int32_t size_difat() const { return m_size_difat; }
/**
* The 109 signed 32-bit DIFAT entries stored in the header. The vector is owned by this object.
*/
std::vector<int32_t>* difat() const { return m_difat; }
microsoft_cfb_t* _root() const { return m__root; }
microsoft_cfb_t* _parent() const { return m__parent; }
};
/**
* Flat list of signed 32-bit little-endian FAT entries, read until the
* end of the stream it was constructed on.
*/
class fat_entries_t : public kaitai::kstruct {
public:
fat_entries_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent = 0, microsoft_cfb_t* p__root = 0);
private:
void _read();
void _clean_up();
public:
~fat_entries_t();
private:
std::vector<int32_t>* m_entries;
microsoft_cfb_t* m__root;
microsoft_cfb_t* m__parent;
public:
/**
* All entries in file order. The vector is owned by this object.
*/
std::vector<int32_t>* entries() const { return m_entries; }
microsoft_cfb_t* _root() const { return m__root; }
microsoft_cfb_t* _parent() const { return m__parent; }
};
/**
* One directory entry. The sequential fields are parsed on construction;
* mini_stream(), child(), left_sibling() and right_sibling() are resolved
* on first call by seeking in the root stream.
*/
class dir_entry_t : public kaitai::kstruct {
public:
/**
* Values of the one-byte object type field.
*/
enum obj_type_t {
OBJ_TYPE_UNKNOWN = 0,
OBJ_TYPE_STORAGE = 1,
OBJ_TYPE_STREAM = 2,
OBJ_TYPE_ROOT_STORAGE = 5
};
/**
* Values of the one-byte color flag field.
*/
enum rb_color_t {
RB_COLOR_RED = 0,
RB_COLOR_BLACK = 1
};
dir_entry_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, microsoft_cfb_t* p__root = 0);
private:
void _read();
void _clean_up();
public:
~dir_entry_t();
private:
// Lazy-instance bookkeeping: f_* = value has been computed and cached,
// n_* = the instance's condition was false on the last evaluation.
bool f_mini_stream;
std::string m_mini_stream;
bool n_mini_stream;
public:
bool _is_null_mini_stream() { mini_stream(); return n_mini_stream; };
private:
public:
/**
* Bytes of the mini stream; only read when this entry is the root storage object, otherwise an empty string is returned.
*/
std::string mini_stream();
private:
bool f_child;
dir_entry_t* m_child;
bool n_child;
public:
bool _is_null_child() { child(); return n_child; };
private:
public:
/**
* Entry referenced by child_id(), or a null pointer when child_id() is -1. Owned by this entry.
*/
dir_entry_t* child();
private:
bool f_left_sibling;
dir_entry_t* m_left_sibling;
bool n_left_sibling;
public:
bool _is_null_left_sibling() { left_sibling(); return n_left_sibling; };
private:
public:
/**
* Entry referenced by left_sibling_id(), or a null pointer when left_sibling_id() is -1. Owned by this entry.
*/
dir_entry_t* left_sibling();
private:
bool f_right_sibling;
dir_entry_t* m_right_sibling;
bool n_right_sibling;
public:
bool _is_null_right_sibling() { right_sibling(); return n_right_sibling; };
private:
public:
/**
* Entry referenced by right_sibling_id(), or a null pointer when right_sibling_id() is -1. Owned by this entry.
*/
dir_entry_t* right_sibling();
private:
std::string m_name;
uint16_t m_name_len;
obj_type_t m_object_type;
rb_color_t m_color_flag;
int32_t m_left_sibling_id;
int32_t m_right_sibling_id;
int32_t m_child_id;
std::string m_clsid;
uint32_t m_state;
uint64_t m_time_create;
uint64_t m_time_mod;
int32_t m_ofs;
uint64_t m_size;
microsoft_cfb_t* m__root;
kaitai::kstruct* m__parent;
public:
/**
* The full 64-byte name field converted from UTF-16LE; it is not trimmed to name_len().
*/
std::string name() const { return m_name; }
uint16_t name_len() const { return m_name_len; }
obj_type_t object_type() const { return m_object_type; }
rb_color_t color_flag() const { return m_color_flag; }
/**
* Index of the left sibling entry; -1 means there is none.
*/
int32_t left_sibling_id() const { return m_left_sibling_id; }
/**
* Index of the right sibling entry; -1 means there is none.
*/
int32_t right_sibling_id() const { return m_right_sibling_id; }
/**
* Index of the child entry; -1 means there is none.
*/
int32_t child_id() const { return m_child_id; }
std::string clsid() const { return m_clsid; }
/**
* User-defined flags for storage or root storage objects
*/
uint32_t state() const { return m_state; }
/**
* Creation time, in Windows FILETIME format (number of 100-nanosecond intervals since January 1, 1601, UTC)
*/
uint64_t time_create() const { return m_time_create; }
/**
* Modification time, in Windows FILETIME format (number of 100-nanosecond intervals since January 1, 1601, UTC).
*/
uint64_t time_mod() const { return m_time_mod; }
/**
* For stream object, number of starting sector. For a root storage object, first sector of the mini stream, if the mini stream exists.
*/
int32_t ofs() const { return m_ofs; }
/**
* For stream object, size of user-defined data in bytes. For a root storage object, size of the mini stream.
*/
uint64_t size() const { return m_size; }
microsoft_cfb_t* _root() const { return m__root; }
kaitai::kstruct* _parent() const { return m__parent; }
};
private:
bool f_sector_size;
int32_t m_sector_size;
public:
/**
* Sector size in bytes, computed as 1 << header()->sector_shift() and cached.
*/
int32_t sector_size();
private:
bool f_fat;
fat_entries_t* m_fat;
public:
/**
* FAT entries parsed from header()->size_fat() * sector_size() bytes starting at byte offset sector_size(). Owned by this object.
*/
fat_entries_t* fat();
private:
bool f_dir;
dir_entry_t* m_dir;
public:
/**
* Directory entry located at byte offset (header()->ofs_dir() + 1) * sector_size(). Owned by this object.
*/
dir_entry_t* dir();
private:
cfb_header_t* m_header;
microsoft_cfb_t* m__root;
kaitai::kstruct* m__parent;
// Raw bytes backing fat(), and the substream created over them.
std::string m__raw_fat;
kaitai::kstream* m__io__raw_fat;
public:
cfb_header_t* header() const { return m_header; }
microsoft_cfb_t* _root() const { return m__root; }
kaitai::kstruct* _parent() const { return m__parent; }
std::string _raw_fat() const { return m__raw_fat; }
kaitai::kstream* _io__raw_fat() const { return m__io__raw_fat; }
};
#endif // MICROSOFT_CFB_H_
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
#include "microsoft_cfb.h"
#include "kaitai/exceptions.h"
/**
 * Builds the root object on top of p__io and immediately parses the header.
 *
 * p__parent is stored as the parent link. p__root is ignored: this object
 * always becomes its own root. If parsing throws, whatever was allocated so
 * far is released and the exception is rethrown to the caller.
 */
microsoft_cfb_t::microsoft_cfb_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    // Tree links.
    m__root = this;
    m__parent = p__parent;

    // Owned pointers start out null so _clean_up() can run at any point.
    m_header = 0;
    m_fat = 0;
    m_dir = 0;
    m__io__raw_fat = 0;

    // None of the lazy instances has been computed yet.
    f_sector_size = false;
    f_fat = false;
    f_dir = false;

    try {
        _read();
    } catch (...) {
        _clean_up();
        throw;
    }
}
void microsoft_cfb_t::_read() {
m_header = new cfb_header_t(m__io, this, m__root);
}
/**
 * Releases everything this object allocated by delegating to _clean_up().
 */
microsoft_cfb_t::~microsoft_cfb_t() {
    this->_clean_up();
}
/**
 * Frees the header, the FAT substream, the FAT and the directory entry.
 *
 * The pointers are released unconditionally instead of being gated on the
 * f_fat / f_dir flags. Those flags are only set as the last step of fat() and
 * dir(), so an exception thrown partway through either (for example while
 * constructing the FAT, or while restoring the stream position) would leave
 * an allocated object that the gated version never freed. All four pointers
 * are zeroed by the constructor before any parsing starts, and deleting a
 * null pointer is a no-op, so this is safe in every state.
 */
void microsoft_cfb_t::_clean_up() {
    delete m_header;
    m_header = 0;

    // Same release order as before: substream first, then the parsed FAT.
    delete m__io__raw_fat;
    m__io__raw_fat = 0;
    delete m_fat;
    m_fat = 0;

    delete m_dir;
    m_dir = 0;
}
/**
 * Stores the parent/root links and parses the header fields from p__io.
 * On a parse failure the partially built state is released and the
 * exception is rethrown.
 */
microsoft_cfb_t::cfb_header_t::cfb_header_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    // Null first, so _clean_up() is valid even if _read() fails early.
    m_difat = 0;
    m__root = p__root;
    m__parent = p__parent;

    try {
        _read();
    } catch (...) {
        _clean_up();
        throw;
    }
}
/**
 * Reads the header fields sequentially from the stream.
 *
 * The signature, class ID and byte-order marker are compared against their
 * fixed expected values; a mismatch throws
 * kaitai::validation_not_equal_error<std::string>. All multi-byte integers
 * are little-endian. The header ends with 109 signed 32-bit DIFAT entries.
 */
void microsoft_cfb_t::cfb_header_t::_read() {
    // 8-byte magic.
    const std::string expected_signature("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8);
    m_signature = m__io->read_bytes(8);
    if (signature() != expected_signature) {
        throw kaitai::validation_not_equal_error<std::string>(expected_signature, signature(), _io(), std::string("/types/cfb_header/seq/0"));
    }

    // 16-byte class ID, required to be all zero.
    const std::string expected_clsid("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 16);
    m_clsid = m__io->read_bytes(16);
    if (clsid() != expected_clsid) {
        throw kaitai::validation_not_equal_error<std::string>(expected_clsid, clsid(), _io(), std::string("/types/cfb_header/seq/1"));
    }

    m_version_minor = m__io->read_u2le();
    m_version_major = m__io->read_u2le();

    // 2-byte byte-order marker.
    const std::string expected_byte_order("\xFE\xFF", 2);
    m_byte_order = m__io->read_bytes(2);
    if (byte_order() != expected_byte_order) {
        throw kaitai::validation_not_equal_error<std::string>(expected_byte_order, byte_order(), _io(), std::string("/types/cfb_header/seq/4"));
    }

    m_sector_shift = m__io->read_u2le();
    m_mini_sector_shift = m__io->read_u2le();
    m_reserved1 = m__io->read_bytes(6);

    // Sizes and starting sector numbers, each a signed 32-bit value.
    m_size_dir = m__io->read_s4le();
    m_size_fat = m__io->read_s4le();
    m_ofs_dir = m__io->read_s4le();
    m_transaction_seq = m__io->read_s4le();
    m_mini_stream_cutoff_size = m__io->read_s4le();
    m_ofs_mini_fat = m__io->read_s4le();
    m_size_mini_fat = m__io->read_s4le();
    m_ofs_difat = m__io->read_s4le();
    m_size_difat = m__io->read_s4le();

    // Fixed-size DIFAT array stored in the header.
    m_difat = new std::vector<int32_t>();
    for (int remaining = 109; remaining > 0; --remaining) {
        const int32_t difat_entry = m__io->read_s4le();
        m_difat->push_back(difat_entry);
    }
}
/**
 * Releases the DIFAT vector by delegating to _clean_up().
 */
microsoft_cfb_t::cfb_header_t::~cfb_header_t() {
    this->_clean_up();
}
/**
 * Frees the DIFAT vector, if one was allocated, and resets the pointer.
 */
void microsoft_cfb_t::cfb_header_t::_clean_up() {
    // Deleting a null pointer is a no-op, so no explicit guard is needed.
    delete m_difat;
    m_difat = 0;
}
/**
 * Stores the parent/root links and parses every FAT entry available in
 * p__io. On a parse failure the entries read so far are released and the
 * exception is rethrown.
 */
microsoft_cfb_t::fat_entries_t::fat_entries_t(kaitai::kstream* p__io, microsoft_cfb_t* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    // Null first, so _clean_up() is valid even if _read() fails early.
    m_entries = 0;
    m__root = p__root;
    m__parent = p__parent;

    try {
        _read();
    } catch (...) {
        _clean_up();
        throw;
    }
}
/**
 * Reads signed 32-bit little-endian entries until the stream reports
 * end-of-file, appending each one to a freshly allocated vector.
 */
void microsoft_cfb_t::fat_entries_t::_read() {
    m_entries = new std::vector<int32_t>();
    for (;;) {
        if (m__io->is_eof())
            break;
        const int32_t entry = m__io->read_s4le();
        m_entries->push_back(entry);
    }
}
/**
 * Releases the entries vector by delegating to _clean_up().
 */
microsoft_cfb_t::fat_entries_t::~fat_entries_t() {
    this->_clean_up();
}
/**
 * Frees the entries vector, if one was allocated, and resets the pointer.
 */
void microsoft_cfb_t::fat_entries_t::_clean_up() {
    // Deleting a null pointer is a no-op, so no explicit guard is needed.
    delete m_entries;
    m_entries = 0;
}
/**
 * Stores the parent/root links and parses one directory entry from the
 * current position of p__io. Linked entries and the mini stream are not
 * resolved here; they are loaded on demand by their accessors. On a parse
 * failure _clean_up() runs and the exception is rethrown.
 */
microsoft_cfb_t::dir_entry_t::dir_entry_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, microsoft_cfb_t* p__root) : kaitai::kstruct(p__io) {
    m__root = p__root;
    m__parent = p__parent;

    // No linked entry has been loaded yet.
    m_child = 0;
    m_left_sibling = 0;
    m_right_sibling = 0;

    // None of the lazy instances has been computed yet.
    f_mini_stream = false;
    f_child = false;
    f_left_sibling = false;
    f_right_sibling = false;

    try {
        _read();
    } catch (...) {
        _clean_up();
        throw;
    }
}
void microsoft_cfb_t::dir_entry_t::_read() {
m_name = kaitai::kstream::bytes_to_str(m__io->read_bytes(64), std::string("UTF-16LE"));
m_name_len = m__io->read_u2le();
m_object_type = static_cast<microsoft_cfb_t::dir_entry_t::obj_type_t>(m__io->read_u1());
m_color_flag = static_cast<microsoft_cfb_t::dir_entry_t::rb_color_t>(m__io->read_u1());
m_left_sibling_id = m__io->read_s4le();
m_right_sibling_id = m__io->read_s4le();
m_child_id = m__io->read_s4le();
m_clsid = m__io->read_bytes(16);
m_state = m__io->read_u4le();
m_time_create = m__io->read_u8le();
m_time_mod = m__io->read_u8le();
m_ofs = m__io->read_s4le();
m_size = m__io->read_u8le();
}
/**
 * Releases any linked entries that were loaded, by delegating to _clean_up().
 */
microsoft_cfb_t::dir_entry_t::~dir_entry_t() {
    this->_clean_up();
}
/**
 * Deletes the child and sibling entries that were successfully loaded.
 *
 * Each pointer is only touched when its accessor completed (f_* set) with a
 * non-null result (n_* clear). The mini stream is a std::string member, so
 * it needs no manual release.
 */
void microsoft_cfb_t::dir_entry_t::_clean_up() {
    if (f_child && !n_child && m_child) {
        delete m_child;
        m_child = 0;
    }
    if (f_left_sibling && !n_left_sibling && m_left_sibling) {
        delete m_left_sibling;
        m_left_sibling = 0;
    }
    if (f_right_sibling && !n_right_sibling && m_right_sibling) {
        delete m_right_sibling;
        m_right_sibling = 0;
    }
}
std::string microsoft_cfb_t::dir_entry_t::mini_stream() {
if (f_mini_stream)
return m_mini_stream;
n_mini_stream = true;
if (object_type() == microsoft_cfb_t::dir_entry_t::OBJ_TYPE_ROOT_STORAGE) {
n_mini_stream = false;
kaitai::kstream *io = _root()->_io();
std::streampos _pos = io->pos();
io->seek(((ofs() + 1) * _root()->sector_size()));
m_mini_stream = io->read_bytes(size());
io->seek(_pos);
f_mini_stream = true;
}
return m_mini_stream;
}
/**
 * Returns the entry referenced by child_id(), loading it on first use.
 *
 * A child_id() of -1 means there is no child: the instance is marked null
 * and a null pointer is returned. Otherwise the entry is parsed from the root
 * stream at (ofs_dir + 1) * sector_size + child_id * 128, the stream position
 * is restored, and the result is cached. The returned entry is owned by this
 * object.
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir_entry_t::child() {
    if (f_child)
        return m_child;

    const bool has_child = (child_id() != -1);
    n_child = !has_child;
    if (has_child) {
        kaitai::kstream* const root_io = _root()->_io();
        const std::streampos saved_pos = root_io->pos();
        root_io->seek((((_root()->header()->ofs_dir() + 1) * _root()->sector_size()) + (child_id() * 128)));
        m_child = new dir_entry_t(root_io, this, m__root);
        root_io->seek(saved_pos);
        f_child = true;
    }
    return m_child;
}
/**
 * Returns the entry referenced by left_sibling_id(), loading it on first use.
 *
 * A left_sibling_id() of -1 means there is no such sibling: the instance is
 * marked null and a null pointer is returned. Otherwise the entry is parsed
 * from the root stream at (ofs_dir + 1) * sector_size + left_sibling_id * 128,
 * the stream position is restored, and the result is cached. The returned
 * entry is owned by this object.
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir_entry_t::left_sibling() {
    if (f_left_sibling)
        return m_left_sibling;

    const bool has_left_sibling = (left_sibling_id() != -1);
    n_left_sibling = !has_left_sibling;
    if (has_left_sibling) {
        kaitai::kstream* const root_io = _root()->_io();
        const std::streampos saved_pos = root_io->pos();
        root_io->seek((((_root()->header()->ofs_dir() + 1) * _root()->sector_size()) + (left_sibling_id() * 128)));
        m_left_sibling = new dir_entry_t(root_io, this, m__root);
        root_io->seek(saved_pos);
        f_left_sibling = true;
    }
    return m_left_sibling;
}
/**
 * Returns the entry referenced by right_sibling_id(), loading it on first use.
 *
 * A right_sibling_id() of -1 means there is no such sibling: the instance is
 * marked null and a null pointer is returned. Otherwise the entry is parsed
 * from the root stream at (ofs_dir + 1) * sector_size + right_sibling_id * 128,
 * the stream position is restored, and the result is cached. The returned
 * entry is owned by this object.
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir_entry_t::right_sibling() {
    if (f_right_sibling)
        return m_right_sibling;

    const bool has_right_sibling = (right_sibling_id() != -1);
    n_right_sibling = !has_right_sibling;
    if (has_right_sibling) {
        kaitai::kstream* const root_io = _root()->_io();
        const std::streampos saved_pos = root_io->pos();
        root_io->seek((((_root()->header()->ofs_dir() + 1) * _root()->sector_size()) + (right_sibling_id() * 128)));
        m_right_sibling = new dir_entry_t(root_io, this, m__root);
        root_io->seek(saved_pos);
        f_right_sibling = true;
    }
    return m_right_sibling;
}
/**
 * Returns the sector size in bytes, i.e. 1 shifted left by the header's
 * sector_shift, computing it on the first call and caching it afterwards.
 *
 * NOTE(review): sector_shift is an unsigned 16-bit value taken straight from
 * the file and is not range-checked here; a shift of 31 or more does not fit
 * the int32_t result - consider validating it upstream.
 */
int32_t microsoft_cfb_t::sector_size() {
    if (!f_sector_size) {
        m_sector_size = (1 << header()->sector_shift());
        f_sector_size = true;
    }
    return m_sector_size;
}
/**
 * Returns the parsed FAT, loading it on first use.
 *
 * Reads header()->size_fat() * sector_size() bytes starting at byte offset
 * sector_size(), wraps them in a dedicated substream and parses that
 * substream as fat_entries_t. The main stream position is restored
 * afterwards and the result is cached. The returned object is owned by this
 * object.
 *
 * If parsing the entries throws, the substream allocated for this attempt is
 * released before the exception propagates. Without that, the substream
 * stayed allocated with f_fat still false, so it was never freed and a retry
 * would overwrite the pointer.
 */
microsoft_cfb_t::fat_entries_t* microsoft_cfb_t::fat() {
    if (f_fat)
        return m_fat;

    const std::streampos saved_pos = m__io->pos();
    m__io->seek(sector_size());
    m__raw_fat = m__io->read_bytes((header()->size_fat() * sector_size()));

    m__io__raw_fat = new kaitai::kstream(m__raw_fat);
    try {
        m_fat = new fat_entries_t(m__io__raw_fat, this, m__root);
    } catch (...) {
        // Do not leave a half-initialised substream behind.
        delete m__io__raw_fat;
        m__io__raw_fat = 0;
        throw;
    }

    m__io->seek(saved_pos);
    f_fat = true;
    return m_fat;
}
/**
 * Returns the directory entry found at byte offset
 * (header()->ofs_dir() + 1) * sector_size(), loading it on first use.
 *
 * The main stream position is restored after parsing and the result is
 * cached. The returned entry is owned by this object.
 */
microsoft_cfb_t::dir_entry_t* microsoft_cfb_t::dir() {
    if (!f_dir) {
        const std::streampos saved_pos = m__io->pos();
        m__io->seek(((header()->ofs_dir() + 1) * sector_size()));
        m_dir = new dir_entry_t(m__io, this, m__root);
        m__io->seek(saved_pos);
        f_dir = true;
    }
    return m_dir;
}