DOS MZ executable: C++11/STL parsing library

DOS MZ file format is a traditional format for executables in the MS-DOS environment. Many modern formats (e.g. Windows PE) still maintain a compatibility stub with this format.

As opposed to the .com file format (which basically sports one 64K code segment of raw CPU instructions), the DOS MZ .exe file format allowed more flexible memory management, loading of larger programs and added support for relocations.

File extension

["exe", "ovl"]

KS implementation details

License: CC0-1.0
Minimal Kaitai Struct required: 0.9

References

This page hosts a formal specification of DOS MZ executable using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
    #include <fstream>
    
    std::ifstream is("path/to/local/file.exe", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into a Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    dos_mz_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.header() // => get header

C++11/STL source code to parse DOS MZ executable

dos_mz.h

#pragma once

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include <vector>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif

/**
 * DOS MZ file format is a traditional format for executables in the MS-DOS
 * environment. Many modern formats (e.g. Windows PE) still maintain a
 * compatibility stub with this format.
 *
 * As opposed to the .com file format (which basically sports one 64K code
 * segment of raw CPU instructions), the DOS MZ .exe file format allowed
 * more flexible memory management, loading of larger programs and
 * added support for relocations.
 *
 * Constructing a dos_mz_t parses the header and the body from the given
 * stream right away; the relocation table is parsed only when
 * relocations() is called.
 * \sa http://www.delorie.com/djgpp/doc/exe/ Source
 */

class dos_mz_t : public kaitai::kstruct {

public:
    class exe_header_t;
    class mz_header_t;
    class relocation_t;

    /**
     * Builds the object and parses it from the stream immediately
     * (the constructor calls _read()).
     * \param p__io stream to parse from
     * \param p__parent parent structure, if any
     * \param p__root root structure, if any
     */
    dos_mz_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, dos_mz_t* p__root = nullptr);

private:
    /** Reads the header, then len_body() bytes of body, from the stream. */
    void _read();
    /** Cleanup hook invoked by the destructor. */
    void _clean_up();

public:
    ~dos_mz_t();

    /**
     * Complete EXE header: the fixed MZ header fields followed by the
     * remaining header bytes.
     */
    class exe_header_t : public kaitai::kstruct {

    public:

        /** Builds the object and parses it from the stream immediately. */
        exe_header_t(kaitai::kstream* p__io, dos_mz_t* p__parent = nullptr, dos_mz_t* p__root = nullptr);

    private:
        /** Reads the MZ header, then len_header() - 28 bytes of remaining header. */
        void _read();
        /** Cleanup hook invoked by the destructor. */
        void _clean_up();

    public:
        ~exe_header_t();

    private:
        // f_len_body tells whether m_len_body already holds the computed value.
        bool f_len_body;
        int32_t m_len_body;

    public:
        /**
         * Body length in bytes, computed on first call and cached:
         * num_pages * 512 when last_page_extra_bytes is 0, otherwise
         * (num_pages - 1) * 512 + last_page_extra_bytes; in both cases
         * minus len_header().
         */
        int32_t len_body();

    private:
        std::unique_ptr<mz_header_t> m_mz;
        std::string m_rest_of_header;
        dos_mz_t* m__root;
        dos_mz_t* m__parent;

    public:
        /** Fixed MZ header fields; the returned pointer is owned by this object. */
        mz_header_t* mz() const { return m_mz.get(); }
        /** Header bytes that follow the fixed fields (returned as a copy). */
        std::string rest_of_header() const { return m_rest_of_header; }
        dos_mz_t* _root() const { return m__root; }
        dos_mz_t* _parent() const { return m__parent; }
    };

    /**
     * Fixed part of the header: a 2-byte magic followed by thirteen
     * unsigned 16-bit little-endian fields (28 bytes in total).
     */
    class mz_header_t : public kaitai::kstruct {

    public:

        /** Builds the object and parses it from the stream immediately. */
        mz_header_t(kaitai::kstream* p__io, dos_mz_t::exe_header_t* p__parent = nullptr, dos_mz_t* p__root = nullptr);

    private:
        /**
         * Reads the magic and the header fields in declaration order.
         * Throws kaitai::validation_not_any_of_error when the magic is
         * neither "MZ" nor "ZM".
         */
        void _read();
        /** Cleanup hook invoked by the destructor. */
        void _clean_up();

    public:
        ~mz_header_t();

    private:
        // f_len_header tells whether m_len_header already holds the computed value.
        bool f_len_header;
        int32_t m_len_header;

    public:
        /** Header length in bytes: header_size() * 16, computed on first call and cached. */
        int32_t len_header();

    private:
        // Raw header fields, declared in the order they are read from the stream.
        std::string m_magic;
        uint16_t m_last_page_extra_bytes;
        uint16_t m_num_pages;
        uint16_t m_num_relocations;
        uint16_t m_header_size;
        uint16_t m_min_allocation;
        uint16_t m_max_allocation;
        uint16_t m_initial_ss;
        uint16_t m_initial_sp;
        uint16_t m_checksum;
        uint16_t m_initial_ip;
        uint16_t m_initial_cs;
        uint16_t m_ofs_relocations;
        uint16_t m_overlay_id;
        dos_mz_t* m__root;
        dos_mz_t::exe_header_t* m__parent;

    public:
        /** 2-byte signature decoded as ASCII; "MZ" or "ZM" (returned as a copy). */
        std::string magic() const { return m_magic; }
        uint16_t last_page_extra_bytes() const { return m_last_page_extra_bytes; }
        uint16_t num_pages() const { return m_num_pages; }
        /** Number of relocation_t entries that relocations() parses. */
        uint16_t num_relocations() const { return m_num_relocations; }
        /** Header size in 16-byte units (see len_header()). */
        uint16_t header_size() const { return m_header_size; }
        uint16_t min_allocation() const { return m_min_allocation; }
        uint16_t max_allocation() const { return m_max_allocation; }
        uint16_t initial_ss() const { return m_initial_ss; }
        uint16_t initial_sp() const { return m_initial_sp; }
        uint16_t checksum() const { return m_checksum; }
        uint16_t initial_ip() const { return m_initial_ip; }
        uint16_t initial_cs() const { return m_initial_cs; }
        /** Stream offset at which relocations() starts reading; 0 means no table. */
        uint16_t ofs_relocations() const { return m_ofs_relocations; }
        uint16_t overlay_id() const { return m_overlay_id; }
        dos_mz_t* _root() const { return m__root; }
        dos_mz_t::exe_header_t* _parent() const { return m__parent; }
    };

    /** One relocation table entry: two unsigned 16-bit little-endian values. */
    class relocation_t : public kaitai::kstruct {

    public:

        /** Builds the object and parses it from the stream immediately. */
        relocation_t(kaitai::kstream* p__io, dos_mz_t* p__parent = nullptr, dos_mz_t* p__root = nullptr);

    private:
        /** Reads ofs, then seg. */
        void _read();
        /** Cleanup hook invoked by the destructor. */
        void _clean_up();

    public:
        ~relocation_t();

    private:
        uint16_t m_ofs;
        uint16_t m_seg;
        dos_mz_t* m__root;
        dos_mz_t* m__parent;

    public:
        uint16_t ofs() const { return m_ofs; }
        uint16_t seg() const { return m_seg; }
        dos_mz_t* _root() const { return m__root; }
        dos_mz_t* _parent() const { return m__parent; }
    };

private:
    // f_relocations: the relocation table has been parsed.
    // n_relocations: the table is absent (header ofs_relocations is 0).
    bool f_relocations;
    std::unique_ptr<std::vector<std::unique_ptr<relocation_t>>> m_relocations;
    bool n_relocations;

public:
    /** Evaluates relocations() first, then reports whether the table is absent. */
    bool _is_null_relocations() { relocations(); return n_relocations; };

private:

public:
    /**
     * Relocation table, parsed on demand from the header's stream at offset
     * ofs_relocations() with num_relocations() entries. Returns nullptr when
     * ofs_relocations() is 0. The returned vector is owned by this object.
     */
    std::vector<std::unique_ptr<relocation_t>>* relocations();

private:
    std::unique_ptr<exe_header_t> m_header;
    std::string m_body;
    dos_mz_t* m__root;
    kaitai::kstruct* m__parent;

public:
    /** Parsed EXE header; the returned pointer is owned by this object. */
    exe_header_t* header() const { return m_header.get(); }
    /** Body bytes read right after the header (returned as a copy). */
    std::string body() const { return m_body; }
    dos_mz_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
};

dos_mz.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "dos_mz.h"
#include "kaitai/exceptions.h"

/**
 * Constructs the top-level structure and parses it from the stream right away.
 *
 * \param p__io stream to parse from
 * \param p__parent parent structure, or nullptr
 * \param p__root root structure; when nullptr this object is its own root
 */
dos_mz_t::dos_mz_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, dos_mz_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    // Honor an explicitly supplied root instead of silently discarding it;
    // the default (nullptr) still makes this object the root.
    m__root = p__root ? p__root : this;
    m_header = nullptr;
    m_relocations = nullptr;
    f_relocations = false;
    // Give the "is null" flag a defined value before relocations() is first
    // evaluated; no table has been parsed yet, so it starts out as null.
    n_relocations = true;
    _read();
}

/**
 * Parses the top-level sequence: the EXE header first, then the body, whose
 * size is the header's computed len_body().
 */
void dos_mz_t::_read() {
    m_header.reset(new exe_header_t(m__io, this, m__root));
    const int32_t body_size = header()->len_body();
    m_body = m__io->read_bytes(body_size);
}

// Destructor: delegates to _clean_up().
dos_mz_t::~dos_mz_t() {
    _clean_up();
}

/**
 * Cleanup hook called from the destructor.
 *
 * There is nothing to release by hand: m_header and m_relocations are
 * std::unique_ptr members and free what they own on destruction. The former
 * conditional on the relocation flags had an empty body and was removed.
 */
void dos_mz_t::_clean_up() {
}

/**
 * Stores the parent/root links, marks len_body as not yet computed and
 * parses the header from the stream right away.
 */
dos_mz_t::exe_header_t::exe_header_t(kaitai::kstream* p__io, dos_mz_t* p__parent, dos_mz_t* p__root)
    : kaitai::kstruct(p__io),
      f_len_body(false),
      m_mz(nullptr),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Parses the fixed MZ header, then keeps whatever header bytes remain.
 * The fixed part read by mz_header_t is 28 bytes long (2-byte magic plus
 * thirteen 2-byte fields), hence the subtraction.
 */
void dos_mz_t::exe_header_t::_read() {
    m_mz.reset(new mz_header_t(m__io, this, m__root));
    const int32_t trailing_len = mz()->len_header() - 28;
    m_rest_of_header = m__io->read_bytes(trailing_len);
}

// Destructor: delegates to _clean_up().
dos_mz_t::exe_header_t::~exe_header_t() {
    _clean_up();
}

// Cleanup hook called from the destructor; intentionally empty.
void dos_mz_t::exe_header_t::_clean_up() {
}

int32_t dos_mz_t::exe_header_t::len_body() {
    if (f_len_body)
        return m_len_body;
    m_len_body = (((mz()->last_page_extra_bytes() == 0) ? ((mz()->num_pages() * 512)) : ((((mz()->num_pages() - 1) * 512) + mz()->last_page_extra_bytes()))) - mz()->len_header());
    f_len_body = true;
    return m_len_body;
}

/**
 * Stores the parent/root links, marks len_header as not yet computed and
 * parses the fixed header fields from the stream right away.
 */
dos_mz_t::mz_header_t::mz_header_t(kaitai::kstream* p__io, dos_mz_t::exe_header_t* p__parent, dos_mz_t* p__root)
    : kaitai::kstruct(p__io),
      f_len_header(false),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

void dos_mz_t::mz_header_t::_read() {
    m_magic = kaitai::kstream::bytes_to_str(m__io->read_bytes(2), std::string("ASCII"));
    if (!( ((magic() == (std::string("MZ"))) || (magic() == (std::string("ZM")))) )) {
        throw kaitai::validation_not_any_of_error<std::string>(magic(), _io(), std::string("/types/mz_header/seq/0"));
    }
    m_last_page_extra_bytes = m__io->read_u2le();
    m_num_pages = m__io->read_u2le();
    m_num_relocations = m__io->read_u2le();
    m_header_size = m__io->read_u2le();
    m_min_allocation = m__io->read_u2le();
    m_max_allocation = m__io->read_u2le();
    m_initial_ss = m__io->read_u2le();
    m_initial_sp = m__io->read_u2le();
    m_checksum = m__io->read_u2le();
    m_initial_ip = m__io->read_u2le();
    m_initial_cs = m__io->read_u2le();
    m_ofs_relocations = m__io->read_u2le();
    m_overlay_id = m__io->read_u2le();
}

// Destructor: delegates to _clean_up().
dos_mz_t::mz_header_t::~mz_header_t() {
    _clean_up();
}

// Cleanup hook called from the destructor; intentionally empty.
void dos_mz_t::mz_header_t::_clean_up() {
}

/**
 * Returns the header length in bytes (header_size() * 16), computing it on
 * the first call and serving the cached value afterwards.
 */
int32_t dos_mz_t::mz_header_t::len_header() {
    if (!f_len_header) {
        m_len_header = (header_size() * 16);
        f_len_header = true;
    }
    return m_len_header;
}

/**
 * Stores the parent/root links and parses one relocation entry from the
 * stream right away.
 */
dos_mz_t::relocation_t::relocation_t(kaitai::kstream* p__io, dos_mz_t* p__parent, dos_mz_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

void dos_mz_t::relocation_t::_read() {
    m_ofs = m__io->read_u2le();
    m_seg = m__io->read_u2le();
}

// Destructor: delegates to _clean_up().
dos_mz_t::relocation_t::~relocation_t() {
    _clean_up();
}

// Cleanup hook called from the destructor; intentionally empty.
void dos_mz_t::relocation_t::_clean_up() {
}

/**
 * Returns the relocation table, parsing it on first use.
 *
 * When the header's ofs_relocations is non-zero, the header's stream is
 * positioned at that offset, num_relocations entries are read, and the
 * previous stream position is restored. When ofs_relocations is 0 there is
 * no table: nullptr is returned and the "is null" flag stays set.
 *
 * The outcome is cached in both cases, so later calls return immediately.
 *
 * \return pointer to the vector owned by this object, or nullptr when the
 *         file declares no relocation table
 */
std::vector<std::unique_ptr<dos_mz_t::relocation_t>>* dos_mz_t::relocations() {
    if (f_relocations)
        return m_relocations.get();

    mz_header_t* const mz_hdr = header()->mz();
    n_relocations = true;
    if (mz_hdr->ofs_relocations() != 0) {
        n_relocations = false;
        kaitai::kstream *io = header()->_io();
        // NOTE(review): if reading an entry throws, the seek back below is
        // not reached and the stream stays at the failure position.
        const std::streampos saved_pos = io->pos();
        io->seek(mz_hdr->ofs_relocations());

        const int l_relocations = mz_hdr->num_relocations();
        // Build into a local vector and publish it only after every entry
        // has been parsed, so the member never holds a partial table.
        std::unique_ptr<std::vector<std::unique_ptr<relocation_t>>> entries(new std::vector<std::unique_ptr<relocation_t>>());
        entries->reserve(l_relocations);
        for (int i = 0; i < l_relocations; i++) {
            entries->push_back(std::unique_ptr<relocation_t>(new relocation_t(io, this, m__root)));
        }

        io->seek(saved_pos);
        m_relocations = std::move(entries);
    }
    // Mark the instance as evaluated on the "no table" path as well, so the
    // header is not re-inspected on every call.
    f_relocations = true;
    return m_relocations.get();
}