DOS MZ executable: C++/STL parsing library

The DOS MZ file format is a traditional format for executables in the MS-DOS environment. Many modern formats (e.g. Windows PE) still maintain a compatibility stub in this format.

As opposed to the .com file format (which is essentially a single 64K code segment of raw CPU instructions), the DOS MZ .exe file format allows more flexible memory management and loading of larger programs, and adds support for relocations.

File extension

["exe", "ovl"]

KS implementation details

License: CC0-1.0

References

This page hosts a formal specification of DOS MZ executable using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
    #include <fstream>
    
    std::ifstream is("path/to/local/file.exe", std::ifstream::binary);
    #include <sstream>
    
    std::istringstream is(str);
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
  2. We need to wrap our input stream into Kaitai stream:
    #include <kaitai/kaitaistream.h>
    
    kaitai::kstream ks(&is);
  3. And finally, we can invoke the parsing:
    dos_mz_t data(&ks);

After that, one can get various attributes from the structure by invoking getter methods like:

data.hdr() // => get hdr

C++/STL source code to parse DOS MZ executable

dos_mz.h

#ifndef DOS_MZ_H_
#define DOS_MZ_H_

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"

#include <stdint.h>
#include <vector>

#if KAITAI_STRUCT_VERSION < 7000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.7 or later is required"
#endif

/**
 * The DOS MZ file format is a traditional format for executables in the
 * MS-DOS environment. Many modern formats (e.g. Windows PE) still maintain
 * a compatibility stub in this format.
 *
 * As opposed to the .com file format (which is essentially a single 64K
 * code segment of raw CPU instructions), the DOS MZ .exe file format
 * allows more flexible memory management and loading of larger programs,
 * and adds support for relocations.
 */

class dos_mz_t : public kaitai::kstruct {

public:
    class mz_header_t;
    class relocation_t;

    dos_mz_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, dos_mz_t* p__root = 0);

private:
    void _read();

public:
    ~dos_mz_t();

    class mz_header_t : public kaitai::kstruct {

    public:

        mz_header_t(kaitai::kstream* p__io, dos_mz_t* p__parent = 0, dos_mz_t* p__root = 0);

    private:
        void _read();

    public:
        ~mz_header_t();

    private:
        std::string m_magic;
        uint16_t m_last_page_extra_bytes;
        uint16_t m_num_pages;
        uint16_t m_num_relocations;
        uint16_t m_header_size;
        uint16_t m_min_allocation;
        uint16_t m_max_allocation;
        uint16_t m_initial_ss;
        uint16_t m_initial_sp;
        uint16_t m_checksum;
        uint16_t m_initial_ip;
        uint16_t m_initial_cs;
        uint16_t m_ofs_relocations;
        uint16_t m_overlay_id;
        dos_mz_t* m__root;
        dos_mz_t* m__parent;

    public:
        std::string magic() const { return m_magic; }
        uint16_t last_page_extra_bytes() const { return m_last_page_extra_bytes; }
        uint16_t num_pages() const { return m_num_pages; }
        uint16_t num_relocations() const { return m_num_relocations; }
        uint16_t header_size() const { return m_header_size; }
        uint16_t min_allocation() const { return m_min_allocation; }
        uint16_t max_allocation() const { return m_max_allocation; }
        uint16_t initial_ss() const { return m_initial_ss; }
        uint16_t initial_sp() const { return m_initial_sp; }
        uint16_t checksum() const { return m_checksum; }
        uint16_t initial_ip() const { return m_initial_ip; }
        uint16_t initial_cs() const { return m_initial_cs; }
        uint16_t ofs_relocations() const { return m_ofs_relocations; }
        uint16_t overlay_id() const { return m_overlay_id; }
        dos_mz_t* _root() const { return m__root; }
        dos_mz_t* _parent() const { return m__parent; }
    };

    class relocation_t : public kaitai::kstruct {

    public:

        relocation_t(kaitai::kstream* p__io, dos_mz_t* p__parent = 0, dos_mz_t* p__root = 0);

    private:
        void _read();

    public:
        ~relocation_t();

    private:
        uint16_t m_ofs;
        uint16_t m_seg;
        dos_mz_t* m__root;
        dos_mz_t* m__parent;

    public:
        uint16_t ofs() const { return m_ofs; }
        uint16_t seg() const { return m_seg; }
        dos_mz_t* _root() const { return m__root; }
        dos_mz_t* _parent() const { return m__parent; }
    };

private:
    mz_header_t* m_hdr;
    std::string m_mz_header2;
    std::vector<relocation_t*>* m_relocations;
    std::string m_body;
    dos_mz_t* m__root;
    kaitai::kstruct* m__parent;

public:
    mz_header_t* hdr() const { return m_hdr; }
    std::string mz_header2() const { return m_mz_header2; }
    std::vector<relocation_t*>* relocations() const { return m_relocations; }
    std::string body() const { return m_body; }
    dos_mz_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
};

#endif  // DOS_MZ_H_

dos_mz.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "dos_mz.h"



// Builds the top-level structure and parses it immediately from p__io.
//
// p__parent is stored as the parent link. p__root is not consulted: this
// object always records itself as the root of the parsed tree.
//
// If parsing throws, everything allocated so far is released before the
// exception is rethrown. The destructor does not run for an object whose
// constructor throws, so without this the header and relocation entries
// already created by _read() would leak.
dos_mz_t::dos_mz_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, dos_mz_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = this;
    // Null the owning pointers first so the cleanup below can tell what
    // _read() managed to allocate before failing.
    m_hdr = 0;
    m_relocations = 0;
    try {
        _read();
    } catch (...) {
        delete m_hdr;
        if (m_relocations != 0) {
            for (std::vector<relocation_t*>::iterator it = m_relocations->begin(); it != m_relocations->end(); ++it) {
                delete *it;
            }
            delete m_relocations;
        }
        throw;
    }
}

void dos_mz_t::_read() {
    m_hdr = new mz_header_t(m__io, this, m__root);
    m_mz_header2 = m__io->read_bytes((hdr()->ofs_relocations() - 28));
    int l_relocations = hdr()->num_relocations();
    m_relocations = new std::vector<relocation_t*>();
    m_relocations->reserve(l_relocations);
    for (int i = 0; i < l_relocations; i++) {
        m_relocations->push_back(new relocation_t(m__io, this, m__root));
    }
    m_body = m__io->read_bytes_full();
}

// Releases everything _read() allocated: the header, each relocation entry,
// and finally the vector that held the entries.
dos_mz_t::~dos_mz_t() {
    delete m_hdr;

    std::vector<relocation_t*>& entries = *m_relocations;
    for (std::vector<relocation_t*>::size_type idx = 0; idx < entries.size(); ++idx) {
        delete entries[idx];
    }
    delete m_relocations;
}

// Records the parent/root links and parses the header fields right away.
// Initializers are listed in member declaration order (m__root, m__parent).
dos_mz_t::mz_header_t::mz_header_t(kaitai::kstream* p__io, dos_mz_t* p__parent, dos_mz_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

void dos_mz_t::mz_header_t::_read() {
    m_magic = m__io->read_bytes(2);
    m_last_page_extra_bytes = m__io->read_u2le();
    m_num_pages = m__io->read_u2le();
    m_num_relocations = m__io->read_u2le();
    m_header_size = m__io->read_u2le();
    m_min_allocation = m__io->read_u2le();
    m_max_allocation = m__io->read_u2le();
    m_initial_ss = m__io->read_u2le();
    m_initial_sp = m__io->read_u2le();
    m_checksum = m__io->read_u2le();
    m_initial_ip = m__io->read_u2le();
    m_initial_cs = m__io->read_u2le();
    m_ofs_relocations = m__io->read_u2le();
    m_overlay_id = m__io->read_u2le();
}

// Intentionally empty: the header deletes nothing here. Its m__root and
// m__parent pointers are left untouched, and the remaining members are a
// std::string and plain integers.
dos_mz_t::mz_header_t::~mz_header_t() {
}

// Records the parent/root links and parses one relocation entry right away.
// Initializers are listed in member declaration order (m__root, m__parent).
dos_mz_t::relocation_t::relocation_t(kaitai::kstream* p__io, dos_mz_t* p__parent, dos_mz_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

void dos_mz_t::relocation_t::_read() {
    m_ofs = m__io->read_u2le();
    m_seg = m__io->read_u2le();
}

// Intentionally empty: a relocation entry deletes nothing here. Its m__root
// and m__parent pointers are left untouched, and the other members are
// plain integers.
dos_mz_t::relocation_t::~relocation_t() {
}