The DOS MZ file format is the traditional format for executables in the MS-DOS environment. Many modern formats (e.g. Windows PE) still maintain a compatibility stub in this format.
As opposed to the .com file format (which basically sports one 64K code segment of raw CPU instructions), the DOS MZ .exe file format allowed more flexible memory management and loading of larger programs, and added support for relocations.
This page hosts a formal specification of the DOS MZ executable format using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
// Graphviz diagram of the DosMz structure.
//
// Every node is an HTML-like table. "seq" tables list fields as
// pos / size / type / id rows; "inst" tables with id / value columns hold
// computed values. Bold edges run from a field's id cell to the table of
// the type named in that row. Grey (#404040) edges run from a field or
// computed value to the cell or table whose expression refers to it.
digraph {
  rankdir=LR;
  node [shape=plaintext];

  subgraph cluster__dos_mz {
    label="DosMz";
    graph[style=dotted];

    // Top-level layout: a header followed by a body of header.len_body bytes.
    dos_mz__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
      <TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
      <TR><TD PORT="header_pos">0</TD><TD PORT="header_size">...</TD><TD>ExeHeader</TD><TD PORT="header_type">header</TD></TR>
      <TR><TD PORT="body_pos">...</TD><TD PORT="body_size">header.len_body</TD><TD></TD><TD PORT="body_type">body</TD></TR>
    </TABLE>>];

    // Relocation entries: located at header.mz.ofs_relocations, repeated
    // header.mz.num_relocations times, present only when the offset is non-zero.
    dos_mz__inst__relocations [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
      <TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
      <TR><TD PORT="relocations_pos">header.mz.ofs_relocations</TD><TD PORT="relocations_size">4</TD><TD>Relocation</TD><TD PORT="relocations_type">relocations</TD></TR>
      <TR><TD COLSPAN="4" PORT="relocations__repeat">repeat header.mz.num_relocations times</TD></TR>
      <TR><TD COLSPAN="4" PORT="relocations__if">if header.mz.ofs_relocations != 0</TD></TR>
    </TABLE>>];

    subgraph cluster__exe_header {
      label="DosMz::ExeHeader";
      graph[style=dotted];

      // A 28-byte MzHeader followed by the remainder of the header area.
      exe_header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
        <TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
        <TR><TD PORT="mz_pos">0</TD><TD PORT="mz_size">28</TD><TD>MzHeader</TD><TD PORT="mz_type">mz</TD></TR>
        <TR><TD PORT="rest_of_header_pos">28</TD><TD PORT="rest_of_header_size">mz.len_header - 28</TD><TD></TD><TD PORT="rest_of_header_type">rest_of_header</TD></TR>
      </TABLE>>];

      // Computed body length: total size in 512-byte pages (the last page
      // counted as last_page_extra_bytes when that field is non-zero) minus
      // the header length.
      // The id cell carries PORT="len_body_type" because an edge below
      // addresses this node through that port; without the PORT Graphviz
      // reports the port as unrecognized.
      exe_header__inst__len_body [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
        <TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
        <TR><TD PORT="len_body_type">len_body</TD><TD>(mz.last_page_extra_bytes == 0 ? mz.num_pages * 512 : (mz.num_pages - 1) * 512 + mz.last_page_extra_bytes) - mz.len_header</TD></TR>
      </TABLE>>];
    }

    subgraph cluster__mz_header {
      label="DosMz::MzHeader";
      graph[style=dotted];

      // Fixed-layout header: a 2-byte magic ("MZ" or "ZM") followed by
      // thirteen little-endian 16-bit fields, 28 bytes in total.
      mz_header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
        <TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
        <TR><TD PORT="magic_pos">0</TD><TD PORT="magic_size">2</TD><TD>str(ASCII)</TD><TD PORT="magic_type">magic</TD></TR>
        <TR><TD COLSPAN="4" PORT="magic__valid">must be any of "MZ", "ZM"</TD></TR>
        <TR><TD PORT="last_page_extra_bytes_pos">2</TD><TD PORT="last_page_extra_bytes_size">2</TD><TD>u2le</TD><TD PORT="last_page_extra_bytes_type">last_page_extra_bytes</TD></TR>
        <TR><TD PORT="num_pages_pos">4</TD><TD PORT="num_pages_size">2</TD><TD>u2le</TD><TD PORT="num_pages_type">num_pages</TD></TR>
        <TR><TD PORT="num_relocations_pos">6</TD><TD PORT="num_relocations_size">2</TD><TD>u2le</TD><TD PORT="num_relocations_type">num_relocations</TD></TR>
        <TR><TD PORT="header_size_pos">8</TD><TD PORT="header_size_size">2</TD><TD>u2le</TD><TD PORT="header_size_type">header_size</TD></TR>
        <TR><TD PORT="min_allocation_pos">10</TD><TD PORT="min_allocation_size">2</TD><TD>u2le</TD><TD PORT="min_allocation_type">min_allocation</TD></TR>
        <TR><TD PORT="max_allocation_pos">12</TD><TD PORT="max_allocation_size">2</TD><TD>u2le</TD><TD PORT="max_allocation_type">max_allocation</TD></TR>
        <TR><TD PORT="initial_ss_pos">14</TD><TD PORT="initial_ss_size">2</TD><TD>u2le</TD><TD PORT="initial_ss_type">initial_ss</TD></TR>
        <TR><TD PORT="initial_sp_pos">16</TD><TD PORT="initial_sp_size">2</TD><TD>u2le</TD><TD PORT="initial_sp_type">initial_sp</TD></TR>
        <TR><TD PORT="checksum_pos">18</TD><TD PORT="checksum_size">2</TD><TD>u2le</TD><TD PORT="checksum_type">checksum</TD></TR>
        <TR><TD PORT="initial_ip_pos">20</TD><TD PORT="initial_ip_size">2</TD><TD>u2le</TD><TD PORT="initial_ip_type">initial_ip</TD></TR>
        <TR><TD PORT="initial_cs_pos">22</TD><TD PORT="initial_cs_size">2</TD><TD>u2le</TD><TD PORT="initial_cs_type">initial_cs</TD></TR>
        <TR><TD PORT="ofs_relocations_pos">24</TD><TD PORT="ofs_relocations_size">2</TD><TD>u2le</TD><TD PORT="ofs_relocations_type">ofs_relocations</TD></TR>
        <TR><TD PORT="overlay_id_pos">26</TD><TD PORT="overlay_id_size">2</TD><TD>u2le</TD><TD PORT="overlay_id_type">overlay_id</TD></TR>
      </TABLE>>];

      // Computed header length: header_size multiplied by 16.
      // The id cell carries PORT="len_header_type" because edges below
      // address this node through that port; without the PORT Graphviz
      // reports the port as unrecognized.
      mz_header__inst__len_header [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
        <TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
        <TR><TD PORT="len_header_type">len_header</TD><TD>header_size * 16</TD></TR>
      </TABLE>>];
    }

    subgraph cluster__relocation {
      label="DosMz::Relocation";
      graph[style=dotted];

      // One 4-byte relocation entry: two little-endian 16-bit fields, ofs then seg.
      relocation__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
        <TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
        <TR><TD PORT="ofs_pos">0</TD><TD PORT="ofs_size">2</TD><TD>u2le</TD><TD PORT="ofs_type">ofs</TD></TR>
        <TR><TD PORT="seg_pos">2</TD><TD PORT="seg_size">2</TD><TD>u2le</TD><TD PORT="seg_type">seg</TD></TR>
      </TABLE>>];
    }
  }

  // Edges for DosMz: header type link, body size, and the relocations
  // position / type / repeat count / presence condition.
  dos_mz__seq:header_type -> exe_header__seq [style=bold];
  exe_header__inst__len_body:len_body_type -> dos_mz__seq:body_size [color="#404040"];
  mz_header__seq:ofs_relocations_type -> dos_mz__inst__relocations:relocations_pos [color="#404040"];
  dos_mz__inst__relocations:relocations_type -> relocation__seq [style=bold];
  mz_header__seq:num_relocations_type -> dos_mz__inst__relocations:relocations__repeat [color="#404040"];
  mz_header__seq:ofs_relocations_type -> dos_mz__inst__relocations:relocations__if [color="#404040"];

  // Edges for ExeHeader: mz type link, and the inputs to rest_of_header's
  // size and to len_body.
  exe_header__seq:mz_type -> mz_header__seq [style=bold];
  mz_header__inst__len_header:len_header_type -> exe_header__seq:rest_of_header_size [color="#404040"];
  exe_header__seq:mz_size -> exe_header__seq:rest_of_header_size [color="#404040"];
  mz_header__seq:last_page_extra_bytes_type -> exe_header__inst__len_body [color="#404040"];
  mz_header__seq:num_pages_type -> exe_header__inst__len_body [color="#404040"];
  mz_header__inst__len_header:len_header_type -> exe_header__inst__len_body [color="#404040"];

  // Edge for MzHeader: header_size feeds len_header.
  mz_header__seq:header_size_type -> mz_header__inst__len_header [color="#404040"];
}