.zip file format: GraphViz block diagram (.dot) source

File extension

zip

KS implementation details

License: CC0-1.0

This page hosts a formal specification of the .zip file format using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

GraphViz block diagram source

zip.dot

digraph {
	// Graph-wide defaults.
	// rankdir=LR lays ranks out left to right, so edges between tables run horizontally.
	rankdir=LR;
	// shape=plaintext draws nodes without an outline; each node is rendered
	// solely by the HTML-like TABLE given as its label.
	node [shape=plaintext];
	subgraph cluster__zip {
		// Outermost cluster: drawn with a dotted frame titled "Zip"; all other
		// type clusters are nested inside it.
		label="Zip";
		graph[style=dotted];

		// Field table of the top-level Zip type.
		// Header row columns: pos | size | type | id. PORT attributes name
		// individual cells so edges can attach to them (node:port syntax).
		// A single field "sections" of type PkSection starts at pos 0 and is
		// repeated to end of stream (COLSPAN row).
		// NOTE(review): "..." presumably marks a pos/size that is not a fixed
		// constant - confirm against the generator.
		zip__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
			<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
			<TR><TD PORT="sections_pos">0</TD><TD PORT="sections_size">...</TD><TD>PkSection</TD><TD PORT="sections_type">sections</TD></TR>
			<TR><TD COLSPAN="4" PORT="sections__repeat">repeat to end of stream</TD></TR>
		</TABLE>>];
		// Zip::LocalFile - a LocalFileHeader at pos 0 followed by "body".
		// The body's size cell reads header.compressed_size; its type cell is
		// empty (presumably raw bytes - confirm against the generator).
		subgraph cluster__local_file {
			label="Zip::LocalFile";
			graph[style=dotted];

			local_file__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="header_pos">0</TD><TD PORT="header_size">...</TD><TD>LocalFileHeader</TD><TD PORT="header_type">header</TD></TR>
				<TR><TD PORT="body_pos">...</TD><TD PORT="body_size">header.compressed_size</TD><TD></TD><TD PORT="body_type">body</TD></TR>
			</TABLE>>];
		}
		// Zip::ExtraField - one extra-field record: a 2-byte code, a 2-byte
		// size, then a body at pos 4 whose type is selected by "code".
		// Nested clusters below describe the body types listed in the switch table.
		subgraph cluster__extra_field {
			label="Zip::ExtraField";
			graph[style=dotted];

			// NOTE(review): "u2le?ExtraCodes" presumably means a u2le value
			// interpreted through the ExtraCodes enum - confirm.
			extra_field__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="code_pos">0</TD><TD PORT="code_size">2</TD><TD>u2le?ExtraCodes</TD><TD PORT="code_type">code</TD></TR>
				<TR><TD PORT="size_pos">2</TD><TD PORT="size_size">2</TD><TD>u2le</TD><TD PORT="size_type">size</TD></TR>
				<TR><TD PORT="body_pos">4</TD><TD PORT="body_size">...</TD><TD>switch (code)</TD><TD PORT="body_type">body</TD></TR>
			</TABLE>>];
// Switch table for ExtraField.body: maps each "code" case to a body type.
// Ports case0..case2 are the attachment points for the per-case edges.
// (Written at column 0, but still inside cluster__extra_field by brace nesting.)
extra_field__seq_body_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
	<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
	<TR><TD>:extra_codes_ntfs</TD><TD PORT="case0">Ntfs</TD></TR>
	<TR><TD>:extra_codes_extended_timestamp</TD><TD PORT="case1">ExtendedTimestamp</TD></TR>
	<TR><TD>:extra_codes_infozip_unix_var_size</TD><TD PORT="case2">InfozipUnixVarSize</TD></TR>
</TABLE>>];
			// Zip::ExtraField::Ntfs - a 4-byte "reserved" field followed by
			// Attribute records repeated to end of stream.
			subgraph cluster__ntfs {
				label="Zip::ExtraField::Ntfs";
				graph[style=dotted];

				ntfs__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="reserved_pos">0</TD><TD PORT="reserved_size">4</TD><TD>u4le</TD><TD PORT="reserved_type">reserved</TD></TR>
					<TR><TD PORT="attributes_pos">4</TD><TD PORT="attributes_size">...</TD><TD>Attribute</TD><TD PORT="attributes_type">attributes</TD></TR>
					<TR><TD COLSPAN="4" PORT="attributes__repeat">repeat to end of stream</TD></TR>
				</TABLE>>];
				// Zip::ExtraField::Ntfs::Attribute - tag (u2le), size (u2le),
				// then a body at pos 4 whose type is selected by "tag".
				subgraph cluster__attribute {
					label="Zip::ExtraField::Ntfs::Attribute";
					graph[style=dotted];

					attribute__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
						<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
						<TR><TD PORT="tag_pos">0</TD><TD PORT="tag_size">2</TD><TD>u2le</TD><TD PORT="tag_type">tag</TD></TR>
						<TR><TD PORT="size_pos">2</TD><TD PORT="size_size">2</TD><TD>u2le</TD><TD PORT="size_type">size</TD></TR>
						<TR><TD PORT="body_pos">4</TD><TD PORT="body_size">...</TD><TD>switch (tag)</TD><TD PORT="body_type">body</TD></TR>
					</TABLE>>];
// Switch table for Attribute.body: only tag value 1 is listed, mapping to Attribute1.
attribute__seq_body_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
	<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
	<TR><TD>1</TD><TD PORT="case0">Attribute1</TD></TR>
</TABLE>>];
				}
				// Zip::ExtraField::Ntfs::Attribute1 - three consecutive u8le
				// fields (last_mod_time, last_access_time, creation_time), 24 bytes total.
				// Declared as a sibling of cluster__attribute inside cluster__ntfs.
				subgraph cluster__attribute_1 {
					label="Zip::ExtraField::Ntfs::Attribute1";
					graph[style=dotted];

					attribute_1__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
						<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
						<TR><TD PORT="last_mod_time_pos">0</TD><TD PORT="last_mod_time_size">8</TD><TD>u8le</TD><TD PORT="last_mod_time_type">last_mod_time</TD></TR>
						<TR><TD PORT="last_access_time_pos">8</TD><TD PORT="last_access_time_size">8</TD><TD>u8le</TD><TD PORT="last_access_time_type">last_access_time</TD></TR>
						<TR><TD PORT="creation_time_pos">16</TD><TD PORT="creation_time_size">8</TD><TD>u8le</TD><TD PORT="creation_time_type">creation_time</TD></TR>
					</TABLE>>];
				}
			}
			// Zip::ExtraField::ExtendedTimestamp - a 1-byte flags field followed
			// by three u4le fields at fixed positions 1, 5 and 9.
			subgraph cluster__extended_timestamp {
				label="Zip::ExtraField::ExtendedTimestamp";
				graph[style=dotted];

				extended_timestamp__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="flags_pos">0</TD><TD PORT="flags_size">1</TD><TD>u1</TD><TD PORT="flags_type">flags</TD></TR>
					<TR><TD PORT="mod_time_pos">1</TD><TD PORT="mod_time_size">4</TD><TD>u4le</TD><TD PORT="mod_time_type">mod_time</TD></TR>
					<TR><TD PORT="access_time_pos">5</TD><TD PORT="access_time_size">4</TD><TD>u4le</TD><TD PORT="access_time_type">access_time</TD></TR>
					<TR><TD PORT="create_time_pos">9</TD><TD PORT="create_time_size">4</TD><TD>u4le</TD><TD PORT="create_time_type">create_time</TD></TR>
				</TABLE>>];
			}
			// Zip::ExtraField::InfozipUnixVarSize - version, then two
			// length-prefixed fields: uid (uid_size bytes) and gid (gid_size bytes).
			// Port naming caveat: "uid_size_size" is the size cell of the uid_size
			// field, whereas "uid_size" is the size cell of the uid field (same
			// pattern for gid). The ports are distinct, but easy to confuse.
			subgraph cluster__infozip_unix_var_size {
				label="Zip::ExtraField::InfozipUnixVarSize";
				graph[style=dotted];

				infozip_unix_var_size__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="version_pos">0</TD><TD PORT="version_size">1</TD><TD>u1</TD><TD PORT="version_type">version</TD></TR>
					<TR><TD PORT="uid_size_pos">1</TD><TD PORT="uid_size_size">1</TD><TD>u1</TD><TD PORT="uid_size_type">uid_size</TD></TR>
					<TR><TD PORT="uid_pos">2</TD><TD PORT="uid_size">uid_size</TD><TD></TD><TD PORT="uid_type">uid</TD></TR>
					<TR><TD PORT="gid_size_pos">...</TD><TD PORT="gid_size_size">1</TD><TD>u1</TD><TD PORT="gid_size_type">gid_size</TD></TR>
					<TR><TD PORT="gid_pos">...</TD><TD PORT="gid_size">gid_size</TD><TD></TD><TD PORT="gid_type">gid</TD></TR>
				</TABLE>>];
			}
		}
		// Zip::CentralDirEntry - 42 bytes of fixed-position fields (pos 0..41)
		// followed by three variable-length fields whose sizes come from the
		// preceding *_len fields: file_name (file_name_len), extra (extra_len,
		// parsed as Extras) and comment (comment_len).
		subgraph cluster__central_dir_entry {
			label="Zip::CentralDirEntry";
			graph[style=dotted];

			// NOTE(review): local_header_offset is listed as s4le (signed) while
			// the other size/offset fields in this table are unsigned - confirm
			// this is intended.
			central_dir_entry__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="version_made_by_pos">0</TD><TD PORT="version_made_by_size">2</TD><TD>u2le</TD><TD PORT="version_made_by_type">version_made_by</TD></TR>
				<TR><TD PORT="version_needed_to_extract_pos">2</TD><TD PORT="version_needed_to_extract_size">2</TD><TD>u2le</TD><TD PORT="version_needed_to_extract_type">version_needed_to_extract</TD></TR>
				<TR><TD PORT="flags_pos">4</TD><TD PORT="flags_size">2</TD><TD>u2le</TD><TD PORT="flags_type">flags</TD></TR>
				<TR><TD PORT="compression_method_pos">6</TD><TD PORT="compression_method_size">2</TD><TD>u2le?Compression</TD><TD PORT="compression_method_type">compression_method</TD></TR>
				<TR><TD PORT="last_mod_file_time_pos">8</TD><TD PORT="last_mod_file_time_size">2</TD><TD>u2le</TD><TD PORT="last_mod_file_time_type">last_mod_file_time</TD></TR>
				<TR><TD PORT="last_mod_file_date_pos">10</TD><TD PORT="last_mod_file_date_size">2</TD><TD>u2le</TD><TD PORT="last_mod_file_date_type">last_mod_file_date</TD></TR>
				<TR><TD PORT="crc32_pos">12</TD><TD PORT="crc32_size">4</TD><TD>u4le</TD><TD PORT="crc32_type">crc32</TD></TR>
				<TR><TD PORT="compressed_size_pos">16</TD><TD PORT="compressed_size_size">4</TD><TD>u4le</TD><TD PORT="compressed_size_type">compressed_size</TD></TR>
				<TR><TD PORT="uncompressed_size_pos">20</TD><TD PORT="uncompressed_size_size">4</TD><TD>u4le</TD><TD PORT="uncompressed_size_type">uncompressed_size</TD></TR>
				<TR><TD PORT="file_name_len_pos">24</TD><TD PORT="file_name_len_size">2</TD><TD>u2le</TD><TD PORT="file_name_len_type">file_name_len</TD></TR>
				<TR><TD PORT="extra_len_pos">26</TD><TD PORT="extra_len_size">2</TD><TD>u2le</TD><TD PORT="extra_len_type">extra_len</TD></TR>
				<TR><TD PORT="comment_len_pos">28</TD><TD PORT="comment_len_size">2</TD><TD>u2le</TD><TD PORT="comment_len_type">comment_len</TD></TR>
				<TR><TD PORT="disk_number_start_pos">30</TD><TD PORT="disk_number_start_size">2</TD><TD>u2le</TD><TD PORT="disk_number_start_type">disk_number_start</TD></TR>
				<TR><TD PORT="int_file_attr_pos">32</TD><TD PORT="int_file_attr_size">2</TD><TD>u2le</TD><TD PORT="int_file_attr_type">int_file_attr</TD></TR>
				<TR><TD PORT="ext_file_attr_pos">34</TD><TD PORT="ext_file_attr_size">4</TD><TD>u4le</TD><TD PORT="ext_file_attr_type">ext_file_attr</TD></TR>
				<TR><TD PORT="local_header_offset_pos">38</TD><TD PORT="local_header_offset_size">4</TD><TD>s4le</TD><TD PORT="local_header_offset_type">local_header_offset</TD></TR>
				<TR><TD PORT="file_name_pos">42</TD><TD PORT="file_name_size">file_name_len</TD><TD>str(UTF-8)</TD><TD PORT="file_name_type">file_name</TD></TR>
				<TR><TD PORT="extra_pos">...</TD><TD PORT="extra_size">extra_len</TD><TD>Extras</TD><TD PORT="extra_type">extra</TD></TR>
				<TR><TD PORT="comment_pos">...</TD><TD PORT="comment_size">comment_len</TD><TD>str(UTF-8)</TD><TD PORT="comment_type">comment</TD></TR>
			</TABLE>>];
			// Separate table for "local_header": a PkSection whose pos cell is
			// the value of local_header_offset rather than a sequential position.
			// NOTE(review): the "__inst__" infix presumably marks a Kaitai Struct
			// instance (as opposed to a seq field) - confirm.
			central_dir_entry__inst__local_header [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="local_header_pos">local_header_offset</TD><TD PORT="local_header_size">...</TD><TD>PkSection</TD><TD PORT="local_header_type">local_header</TD></TR>
			</TABLE>>];
		}
		// Zip::PkSection - the generic record wrapper: a 2-byte magic shown as
		// "50 4B" (hex for ASCII "PK"), a u2le section_type, and a body at
		// pos 4 whose type is selected by section_type.
		subgraph cluster__pk_section {
			label="Zip::PkSection";
			graph[style=dotted];

			pk_section__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="magic_pos">0</TD><TD PORT="magic_size">2</TD><TD>50 4B</TD><TD PORT="magic_type">magic</TD></TR>
				<TR><TD PORT="section_type_pos">2</TD><TD PORT="section_type_size">2</TD><TD>u2le</TD><TD PORT="section_type_type">section_type</TD></TR>
				<TR><TD PORT="body_pos">4</TD><TD PORT="body_size">...</TD><TD>switch (section_type)</TD><TD PORT="body_type">body</TD></TR>
			</TABLE>>];
// Switch table for PkSection.body. Case values are decimal:
// 513 = 0x0201, 1027 = 0x0403, 1541 = 0x0605.
// (Written at column 0, but still inside cluster__pk_section by brace nesting.)
pk_section__seq_body_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
	<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
	<TR><TD>513</TD><TD PORT="case0">CentralDirEntry</TD></TR>
	<TR><TD>1027</TD><TD PORT="case1">LocalFile</TD></TR>
	<TR><TD>1541</TD><TD PORT="case2">EndOfCentralDir</TD></TR>
</TABLE>>];
		}
		// Zip::Extras - a container holding ExtraField records starting at
		// pos 0 and repeated to end of stream.
		subgraph cluster__extras {
			label="Zip::Extras";
			graph[style=dotted];

			extras__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="entries_pos">0</TD><TD PORT="entries_size">...</TD><TD>ExtraField</TD><TD PORT="entries_type">entries</TD></TR>
				<TR><TD COLSPAN="4" PORT="entries__repeat">repeat to end of stream</TD></TR>
			</TABLE>>];
		}
		// Zip::LocalFileHeader - 26 bytes of fixed-position fields (pos 0..25)
		// followed by file_name (file_name_len bytes, UTF-8 string) and
		// extra (extra_len bytes, parsed as Extras).
		subgraph cluster__local_file_header {
			label="Zip::LocalFileHeader";
			graph[style=dotted];

			local_file_header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="version_pos">0</TD><TD PORT="version_size">2</TD><TD>u2le</TD><TD PORT="version_type">version</TD></TR>
				<TR><TD PORT="flags_pos">2</TD><TD PORT="flags_size">2</TD><TD>u2le</TD><TD PORT="flags_type">flags</TD></TR>
				<TR><TD PORT="compression_method_pos">4</TD><TD PORT="compression_method_size">2</TD><TD>u2le?Compression</TD><TD PORT="compression_method_type">compression_method</TD></TR>
				<TR><TD PORT="file_mod_time_pos">6</TD><TD PORT="file_mod_time_size">2</TD><TD>u2le</TD><TD PORT="file_mod_time_type">file_mod_time</TD></TR>
				<TR><TD PORT="file_mod_date_pos">8</TD><TD PORT="file_mod_date_size">2</TD><TD>u2le</TD><TD PORT="file_mod_date_type">file_mod_date</TD></TR>
				<TR><TD PORT="crc32_pos">10</TD><TD PORT="crc32_size">4</TD><TD>u4le</TD><TD PORT="crc32_type">crc32</TD></TR>
				<TR><TD PORT="compressed_size_pos">14</TD><TD PORT="compressed_size_size">4</TD><TD>u4le</TD><TD PORT="compressed_size_type">compressed_size</TD></TR>
				<TR><TD PORT="uncompressed_size_pos">18</TD><TD PORT="uncompressed_size_size">4</TD><TD>u4le</TD><TD PORT="uncompressed_size_type">uncompressed_size</TD></TR>
				<TR><TD PORT="file_name_len_pos">22</TD><TD PORT="file_name_len_size">2</TD><TD>u2le</TD><TD PORT="file_name_len_type">file_name_len</TD></TR>
				<TR><TD PORT="extra_len_pos">24</TD><TD PORT="extra_len_size">2</TD><TD>u2le</TD><TD PORT="extra_len_type">extra_len</TD></TR>
				<TR><TD PORT="file_name_pos">26</TD><TD PORT="file_name_size">file_name_len</TD><TD>str(UTF-8)</TD><TD PORT="file_name_type">file_name</TD></TR>
				<TR><TD PORT="extra_pos">...</TD><TD PORT="extra_size">extra_len</TD><TD>Extras</TD><TD PORT="extra_type">extra</TD></TR>
			</TABLE>>];
		}
		// Zip::EndOfCentralDir - 18 bytes of fixed-position fields (pos 0..17)
		// followed by a UTF-8 comment whose size is given by comment_len.
		subgraph cluster__end_of_central_dir {
			label="Zip::EndOfCentralDir";
			graph[style=dotted];

			end_of_central_dir__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="disk_of_end_of_central_dir_pos">0</TD><TD PORT="disk_of_end_of_central_dir_size">2</TD><TD>u2le</TD><TD PORT="disk_of_end_of_central_dir_type">disk_of_end_of_central_dir</TD></TR>
				<TR><TD PORT="disk_of_central_dir_pos">2</TD><TD PORT="disk_of_central_dir_size">2</TD><TD>u2le</TD><TD PORT="disk_of_central_dir_type">disk_of_central_dir</TD></TR>
				<TR><TD PORT="qty_central_dir_entries_on_disk_pos">4</TD><TD PORT="qty_central_dir_entries_on_disk_size">2</TD><TD>u2le</TD><TD PORT="qty_central_dir_entries_on_disk_type">qty_central_dir_entries_on_disk</TD></TR>
				<TR><TD PORT="qty_central_dir_entries_total_pos">6</TD><TD PORT="qty_central_dir_entries_total_size">2</TD><TD>u2le</TD><TD PORT="qty_central_dir_entries_total_type">qty_central_dir_entries_total</TD></TR>
				<TR><TD PORT="central_dir_size_pos">8</TD><TD PORT="central_dir_size_size">4</TD><TD>u4le</TD><TD PORT="central_dir_size_type">central_dir_size</TD></TR>
				<TR><TD PORT="central_dir_offset_pos">12</TD><TD PORT="central_dir_offset_size">4</TD><TD>u4le</TD><TD PORT="central_dir_offset_type">central_dir_offset</TD></TR>
				<TR><TD PORT="comment_len_pos">16</TD><TD PORT="comment_len_size">2</TD><TD>u2le</TD><TD PORT="comment_len_type">comment_len</TD></TR>
				<TR><TD PORT="comment_pos">18</TD><TD PORT="comment_size">comment_len</TD><TD>str(UTF-8)</TD><TD PORT="comment_type">comment</TD></TR>
			</TABLE>>];
		}
	}
	// Edges between tables. Two visual conventions are used throughout:
	//   [style=bold]      - "is of type": from a field's id cell (or a switch
	//                       case cell) to the table describing that type, or
	//                       from a switched field to its switch table.
	//   [color="#404040"] - "depends on value": from the field supplying a
	//                       value to the size/pos/switch cell that uses it.
	// Endpoints use node:port syntax, where the port is a TD PORT name.

	// Zip and LocalFile.
	zip__seq:sections_type -> pk_section__seq [style=bold];
	local_file__seq:header_type -> local_file_header__seq [style=bold];
	local_file_header__seq:compressed_size_type -> local_file__seq:body_size [color="#404040"];
	// ExtraField: body switch on "code" and its three case targets.
	extra_field__seq:body_type -> extra_field__seq_body_switch [style=bold];
	extra_field__seq_body_switch:case0 -> ntfs__seq [style=bold];
	extra_field__seq_body_switch:case1 -> extended_timestamp__seq [style=bold];
	extra_field__seq_body_switch:case2 -> infozip_unix_var_size__seq [style=bold];
	extra_field__seq:code_type -> extra_field__seq:body_type [color="#404040"];
	// Ntfs attributes: body switch on "tag".
	ntfs__seq:attributes_type -> attribute__seq [style=bold];
	attribute__seq:body_type -> attribute__seq_body_switch [style=bold];
	attribute__seq_body_switch:case0 -> attribute_1__seq [style=bold];
	attribute__seq:tag_type -> attribute__seq:body_type [color="#404040"];
	// InfozipUnixVarSize: target ports "uid_size"/"gid_size" are the size
	// cells of the uid/gid rows, not the uid_size/gid_size rows themselves.
	infozip_unix_var_size__seq:uid_size_type -> infozip_unix_var_size__seq:uid_size [color="#404040"];
	infozip_unix_var_size__seq:gid_size_type -> infozip_unix_var_size__seq:gid_size [color="#404040"];
	// CentralDirEntry: length fields feed the variable-size fields;
	// local_header_offset feeds the pos cell of the local_header table.
	central_dir_entry__seq:file_name_len_type -> central_dir_entry__seq:file_name_size [color="#404040"];
	central_dir_entry__seq:extra_len_type -> central_dir_entry__seq:extra_size [color="#404040"];
	central_dir_entry__seq:extra_type -> extras__seq [style=bold];
	central_dir_entry__seq:comment_len_type -> central_dir_entry__seq:comment_size [color="#404040"];
	central_dir_entry__seq:local_header_offset_type -> central_dir_entry__inst__local_header:local_header_pos [color="#404040"];
	central_dir_entry__inst__local_header:local_header_type -> pk_section__seq [style=bold];
	// PkSection: body switch on "section_type" and its three case targets.
	pk_section__seq:body_type -> pk_section__seq_body_switch [style=bold];
	pk_section__seq_body_switch:case0 -> central_dir_entry__seq [style=bold];
	pk_section__seq_body_switch:case1 -> local_file__seq [style=bold];
	pk_section__seq_body_switch:case2 -> end_of_central_dir__seq [style=bold];
	pk_section__seq:section_type_type -> pk_section__seq:body_type [color="#404040"];
	// Extras, LocalFileHeader and EndOfCentralDir.
	extras__seq:entries_type -> extra_field__seq [style=bold];
	local_file_header__seq:file_name_len_type -> local_file_header__seq:file_name_size [color="#404040"];
	local_file_header__seq:extra_len_type -> local_file_header__seq:extra_size [color="#404040"];
	local_file_header__seq:extra_type -> extras__seq [style=bold];
	end_of_central_dir__seq:comment_len_type -> end_of_central_dir__seq:comment_size [color="#404040"];
}