PHP phar archive (without stub): GraphViz block diagram (.dot) source

A phar (PHP archive) file. The phar format is a custom archive format from the PHP ecosystem that is used to package a complete PHP library or application into a single self-contained archive. All phar archives start with an executable PHP stub, which can be used to allow executing or including phar files as if they were regular PHP scripts. PHP 5.3 and later include the phar extension, which adds native support for reading and manipulating phar files.

The phar format was originally developed as part of the PEAR library PHP_Archive, first released in 2005. Later, a native PHP extension named "phar" was developed, which was first released on PECL in 2007, and is included with PHP 5.3 and later. The phar extension has effectively superseded the PHP_Archive library, which has not been updated since 2010. The phar extension is also no longer released independently on PECL; it is now developed and released as part of PHP itself.

Because of current limitations in Kaitai Struct (see kaitai-io/kaitai_struct#158 and kaitai-io/kaitai_struct#538), the executable PHP stub that precedes the rest of the archive is not handled by this spec. Before parsing a phar using this spec, the stub must be removed manually.

A phar's stub is terminated by the special token __HALT_COMPILER(); (which may be followed by at most one space, the PHP tag end ?>, and an optional line terminator). The stub termination sequence is immediately followed by the remaining parts of the phar format, as described in this spec.

The phar stub usually contains code that loads the phar and runs a contained PHP file, but this is not required. A minimal valid phar stub is <?php __HALT_COMPILER(); - such a stub makes it impossible to execute the phar directly, but still allows loading or manipulating it using the phar extension.

Note: The phar format does not specify any encoding for text fields (stub, alias name, and all file names), so these fields may contain arbitrary binary data. The actual text encoding used in a specific phar file usually depends on the application that created the phar, and on the standard encoding of the system on which the phar was created.

Application

PHP

File extension

phar

KS implementation details

License: CC0-1.0
Minimal Kaitai Struct required: 0.9

References

This page hosts a formal specification of PHP phar archive (without stub) using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

GraphViz block diagram source

phar_without_stub.dot

// Block diagram of the PharWithoutStub structure and its nested types.
// Each cluster holds one type. A "__seq" node lists that type's fields in a
// table with pos / size / type / id columns; an "__inst__" node shows either a
// field table of the same shape or an id / value pair computed by an expression.
// The edge list at the bottom wires fields to their types and to the places
// where their values are used.
digraph {
	// Lay the graph out left to right.
	rankdir=LR;
	// Nodes get no outline of their own; the HTML-like table label is all that is drawn.
	node [shape=plaintext];
	subgraph cluster__phar_without_stub {
		label="PharWithoutStub";
		graph[style=dotted];

		// Top-level layout: a Manifest at position 0, then one `files` element per
		// manifest entry (each sized by that entry's len_data_compressed), then a
		// Signature.
		// NOTE(review): the size cell of `signature` is empty in this source; it may
		// be a glyph that was lost in extraction - confirm against the generator output.
		phar_without_stub__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
			<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
			<TR><TD PORT="manifest_pos">0</TD><TD PORT="manifest_size">...</TD><TD>Manifest</TD><TD PORT="manifest_type">manifest</TD></TR>
			<TR><TD PORT="files_pos">...</TD><TD PORT="files_size">manifest.file_entries[i].len_data_compressed</TD><TD></TD><TD PORT="files_type">files</TD></TR>
			<TR><TD COLSPAN="4" PORT="files__repeat">repeat manifest.num_files times</TD></TR>
			<TR><TD PORT="signature_pos">...</TD><TD PORT="signature_size"></TD><TD>Signature</TD><TD PORT="signature_type">signature</TD></TR>
		</TABLE>>];
		// SerializedValue: an untyped `raw` field at position 0, plus a `parsed` view
		// that also starts at position 0 and has type PhpSerializedValue.
		// NOTE(review): the size cell of `raw` is empty in this source, like the
		// `signature` size above - confirm the intended content.
		subgraph cluster__serialized_value {
			label="PharWithoutStub::SerializedValue";
			graph[style=dotted];

			serialized_value__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="raw_pos">0</TD><TD PORT="raw_size"></TD><TD></TD><TD PORT="raw_type">raw</TD></TR>
			</TABLE>>];
			serialized_value__inst__parsed [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="parsed_pos">0</TD><TD PORT="parsed_size">...</TD><TD>PhpSerializedValue</TD><TD PORT="parsed_type">parsed</TD></TR>
			</TABLE>>];
		}
		// Signature: `data` takes the bytes remaining in the stream minus 8; the last
		// 8 bytes are a 4-byte `type` (u4le mapped to SignatureType) and a 4-byte `magic`.
		subgraph cluster__signature {
			label="PharWithoutStub::Signature";
			graph[style=dotted];

			signature__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="data_pos">0</TD><TD PORT="data_size">((_io.size - _io.pos) - 8)</TD><TD></TD><TD PORT="data_type">data</TD></TR>
				<TR><TD PORT="type_pos">...</TD><TD PORT="type_size">4</TD><TD>u4le→SignatureType</TD><TD PORT="type_type">type</TD></TR>
				<TR><TD PORT="magic_pos">...</TD><TD PORT="magic_size">4</TD><TD></TD><TD PORT="magic_type">magic</TD></TR>
			</TABLE>>];
		}
		// FileFlags: one 4-byte u4le `value` plus three values derived from it:
		//   permissions      = value & 511  (0x1FF, the low nine bits)
		//   zlib_compressed  = bit 4096 (0x1000) is set
		//   bzip2_compressed = bit 8192 (0x2000) is set
		subgraph cluster__file_flags {
			label="PharWithoutStub::FileFlags";
			graph[style=dotted];

			file_flags__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="value_pos">0</TD><TD PORT="value_size">4</TD><TD>u4le</TD><TD PORT="value_type">value</TD></TR>
			</TABLE>>];
			file_flags__inst__permissions [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
				<TR><TD>permissions</TD><TD>(value &amp; 511)</TD></TR>
			</TABLE>>];
			file_flags__inst__zlib_compressed [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
				<TR><TD>zlib_compressed</TD><TD>(value &amp; 4096) != 0</TD></TR>
			</TABLE>>];
			file_flags__inst__bzip2_compressed [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
				<TR><TD>bzip2_compressed</TD><TD>(value &amp; 8192) != 0</TD></TR>
			</TABLE>>];
		}
		// ApiVersion: four 4-bit fields (type b4, size "4b") - release, major, minor
		// and unused. The Manifest table lists the whole structure as 2 bytes.
		// NOTE(review): positions such as "0:4" presumably mean byte 0, bit offset 4 - confirm.
		subgraph cluster__api_version {
			label="PharWithoutStub::ApiVersion";
			graph[style=dotted];

			api_version__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="release_pos">0</TD><TD PORT="release_size">4b</TD><TD>b4</TD><TD PORT="release_type">release</TD></TR>
				<TR><TD PORT="major_pos">0:4</TD><TD PORT="major_size">4b</TD><TD>b4</TD><TD PORT="major_type">major</TD></TR>
				<TR><TD PORT="minor_pos">1</TD><TD PORT="minor_size">4b</TD><TD>b4</TD><TD PORT="minor_type">minor</TD></TR>
				<TR><TD PORT="unused_pos">1:4</TD><TD PORT="unused_size">4b</TD><TD>b4</TD><TD PORT="unused_type">unused</TD></TR>
			</TABLE>>];
		}
		// GlobalFlags: one 4-byte u4le `value` plus three bit tests derived from it:
		//   any_zlib_compressed  = bit 4096  (0x1000)  is set
		//   any_bzip2_compressed = bit 8192  (0x2000)  is set
		//   has_signature        = bit 65536 (0x10000) is set
		subgraph cluster__global_flags {
			label="PharWithoutStub::GlobalFlags";
			graph[style=dotted];

			global_flags__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="value_pos">0</TD><TD PORT="value_size">4</TD><TD>u4le</TD><TD PORT="value_type">value</TD></TR>
			</TABLE>>];
			global_flags__inst__any_zlib_compressed [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
				<TR><TD>any_zlib_compressed</TD><TD>(value &amp; 4096) != 0</TD></TR>
			</TABLE>>];
			global_flags__inst__any_bzip2_compressed [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
				<TR><TD>any_bzip2_compressed</TD><TD>(value &amp; 8192) != 0</TD></TR>
			</TABLE>>];
			global_flags__inst__has_signature [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
				<TR><TD>has_signature</TD><TD>(value &amp; 65536) != 0</TD></TR>
			</TABLE>>];
		}
		// Manifest: fields at fixed positions 0 to 18 (len_manifest, num_files,
		// api_version, flags, len_alias, then the start of alias), followed by fields
		// whose positions depend on earlier lengths: the rest of `alias` (len_alias
		// bytes), len_metadata, `metadata` (len_metadata bytes) and `file_entries`,
		// repeated num_files times.
		subgraph cluster__manifest {
			label="PharWithoutStub::Manifest";
			graph[style=dotted];

			manifest__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="len_manifest_pos">0</TD><TD PORT="len_manifest_size">4</TD><TD>u4le</TD><TD PORT="len_manifest_type">len_manifest</TD></TR>
				<TR><TD PORT="num_files_pos">4</TD><TD PORT="num_files_size">4</TD><TD>u4le</TD><TD PORT="num_files_type">num_files</TD></TR>
				<TR><TD PORT="api_version_pos">8</TD><TD PORT="api_version_size">2</TD><TD>ApiVersion</TD><TD PORT="api_version_type">api_version</TD></TR>
				<TR><TD PORT="flags_pos">10</TD><TD PORT="flags_size">4</TD><TD>GlobalFlags</TD><TD PORT="flags_type">flags</TD></TR>
				<TR><TD PORT="len_alias_pos">14</TD><TD PORT="len_alias_size">4</TD><TD>u4le</TD><TD PORT="len_alias_type">len_alias</TD></TR>
				<TR><TD PORT="alias_pos">18</TD><TD PORT="alias_size">len_alias</TD><TD></TD><TD PORT="alias_type">alias</TD></TR>
				<TR><TD PORT="len_metadata_pos">...</TD><TD PORT="len_metadata_size">4</TD><TD>u4le</TD><TD PORT="len_metadata_type">len_metadata</TD></TR>
				<TR><TD PORT="metadata_pos">...</TD><TD PORT="metadata_size">len_metadata</TD><TD>SerializedValue</TD><TD PORT="metadata_type">metadata</TD></TR>
				<TR><TD PORT="file_entries_pos">...</TD><TD PORT="file_entries_size">...</TD><TD>FileEntry</TD><TD PORT="file_entries_type">file_entries</TD></TR>
				<TR><TD COLSPAN="4" PORT="file_entries__repeat">repeat num_files times</TD></TR>
			</TABLE>>];
		}
		// FileEntry: a length-prefixed `filename`, five 4-byte fields
		// (len_data_uncompressed, timestamp, len_data_compressed, crc32 and flags,
		// the last typed FileFlags), then a length-prefixed `metadata` of type
		// SerializedValue.
		subgraph cluster__file_entry {
			label="PharWithoutStub::FileEntry";
			graph[style=dotted];

			file_entry__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="len_filename_pos">0</TD><TD PORT="len_filename_size">4</TD><TD>u4le</TD><TD PORT="len_filename_type">len_filename</TD></TR>
				<TR><TD PORT="filename_pos">4</TD><TD PORT="filename_size">len_filename</TD><TD></TD><TD PORT="filename_type">filename</TD></TR>
				<TR><TD PORT="len_data_uncompressed_pos">...</TD><TD PORT="len_data_uncompressed_size">4</TD><TD>u4le</TD><TD PORT="len_data_uncompressed_type">len_data_uncompressed</TD></TR>
				<TR><TD PORT="timestamp_pos">...</TD><TD PORT="timestamp_size">4</TD><TD>u4le</TD><TD PORT="timestamp_type">timestamp</TD></TR>
				<TR><TD PORT="len_data_compressed_pos">...</TD><TD PORT="len_data_compressed_size">4</TD><TD>u4le</TD><TD PORT="len_data_compressed_type">len_data_compressed</TD></TR>
				<TR><TD PORT="crc32_pos">...</TD><TD PORT="crc32_size">4</TD><TD>u4le</TD><TD PORT="crc32_type">crc32</TD></TR>
				<TR><TD PORT="flags_pos">...</TD><TD PORT="flags_size">4</TD><TD>FileFlags</TD><TD PORT="flags_type">flags</TD></TR>
				<TR><TD PORT="len_metadata_pos">...</TD><TD PORT="len_metadata_size">4</TD><TD>u4le</TD><TD PORT="len_metadata_type">len_metadata</TD></TR>
				<TR><TD PORT="metadata_pos">...</TD><TD PORT="metadata_size">len_metadata</TD><TD>SerializedValue</TD><TD PORT="metadata_type">metadata</TD></TR>
			</TABLE>>];
		}
	}
	// Edges. Two styles are used below:
	//   [style=bold]      - from a field's id cell to the table of that field's type.
	//   [color="#404040"] - from a field's id cell to a cell or node whose size,
	//                       repeat count or computed value uses that field.
	phar_without_stub__seq:manifest_type -> manifest__seq [style=bold];
	file_entry__seq:len_data_compressed_type -> phar_without_stub__seq:files_size [color="#404040"];
	manifest__seq:num_files_type -> phar_without_stub__seq:files__repeat [color="#404040"];
	phar_without_stub__seq:signature_type -> signature__seq [style=bold];
	// NOTE(review): php_serialized_value__seq is not declared anywhere in this graph;
	// presumably it belongs to a separate spec. Graphviz creates an undeclared edge
	// endpoint implicitly, so it renders as a bare node labelled with its own name.
	serialized_value__inst__parsed:parsed_type -> php_serialized_value__seq [style=bold];
	file_flags__seq:value_type -> file_flags__inst__permissions [color="#404040"];
	file_flags__seq:value_type -> file_flags__inst__zlib_compressed [color="#404040"];
	file_flags__seq:value_type -> file_flags__inst__bzip2_compressed [color="#404040"];
	global_flags__seq:value_type -> global_flags__inst__any_zlib_compressed [color="#404040"];
	global_flags__seq:value_type -> global_flags__inst__any_bzip2_compressed [color="#404040"];
	global_flags__seq:value_type -> global_flags__inst__has_signature [color="#404040"];
	manifest__seq:api_version_type -> api_version__seq [style=bold];
	manifest__seq:flags_type -> global_flags__seq [style=bold];
	manifest__seq:len_alias_type -> manifest__seq:alias_size [color="#404040"];
	manifest__seq:len_metadata_type -> manifest__seq:metadata_size [color="#404040"];
	manifest__seq:metadata_type -> serialized_value__seq [style=bold];
	manifest__seq:file_entries_type -> file_entry__seq [style=bold];
	manifest__seq:num_files_type -> manifest__seq:file_entries__repeat [color="#404040"];
	file_entry__seq:len_filename_type -> file_entry__seq:filename_size [color="#404040"];
	file_entry__seq:flags_type -> file_flags__seq [style=bold];
	file_entry__seq:len_metadata_type -> file_entry__seq:metadata_size [color="#404040"];
	file_entry__seq:metadata_type -> serialized_value__seq [style=bold];
}