.pyc file format of Python: GraphViz block diagram (.dot) source

The Python interpreter runs .py files in a two-step process: first it compiles the source into bytecode, then it executes that bytecode. Translating .py source into bytecode is time-consuming, so Python dumps the compiled bytecode into .pyc files, to be reused from the cache at a later time if possible.

A .pyc file is essentially a raw dump of a py_object (see body) with a simple header prepended.

Application

Python

File extension

pyc

KS implementation details

License: CC0-1.0

References

This page hosts a formal specification of the .pyc file format of Python using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

GraphViz block diagram source

python_pyc_27.dot

// Block diagram of the PythonPyc27 (.pyc) layout.
// Each cluster is one type; each table row is one field with the columns
// pos / size / type / id. Bold edges run from a field's id cell to the table
// describing that field's type. Grey (#404040) edges run from a field to the
// cell (size, repeat count, or switched value) that depends on its value.
digraph {
	rankdir=LR;
	node [shape=plaintext];
	subgraph cluster__python_pyc_27 {
		label="PythonPyc27";
		graph[style=dotted];

		// Top level: three fixed-size fields (8 bytes total) followed by
		// the body, which is a single PyObject.
		python_pyc_27__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
			<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
			<TR><TD PORT="version_magic_pos">0</TD><TD PORT="version_magic_size">2</TD><TD>u2le→Version</TD><TD PORT="version_magic_type">version_magic</TD></TR>
			<TR><TD PORT="crlf_pos">2</TD><TD PORT="crlf_size">2</TD><TD>u2le</TD><TD PORT="crlf_type">crlf</TD></TR>
			<TR><TD PORT="modification_timestamp_pos">4</TD><TD PORT="modification_timestamp_size">4</TD><TD>u4le</TD><TD PORT="modification_timestamp_type">modification_timestamp</TD></TR>
			<TR><TD PORT="body_pos">8</TD><TD PORT="body_size">...</TD><TD>PyObject</TD><TD PORT="body_type">body</TD></TR>
		</TABLE>>];
		// CodeObject: four u4le fields (arg_count, local_count, stack_size,
		// flags), then the Assembly-typed code, then PyObject-typed fields
		// with a u4le first_line_no before lnotab.
		subgraph cluster__code_object {
			label="PythonPyc27::CodeObject";
			graph[style=dotted];

			code_object__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="arg_count_pos">0</TD><TD PORT="arg_count_size">4</TD><TD>u4le</TD><TD PORT="arg_count_type">arg_count</TD></TR>
				<TR><TD PORT="local_count_pos">4</TD><TD PORT="local_count_size">4</TD><TD>u4le</TD><TD PORT="local_count_type">local_count</TD></TR>
				<TR><TD PORT="stack_size_pos">8</TD><TD PORT="stack_size_size">4</TD><TD>u4le</TD><TD PORT="stack_size_type">stack_size</TD></TR>
				<TR><TD PORT="flags_pos">12</TD><TD PORT="flags_size">4</TD><TD>u4le→FlagsEnum</TD><TD PORT="flags_type">flags</TD></TR>
				<TR><TD PORT="code_pos">16</TD><TD PORT="code_size">...</TD><TD>Assembly</TD><TD PORT="code_type">code</TD></TR>
				<TR><TD PORT="consts_pos">...</TD><TD PORT="consts_size">...</TD><TD>PyObject</TD><TD PORT="consts_type">consts</TD></TR>
				<TR><TD PORT="names_pos">...</TD><TD PORT="names_size">...</TD><TD>PyObject</TD><TD PORT="names_type">names</TD></TR>
				<TR><TD PORT="var_names_pos">...</TD><TD PORT="var_names_size">...</TD><TD>PyObject</TD><TD PORT="var_names_type">var_names</TD></TR>
				<TR><TD PORT="free_vars_pos">...</TD><TD PORT="free_vars_size">...</TD><TD>PyObject</TD><TD PORT="free_vars_type">free_vars</TD></TR>
				<TR><TD PORT="cell_vars_pos">...</TD><TD PORT="cell_vars_size">...</TD><TD>PyObject</TD><TD PORT="cell_vars_type">cell_vars</TD></TR>
				<TR><TD PORT="filename_pos">...</TD><TD PORT="filename_size">...</TD><TD>PyObject</TD><TD PORT="filename_type">filename</TD></TR>
				<TR><TD PORT="name_pos">...</TD><TD PORT="name_size">...</TD><TD>PyObject</TD><TD PORT="name_type">name</TD></TR>
				<TR><TD PORT="first_line_no_pos">...</TD><TD PORT="first_line_no_size">4</TD><TD>u4le</TD><TD PORT="first_line_no_type">first_line_no</TD></TR>
				<TR><TD PORT="lnotab_pos">...</TD><TD PORT="lnotab_size">...</TD><TD>PyObject</TD><TD PORT="lnotab_type">lnotab</TD></TR>
			</TABLE>>];
		}
		// Assembly: 1-byte string_magic (no type shown), a u4le length, then
		// `length` bytes typed as OpArgs.
		subgraph cluster__assembly {
			label="PythonPyc27::Assembly";
			graph[style=dotted];

			assembly__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="string_magic_pos">0</TD><TD PORT="string_magic_size">1</TD><TD></TD><TD PORT="string_magic_type">string_magic</TD></TR>
				<TR><TD PORT="length_pos">1</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
				<TR><TD PORT="items_pos">5</TD><TD PORT="items_size">length</TD><TD>OpArgs</TD><TD PORT="items_type">items</TD></TR>
			</TABLE>>];
		}
		// OpArg: a 1-byte op_code (OpCodeEnum) followed by a u2le arg.
		// NOTE(review): arg is drawn as an unconditional 2-byte field; if the
		// underlying spec makes it conditional on op_code, this table does not
		// show that -- confirm against the format specification.
		subgraph cluster__op_arg {
			label="PythonPyc27::OpArg";
			graph[style=dotted];

			op_arg__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="op_code_pos">0</TD><TD PORT="op_code_size">1</TD><TD>u1→OpCodeEnum</TD><TD PORT="op_code_type">op_code</TD></TR>
				<TR><TD PORT="arg_pos">1</TD><TD PORT="arg_size">2</TD><TD>u2le</TD><TD PORT="arg_type">arg</TD></TR>
			</TABLE>>];
		}
		// PyObject: a 1-byte type tag (ObjectType) followed by a value whose
		// layout is chosen by switching on that tag.
		subgraph cluster__py_object {
			label="PythonPyc27::PyObject";
			graph[style=dotted];

			py_object__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="type_pos">0</TD><TD PORT="type_size">1</TD><TD>u1→ObjectType</TD><TD PORT="type_type">type</TD></TR>
				<TR><TD PORT="value_pos">1</TD><TD PORT="value_size">...</TD><TD>switch (type)</TD><TD PORT="value_type">value</TD></TR>
			</TABLE>>];
// Switch table for PyObject.value: one row per ObjectType case; the
// case0..case7 ports are the sources of the per-case edges further down.
py_object__seq_value_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
	<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
	<TR><TD>:object_type_string</TD><TD PORT="case0">PyString</TD></TR>
	<TR><TD>:object_type_tuple</TD><TD PORT="case1">Tuple</TD></TR>
	<TR><TD>:object_type_py_true</TD><TD PORT="case2">PyTrue</TD></TR>
	<TR><TD>:object_type_py_false</TD><TD PORT="case3">PyFalse</TD></TR>
	<TR><TD>:object_type_none</TD><TD PORT="case4">PyNone</TD></TR>
	<TR><TD>:object_type_string_ref</TD><TD PORT="case5">StringRef</TD></TR>
	<TR><TD>:object_type_code_object</TD><TD PORT="case6">CodeObject</TD></TR>
	<TR><TD>:object_type_interned</TD><TD PORT="case7">InternedString</TD></TR>
</TABLE>>];
			// PyNone: table has a header row only (no fields).
			subgraph cluster__py_none {
				label="PythonPyc27::PyObject::PyNone";
				graph[style=dotted];

				py_none__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				</TABLE>>];
			}
			// PyFalse: table has a header row only (no fields).
			subgraph cluster__py_false {
				label="PythonPyc27::PyObject::PyFalse";
				graph[style=dotted];

				py_false__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				</TABLE>>];
			}
			// StringRef: a single u4le interned_list_index.
			subgraph cluster__string_ref {
				label="PythonPyc27::PyObject::StringRef";
				graph[style=dotted];

				string_ref__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="interned_list_index_pos">0</TD><TD PORT="interned_list_index_size">4</TD><TD>u4le</TD><TD PORT="interned_list_index_type">interned_list_index</TD></TR>
				</TABLE>>];
			}
			// PyTrue: table has a header row only (no fields).
			subgraph cluster__py_true {
				label="PythonPyc27::PyObject::PyTrue";
				graph[style=dotted];

				py_true__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				</TABLE>>];
			}
			// Tuple: a u4le count followed by `count` PyObject items.
			subgraph cluster__tuple {
				label="PythonPyc27::PyObject::Tuple";
				graph[style=dotted];

				tuple__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="count_pos">0</TD><TD PORT="count_size">4</TD><TD>u4le</TD><TD PORT="count_type">count</TD></TR>
					<TR><TD PORT="items_pos">4</TD><TD PORT="items_size">...</TD><TD>PyObject</TD><TD PORT="items_type">items</TD></TR>
					<TR><TD COLSPAN="4" PORT="items__repeat">repeat count times</TD></TR>
				</TABLE>>];
			}
			// UnicodeString: a u4le length followed by `length` bytes of UTF-8 text.
			// NOTE(review): no switch case and no type edge in this graph targets
			// unicode_string__seq -- confirm whether the type is intentionally unused.
			subgraph cluster__unicode_string {
				label="PythonPyc27::PyObject::UnicodeString";
				graph[style=dotted];

				unicode_string__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="length_pos">0</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
					<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">length</TD><TD>str(utf-8)</TD><TD PORT="data_type">data</TD></TR>
				</TABLE>>];
			}
			// InternedString: a u4le length followed by `length` bytes of UTF-8 text.
			subgraph cluster__interned_string {
				label="PythonPyc27::PyObject::InternedString";
				graph[style=dotted];

				interned_string__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="length_pos">0</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
					<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">length</TD><TD>str(utf-8)</TD><TD PORT="data_type">data</TD></TR>
				</TABLE>>];
			}
			// PyString: a u4le length followed by `length` bytes of data
			// (no type shown for data, unlike the str(utf-8) string types).
			subgraph cluster__py_string {
				label="PythonPyc27::PyObject::PyString";
				graph[style=dotted];

				py_string__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="length_pos">0</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
					<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">length</TD><TD></TD><TD PORT="data_type">data</TD></TR>
				</TABLE>>];
			}
		}
		// OpArgs: OpArg items (each listed as 3 bytes) repeated to end of stream.
		subgraph cluster__op_args {
			label="PythonPyc27::OpArgs";
			graph[style=dotted];

			op_args__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="items_pos">0</TD><TD PORT="items_size">3</TD><TD>OpArg</TD><TD PORT="items_type">items</TD></TR>
				<TR><TD COLSPAN="4" PORT="items__repeat">repeat to end of stream</TD></TR>
			</TABLE>>];
		}
	}
	// Edges. Bold: field -> table of the field's type (or switch table ->
	// table of the selected case). Grey (#404040): field -> the cell that
	// depends on the field's value.

	// Top-level body and CodeObject fields.
	python_pyc_27__seq:body_type -> py_object__seq [style=bold];
	code_object__seq:code_type -> assembly__seq [style=bold];
	code_object__seq:consts_type -> py_object__seq [style=bold];
	code_object__seq:names_type -> py_object__seq [style=bold];
	code_object__seq:var_names_type -> py_object__seq [style=bold];
	code_object__seq:free_vars_type -> py_object__seq [style=bold];
	code_object__seq:cell_vars_type -> py_object__seq [style=bold];
	code_object__seq:filename_type -> py_object__seq [style=bold];
	code_object__seq:name_type -> py_object__seq [style=bold];
	code_object__seq:lnotab_type -> py_object__seq [style=bold];
	// Assembly: length sets the size of items; items are OpArgs.
	assembly__seq:length_type -> assembly__seq:items_size [color="#404040"];
	assembly__seq:items_type -> op_args__seq [style=bold];
	// PyObject.value goes through the switch table, one edge per case.
	py_object__seq:value_type -> py_object__seq_value_switch [style=bold];
	py_object__seq_value_switch:case0 -> py_string__seq [style=bold];
	py_object__seq_value_switch:case1 -> tuple__seq [style=bold];
	py_object__seq_value_switch:case2 -> py_true__seq [style=bold];
	py_object__seq_value_switch:case3 -> py_false__seq [style=bold];
	py_object__seq_value_switch:case4 -> py_none__seq [style=bold];
	py_object__seq_value_switch:case5 -> string_ref__seq [style=bold];
	py_object__seq_value_switch:case6 -> code_object__seq [style=bold];
	py_object__seq_value_switch:case7 -> interned_string__seq [style=bold];
	// The type tag determines which case is used for value.
	py_object__seq:type_type -> py_object__seq:value_type [color="#404040"];
	// Tuple items are PyObjects; count drives the repeat.
	tuple__seq:items_type -> py_object__seq [style=bold];
	tuple__seq:count_type -> tuple__seq:items__repeat [color="#404040"];
	// Each string type's length field sets the size of its data field.
	unicode_string__seq:length_type -> unicode_string__seq:data_size [color="#404040"];
	interned_string__seq:length_type -> interned_string__seq:data_size [color="#404040"];
	py_string__seq:length_type -> py_string__seq:data_size [color="#404040"];
	// OpArgs items are OpArg entries.
	op_args__seq:items_type -> op_arg__seq [style=bold];
}