The Python interpreter runs .py files in a two-step process: first, it compiles the source into bytecode, which it then executes. Translating .py source into bytecode is time-consuming, so Python saves the compiled bytecode in .pyc files, to be reused from cache later if possible.
A .pyc file is essentially a raw dump of a py_object (see body) with a simple header prepended.
This page hosts a formal specification of the Python .pyc file format, written in Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
// Structure diagram for the "PythonPyc27" format.
//
// Reading guide:
//   * Every cluster is one type; its label is the qualified type name.
//   * Every "<name>__seq" node is an HTML-like table listing that type's fields
//     in order, with the columns: pos | size | type | id.
//     "..." in the pos or size column marks a value the diagram does not give
//     as a fixed number.
//   * Bold edges run from a field's id cell to the table of the type it holds.
//   * Grey (#404040) edges run from a field to the size cell / repeat row /
//     switch field that depends on its value.
digraph {
// Lay the graph out left-to-right; nodes are drawn as bare HTML tables.
rankdir=LR;
node [shape=plaintext];
// Top-level type. All other types are nested inside this cluster.
subgraph cluster__python_pyc_27 {
label="PythonPyc27";
graph[style=dotted];
// Top-level sequence: three fixed-size fields occupying positions 0-7
// (version_magic, crlf, modification_timestamp), then a PyObject body at 8.
python_pyc_27__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="version_magic_pos">0</TD><TD PORT="version_magic_size">2</TD><TD>u2le→Version</TD><TD PORT="version_magic_type">version_magic</TD></TR>
<TR><TD PORT="crlf_pos">2</TD><TD PORT="crlf_size">2</TD><TD>u2le</TD><TD PORT="crlf_type">crlf</TD></TR>
<TR><TD PORT="modification_timestamp_pos">4</TD><TD PORT="modification_timestamp_size">4</TD><TD>u4le</TD><TD PORT="modification_timestamp_type">modification_timestamp</TD></TR>
<TR><TD PORT="body_pos">8</TD><TD PORT="body_size">...</TD><TD>PyObject</TD><TD PORT="body_type">body</TD></TR>
</TABLE>>];
// CodeObject: four 4-byte u4le fields (arg_count, local_count, stack_size,
// flags), the Assembly-typed code field, a run of PyObject-typed fields,
// a 4-byte first_line_no, and a final PyObject lnotab.
subgraph cluster__code_object {
label="PythonPyc27::CodeObject";
graph[style=dotted];
code_object__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="arg_count_pos">0</TD><TD PORT="arg_count_size">4</TD><TD>u4le</TD><TD PORT="arg_count_type">arg_count</TD></TR>
<TR><TD PORT="local_count_pos">4</TD><TD PORT="local_count_size">4</TD><TD>u4le</TD><TD PORT="local_count_type">local_count</TD></TR>
<TR><TD PORT="stack_size_pos">8</TD><TD PORT="stack_size_size">4</TD><TD>u4le</TD><TD PORT="stack_size_type">stack_size</TD></TR>
<TR><TD PORT="flags_pos">12</TD><TD PORT="flags_size">4</TD><TD>u4le→FlagsEnum</TD><TD PORT="flags_type">flags</TD></TR>
<TR><TD PORT="code_pos">16</TD><TD PORT="code_size">...</TD><TD>Assembly</TD><TD PORT="code_type">code</TD></TR>
<TR><TD PORT="consts_pos">...</TD><TD PORT="consts_size">...</TD><TD>PyObject</TD><TD PORT="consts_type">consts</TD></TR>
<TR><TD PORT="names_pos">...</TD><TD PORT="names_size">...</TD><TD>PyObject</TD><TD PORT="names_type">names</TD></TR>
<TR><TD PORT="var_names_pos">...</TD><TD PORT="var_names_size">...</TD><TD>PyObject</TD><TD PORT="var_names_type">var_names</TD></TR>
<TR><TD PORT="free_vars_pos">...</TD><TD PORT="free_vars_size">...</TD><TD>PyObject</TD><TD PORT="free_vars_type">free_vars</TD></TR>
<TR><TD PORT="cell_vars_pos">...</TD><TD PORT="cell_vars_size">...</TD><TD>PyObject</TD><TD PORT="cell_vars_type">cell_vars</TD></TR>
<TR><TD PORT="filename_pos">...</TD><TD PORT="filename_size">...</TD><TD>PyObject</TD><TD PORT="filename_type">filename</TD></TR>
<TR><TD PORT="name_pos">...</TD><TD PORT="name_size">...</TD><TD>PyObject</TD><TD PORT="name_type">name</TD></TR>
<TR><TD PORT="first_line_no_pos">...</TD><TD PORT="first_line_no_size">4</TD><TD>u4le</TD><TD PORT="first_line_no_type">first_line_no</TD></TR>
<TR><TD PORT="lnotab_pos">...</TD><TD PORT="lnotab_size">...</TD><TD>PyObject</TD><TD PORT="lnotab_type">lnotab</TD></TR>
</TABLE>>];
}
// Assembly: 1-byte string_magic, a u4le length, then `length` bytes of items
// typed as OpArgs. The type cell of string_magic is empty in this diagram.
subgraph cluster__assembly {
label="PythonPyc27::Assembly";
graph[style=dotted];
assembly__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="string_magic_pos">0</TD><TD PORT="string_magic_size">1</TD><TD></TD><TD PORT="string_magic_type">string_magic</TD></TR>
<TR><TD PORT="length_pos">1</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
<TR><TD PORT="items_pos">5</TD><TD PORT="items_size">length</TD><TD>OpArgs</TD><TD PORT="items_type">items</TD></TR>
</TABLE>>];
}
// OpArg: a 1-byte op_code (u1 mapped to OpCodeEnum) followed by a 2-byte
// u2le arg, 3 bytes in total.
subgraph cluster__op_arg {
label="PythonPyc27::OpArg";
graph[style=dotted];
op_arg__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="op_code_pos">0</TD><TD PORT="op_code_size">1</TD><TD>u1→OpCodeEnum</TD><TD PORT="op_code_type">op_code</TD></TR>
<TR><TD PORT="arg_pos">1</TD><TD PORT="arg_size">2</TD><TD>u2le</TD><TD PORT="arg_type">arg</TD></TR>
</TABLE>>];
}
// PyObject: a 1-byte type tag (u1 mapped to ObjectType) followed by a value
// whose type is selected by that tag. The concrete value types are nested
// inside this cluster.
subgraph cluster__py_object {
label="PythonPyc27::PyObject";
graph[style=dotted];
py_object__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="type_pos">0</TD><TD PORT="type_size">1</TD><TD>u1→ObjectType</TD><TD PORT="type_type">type</TD></TR>
<TR><TD PORT="value_pos">1</TD><TD PORT="value_size">...</TD><TD>switch (type)</TD><TD PORT="value_type">value</TD></TR>
</TABLE>>];
// Switch table for PyObject.value: each row maps one ObjectType case to the
// type parsed for it. Ports case0..case7 are the sources of the bold edges
// declared near the end of the file.
py_object__seq_value_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
<TR><TD>:object_type_string</TD><TD PORT="case0">PyString</TD></TR>
<TR><TD>:object_type_tuple</TD><TD PORT="case1">Tuple</TD></TR>
<TR><TD>:object_type_py_true</TD><TD PORT="case2">PyTrue</TD></TR>
<TR><TD>:object_type_py_false</TD><TD PORT="case3">PyFalse</TD></TR>
<TR><TD>:object_type_none</TD><TD PORT="case4">PyNone</TD></TR>
<TR><TD>:object_type_string_ref</TD><TD PORT="case5">StringRef</TD></TR>
<TR><TD>:object_type_code_object</TD><TD PORT="case6">CodeObject</TD></TR>
<TR><TD>:object_type_interned</TD><TD PORT="case7">InternedString</TD></TR>
</TABLE>>];
// PyNone: the table has only its header row, i.e. no fields are listed.
subgraph cluster__py_none {
label="PythonPyc27::PyObject::PyNone";
graph[style=dotted];
py_none__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
</TABLE>>];
}
// PyFalse: the table has only its header row, i.e. no fields are listed.
subgraph cluster__py_false {
label="PythonPyc27::PyObject::PyFalse";
graph[style=dotted];
py_false__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
</TABLE>>];
}
// StringRef: a single 4-byte u4le field, interned_list_index.
subgraph cluster__string_ref {
label="PythonPyc27::PyObject::StringRef";
graph[style=dotted];
string_ref__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="interned_list_index_pos">0</TD><TD PORT="interned_list_index_size">4</TD><TD>u4le</TD><TD PORT="interned_list_index_type">interned_list_index</TD></TR>
</TABLE>>];
}
// PyTrue: the table has only its header row, i.e. no fields are listed.
subgraph cluster__py_true {
label="PythonPyc27::PyObject::PyTrue";
graph[style=dotted];
py_true__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
</TABLE>>];
}
// Tuple: a u4le count followed by PyObject items, repeated `count` times.
subgraph cluster__tuple {
label="PythonPyc27::PyObject::Tuple";
graph[style=dotted];
tuple__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="count_pos">0</TD><TD PORT="count_size">4</TD><TD>u4le</TD><TD PORT="count_type">count</TD></TR>
<TR><TD PORT="items_pos">4</TD><TD PORT="items_size">...</TD><TD>PyObject</TD><TD PORT="items_type">items</TD></TR>
<TR><TD COLSPAN="4" PORT="items__repeat">repeat count times</TD></TR>
</TABLE>>];
}
// UnicodeString: a u4le length followed by `length` bytes of str(utf-8) data.
// NOTE(review): no row of py_object__seq_value_switch and no bold edge in this
// graph points at unicode_string__seq - confirm whether the type is meant to
// be reachable from PyObject.
subgraph cluster__unicode_string {
label="PythonPyc27::PyObject::UnicodeString";
graph[style=dotted];
unicode_string__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="length_pos">0</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">length</TD><TD>str(utf-8)</TD><TD PORT="data_type">data</TD></TR>
</TABLE>>];
}
// InternedString: a u4le length followed by `length` bytes of str(utf-8) data
// (same field layout as the UnicodeString table).
subgraph cluster__interned_string {
label="PythonPyc27::PyObject::InternedString";
graph[style=dotted];
interned_string__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="length_pos">0</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">length</TD><TD>str(utf-8)</TD><TD PORT="data_type">data</TD></TR>
</TABLE>>];
}
// PyString: a u4le length followed by `length` bytes of data. The type cell
// of data is empty in this diagram (unlike the str(utf-8) string types).
subgraph cluster__py_string {
label="PythonPyc27::PyObject::PyString";
graph[style=dotted];
py_string__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="length_pos">0</TD><TD PORT="length_size">4</TD><TD>u4le</TD><TD PORT="length_type">length</TD></TR>
<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">length</TD><TD></TD><TD PORT="data_type">data</TD></TR>
</TABLE>>];
}
}
// OpArgs: 3-byte OpArg items repeated until the end of the stream.
subgraph cluster__op_args {
label="PythonPyc27::OpArgs";
graph[style=dotted];
op_args__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="items_pos">0</TD><TD PORT="items_size">3</TD><TD>OpArg</TD><TD PORT="items_type">items</TD></TR>
<TR><TD COLSPAN="4" PORT="items__repeat">repeat to end of stream</TD></TR>
</TABLE>>];
}
}
// ---- Edges ----
// Bold edges: field -> table of the type that field holds.
// Grey (#404040) edges: field -> the cell whose value depends on it.

// Top-level body and the typed fields of CodeObject.
python_pyc_27__seq:body_type -> py_object__seq [style=bold];
code_object__seq:code_type -> assembly__seq [style=bold];
code_object__seq:consts_type -> py_object__seq [style=bold];
code_object__seq:names_type -> py_object__seq [style=bold];
code_object__seq:var_names_type -> py_object__seq [style=bold];
code_object__seq:free_vars_type -> py_object__seq [style=bold];
code_object__seq:cell_vars_type -> py_object__seq [style=bold];
code_object__seq:filename_type -> py_object__seq [style=bold];
code_object__seq:name_type -> py_object__seq [style=bold];
code_object__seq:lnotab_type -> py_object__seq [style=bold];
// Assembly: length sets the size of items; items are parsed as OpArgs.
assembly__seq:length_type -> assembly__seq:items_size [color="#404040"];
assembly__seq:items_type -> op_args__seq [style=bold];
// PyObject.value goes through the switch table, then on to one concrete type
// per case row.
py_object__seq:value_type -> py_object__seq_value_switch [style=bold];
py_object__seq_value_switch:case0 -> py_string__seq [style=bold];
py_object__seq_value_switch:case1 -> tuple__seq [style=bold];
py_object__seq_value_switch:case2 -> py_true__seq [style=bold];
py_object__seq_value_switch:case3 -> py_false__seq [style=bold];
py_object__seq_value_switch:case4 -> py_none__seq [style=bold];
py_object__seq_value_switch:case5 -> string_ref__seq [style=bold];
py_object__seq_value_switch:case6 -> code_object__seq [style=bold];
py_object__seq_value_switch:case7 -> interned_string__seq [style=bold];
// The type tag determines how value is parsed.
py_object__seq:type_type -> py_object__seq:value_type [color="#404040"];
// Tuple items are PyObjects (recursive); count drives the repeat row.
tuple__seq:items_type -> py_object__seq [style=bold];
tuple__seq:count_type -> tuple__seq:items__repeat [color="#404040"];
// For each string type, length sets the size of data.
unicode_string__seq:length_type -> unicode_string__seq:data_size [color="#404040"];
interned_string__seq:length_type -> interned_string__seq:data_size [color="#404040"];
py_string__seq:length_type -> py_string__seq:data_size [color="#404040"];
// Each OpArgs item is one OpArg.
op_args__seq:items_type -> op_arg__seq [style=bold];
}