SQLite3 is a popular serverless SQL engine, implemented as a library to be used within other applications. It keeps its databases as regular disk files.
Every database file is segmented into pages. First page (starting at the very beginning) is special: it contains a file-global header which specifies some data relevant to proper parsing (i.e. format versions, size of page, etc). After the header, normal contents of the first page follow.
Each page would be of some type, and generally, they would be
reached via the links starting from the first page. First page type
(root_page
) is always "btree_page".
This page hosts a formal specification of SQLite3 database file using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
digraph {
rankdir=LR;
node [shape=plaintext];
subgraph cluster__sqlite3 {
label="Sqlite3";
graph[style=dotted];
sqlite3__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="magic_pos">0</TD><TD PORT="magic_size">16</TD><TD></TD><TD PORT="magic_type">magic</TD></TR>
<TR><TD PORT="len_page_mod_pos">16</TD><TD PORT="len_page_mod_size">2</TD><TD>u2be</TD><TD PORT="len_page_mod_type">len_page_mod</TD></TR>
<TR><TD PORT="write_version_pos">18</TD><TD PORT="write_version_size">1</TD><TD>u1→Versions</TD><TD PORT="write_version_type">write_version</TD></TR>
<TR><TD PORT="read_version_pos">19</TD><TD PORT="read_version_size">1</TD><TD>u1→Versions</TD><TD PORT="read_version_type">read_version</TD></TR>
<TR><TD PORT="reserved_space_pos">20</TD><TD PORT="reserved_space_size">1</TD><TD>u1</TD><TD PORT="reserved_space_type">reserved_space</TD></TR>
<TR><TD PORT="max_payload_frac_pos">21</TD><TD PORT="max_payload_frac_size">1</TD><TD>u1</TD><TD PORT="max_payload_frac_type">max_payload_frac</TD></TR>
<TR><TD PORT="min_payload_frac_pos">22</TD><TD PORT="min_payload_frac_size">1</TD><TD>u1</TD><TD PORT="min_payload_frac_type">min_payload_frac</TD></TR>
<TR><TD PORT="leaf_payload_frac_pos">23</TD><TD PORT="leaf_payload_frac_size">1</TD><TD>u1</TD><TD PORT="leaf_payload_frac_type">leaf_payload_frac</TD></TR>
<TR><TD PORT="file_change_counter_pos">24</TD><TD PORT="file_change_counter_size">4</TD><TD>u4be</TD><TD PORT="file_change_counter_type">file_change_counter</TD></TR>
<TR><TD PORT="num_pages_pos">28</TD><TD PORT="num_pages_size">4</TD><TD>u4be</TD><TD PORT="num_pages_type">num_pages</TD></TR>
<TR><TD PORT="first_freelist_trunk_page_pos">32</TD><TD PORT="first_freelist_trunk_page_size">4</TD><TD>u4be</TD><TD PORT="first_freelist_trunk_page_type">first_freelist_trunk_page</TD></TR>
<TR><TD PORT="num_freelist_pages_pos">36</TD><TD PORT="num_freelist_pages_size">4</TD><TD>u4be</TD><TD PORT="num_freelist_pages_type">num_freelist_pages</TD></TR>
<TR><TD PORT="schema_cookie_pos">40</TD><TD PORT="schema_cookie_size">4</TD><TD>u4be</TD><TD PORT="schema_cookie_type">schema_cookie</TD></TR>
<TR><TD PORT="schema_format_pos">44</TD><TD PORT="schema_format_size">4</TD><TD>u4be</TD><TD PORT="schema_format_type">schema_format</TD></TR>
<TR><TD PORT="def_page_cache_size_pos">48</TD><TD PORT="def_page_cache_size_size">4</TD><TD>u4be</TD><TD PORT="def_page_cache_size_type">def_page_cache_size</TD></TR>
<TR><TD PORT="largest_root_page_pos">52</TD><TD PORT="largest_root_page_size">4</TD><TD>u4be</TD><TD PORT="largest_root_page_type">largest_root_page</TD></TR>
<TR><TD PORT="text_encoding_pos">56</TD><TD PORT="text_encoding_size">4</TD><TD>u4be→Encodings</TD><TD PORT="text_encoding_type">text_encoding</TD></TR>
<TR><TD PORT="user_version_pos">60</TD><TD PORT="user_version_size">4</TD><TD>u4be</TD><TD PORT="user_version_type">user_version</TD></TR>
<TR><TD PORT="is_incremental_vacuum_pos">64</TD><TD PORT="is_incremental_vacuum_size">4</TD><TD>u4be</TD><TD PORT="is_incremental_vacuum_type">is_incremental_vacuum</TD></TR>
<TR><TD PORT="application_id_pos">68</TD><TD PORT="application_id_size">4</TD><TD>u4be</TD><TD PORT="application_id_type">application_id</TD></TR>
<TR><TD PORT="reserved_pos">72</TD><TD PORT="reserved_size">20</TD><TD></TD><TD PORT="reserved_type">reserved</TD></TR>
<TR><TD PORT="version_valid_for_pos">92</TD><TD PORT="version_valid_for_size">4</TD><TD>u4be</TD><TD PORT="version_valid_for_type">version_valid_for</TD></TR>
<TR><TD PORT="sqlite_version_number_pos">96</TD><TD PORT="sqlite_version_number_size">4</TD><TD>u4be</TD><TD PORT="sqlite_version_number_type">sqlite_version_number</TD></TR>
<TR><TD PORT="root_page_pos">100</TD><TD PORT="root_page_size">...</TD><TD>BtreePage</TD><TD PORT="root_page_type">root_page</TD></TR>
</TABLE>>];
sqlite3__inst__len_page [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
<TR><TD>len_page</TD><TD>(len_page_mod == 1 ? 65536 : len_page_mod)</TD></TR>
</TABLE>>];
subgraph cluster__serial {
label="Sqlite3::Serial";
graph[style=dotted];
serial__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="code_pos">0</TD><TD PORT="code_size">...</TD><TD>VlqBase128Be</TD><TD PORT="code_type">code</TD></TR>
</TABLE>>];
serial__inst__is_blob [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
<TR><TD>is_blob</TD><TD> ((code.value >= 12) && ((code.value % 2) == 0)) </TD></TR>
</TABLE>>];
serial__inst__is_string [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
<TR><TD>is_string</TD><TD> ((code.value >= 13) && ((code.value % 2) == 1)) </TD></TR>
</TABLE>>];
serial__inst__len_content [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">id</TD><TD BGCOLOR="#E0FFE0">value</TD></TR>
<TR><TD>len_content</TD><TD>((code.value - 12) / 2)</TD></TR>
</TABLE>>];
}
subgraph cluster__btree_page {
label="Sqlite3::BtreePage";
graph[style=dotted];
btree_page__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="page_type_pos">0</TD><TD PORT="page_type_size">1</TD><TD>u1</TD><TD PORT="page_type_type">page_type</TD></TR>
<TR><TD PORT="first_freeblock_pos">1</TD><TD PORT="first_freeblock_size">2</TD><TD>u2be</TD><TD PORT="first_freeblock_type">first_freeblock</TD></TR>
<TR><TD PORT="num_cells_pos">3</TD><TD PORT="num_cells_size">2</TD><TD>u2be</TD><TD PORT="num_cells_type">num_cells</TD></TR>
<TR><TD PORT="ofs_cells_pos">5</TD><TD PORT="ofs_cells_size">2</TD><TD>u2be</TD><TD PORT="ofs_cells_type">ofs_cells</TD></TR>
<TR><TD PORT="num_frag_free_bytes_pos">7</TD><TD PORT="num_frag_free_bytes_size">1</TD><TD>u1</TD><TD PORT="num_frag_free_bytes_type">num_frag_free_bytes</TD></TR>
<TR><TD PORT="right_ptr_pos">8</TD><TD PORT="right_ptr_size">4</TD><TD>u4be</TD><TD PORT="right_ptr_type">right_ptr</TD></TR>
<TR><TD PORT="cells_pos">12</TD><TD PORT="cells_size">2</TD><TD>RefCell</TD><TD PORT="cells_type">cells</TD></TR>
<TR><TD COLSPAN="4" PORT="cells__repeat">repeat num_cells times</TD></TR>
</TABLE>>];
}
subgraph cluster__cell_index_leaf {
label="Sqlite3::CellIndexLeaf";
graph[style=dotted];
cell_index_leaf__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="len_payload_pos">0</TD><TD PORT="len_payload_size">...</TD><TD>VlqBase128Be</TD><TD PORT="len_payload_type">len_payload</TD></TR>
<TR><TD PORT="payload_pos">...</TD><TD PORT="payload_size">len_payload.value</TD><TD>CellPayload</TD><TD PORT="payload_type">payload</TD></TR>
</TABLE>>];
}
subgraph cluster__serials {
label="Sqlite3::Serials";
graph[style=dotted];
serials__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="entries_pos">0</TD><TD PORT="entries_size">...</TD><TD>Serial</TD><TD PORT="entries_type">entries</TD></TR>
<TR><TD COLSPAN="4" PORT="entries__repeat">repeat to end of stream</TD></TR>
</TABLE>>];
}
subgraph cluster__cell_table_leaf {
label="Sqlite3::CellTableLeaf";
graph[style=dotted];
cell_table_leaf__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="len_payload_pos">0</TD><TD PORT="len_payload_size">...</TD><TD>VlqBase128Be</TD><TD PORT="len_payload_type">len_payload</TD></TR>
<TR><TD PORT="row_id_pos">...</TD><TD PORT="row_id_size">...</TD><TD>VlqBase128Be</TD><TD PORT="row_id_type">row_id</TD></TR>
<TR><TD PORT="payload_pos">...</TD><TD PORT="payload_size">len_payload.value</TD><TD>CellPayload</TD><TD PORT="payload_type">payload</TD></TR>
</TABLE>>];
}
subgraph cluster__cell_payload {
label="Sqlite3::CellPayload";
graph[style=dotted];
cell_payload__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="len_header_and_len_pos">0</TD><TD PORT="len_header_and_len_size">...</TD><TD>VlqBase128Be</TD><TD PORT="len_header_and_len_type">len_header_and_len</TD></TR>
<TR><TD PORT="column_serials_pos">...</TD><TD PORT="column_serials_size">(len_header_and_len.value - 1)</TD><TD>Serials</TD><TD PORT="column_serials_type">column_serials</TD></TR>
<TR><TD PORT="column_contents_pos">...</TD><TD PORT="column_contents_size">...</TD><TD>ColumnContent</TD><TD PORT="column_contents_type">column_contents</TD></TR>
<TR><TD COLSPAN="4" PORT="column_contents__repeat">repeat column_serials.entries.length times</TD></TR>
</TABLE>>];
}
subgraph cluster__cell_table_interior {
label="Sqlite3::CellTableInterior";
graph[style=dotted];
cell_table_interior__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="left_child_page_pos">0</TD><TD PORT="left_child_page_size">4</TD><TD>u4be</TD><TD PORT="left_child_page_type">left_child_page</TD></TR>
<TR><TD PORT="row_id_pos">4</TD><TD PORT="row_id_size">...</TD><TD>VlqBase128Be</TD><TD PORT="row_id_type">row_id</TD></TR>
</TABLE>>];
}
subgraph cluster__cell_index_interior {
label="Sqlite3::CellIndexInterior";
graph[style=dotted];
cell_index_interior__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="left_child_page_pos">0</TD><TD PORT="left_child_page_size">4</TD><TD>u4be</TD><TD PORT="left_child_page_type">left_child_page</TD></TR>
<TR><TD PORT="len_payload_pos">4</TD><TD PORT="len_payload_size">...</TD><TD>VlqBase128Be</TD><TD PORT="len_payload_type">len_payload</TD></TR>
<TR><TD PORT="payload_pos">...</TD><TD PORT="payload_size">len_payload.value</TD><TD>CellPayload</TD><TD PORT="payload_type">payload</TD></TR>
</TABLE>>];
}
subgraph cluster__column_content {
label="Sqlite3::ColumnContent";
graph[style=dotted];
column_content__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="as_int_pos">0</TD><TD PORT="as_int_size">...</TD><TD>switch (serial_type.code.value)</TD><TD PORT="as_int_type">as_int</TD></TR>
<TR><TD PORT="as_float_pos">...</TD><TD PORT="as_float_size">8</TD><TD>f8be</TD><TD PORT="as_float_type">as_float</TD></TR>
<TR><TD PORT="as_blob_pos">...</TD><TD PORT="as_blob_size">serial_type.len_content</TD><TD></TD><TD PORT="as_blob_type">as_blob</TD></TR>
<TR><TD PORT="as_str_pos">...</TD><TD PORT="as_str_size">serial_type.len_content</TD><TD>str(UTF-8)</TD><TD PORT="as_str_type">as_str</TD></TR>
</TABLE>>];
column_content__seq_as_int_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
</TABLE>>];
}
subgraph cluster__ref_cell {
label="Sqlite3::RefCell";
graph[style=dotted];
ref_cell__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="ofs_body_pos">0</TD><TD PORT="ofs_body_size">2</TD><TD>u2be</TD><TD PORT="ofs_body_type">ofs_body</TD></TR>
</TABLE>>];
ref_cell__inst__body [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="body_pos">ofs_body</TD><TD PORT="body_size">...</TD><TD>switch (_parent.page_type)</TD><TD PORT="body_type">body</TD></TR>
</TABLE>>];
ref_cell__inst__body_body_switch [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#F0F2E4">case</TD><TD BGCOLOR="#F0F2E4">type</TD></TR>
<TR><TD>13</TD><TD PORT="case0">CellTableLeaf</TD></TR>
<TR><TD>5</TD><TD PORT="case1">CellTableInterior</TD></TR>
<TR><TD>10</TD><TD PORT="case2">CellIndexLeaf</TD></TR>
<TR><TD>2</TD><TD PORT="case3">CellIndexInterior</TD></TR>
</TABLE>>];
}
}
sqlite3__seq:root_page_type -> btree_page__seq [style=bold];
sqlite3__seq:len_page_mod_type -> sqlite3__inst__len_page [color="#404040"];
sqlite3__seq:len_page_mod_type -> sqlite3__inst__len_page [color="#404040"];
serial__seq:code_type -> vlq_base128_be__seq [style=bold];
vlq_base128_be__inst__value:value_type -> serial__inst__is_blob [color="#404040"];
vlq_base128_be__inst__value:value_type -> serial__inst__is_blob [color="#404040"];
vlq_base128_be__inst__value:value_type -> serial__inst__is_string [color="#404040"];
vlq_base128_be__inst__value:value_type -> serial__inst__is_string [color="#404040"];
vlq_base128_be__inst__value:value_type -> serial__inst__len_content [color="#404040"];
btree_page__seq:cells_type -> ref_cell__seq [style=bold];
btree_page__seq:num_cells_type -> btree_page__seq:cells__repeat [color="#404040"];
cell_index_leaf__seq:len_payload_type -> vlq_base128_be__seq [style=bold];
vlq_base128_be__inst__value:value_type -> cell_index_leaf__seq:payload_size [color="#404040"];
cell_index_leaf__seq:payload_type -> cell_payload__seq [style=bold];
serials__seq:entries_type -> serial__seq [style=bold];
cell_table_leaf__seq:len_payload_type -> vlq_base128_be__seq [style=bold];
cell_table_leaf__seq:row_id_type -> vlq_base128_be__seq [style=bold];
vlq_base128_be__inst__value:value_type -> cell_table_leaf__seq:payload_size [color="#404040"];
cell_table_leaf__seq:payload_type -> cell_payload__seq [style=bold];
cell_payload__seq:len_header_and_len_type -> vlq_base128_be__seq [style=bold];
vlq_base128_be__inst__value:value_type -> cell_payload__seq:column_serials_size [color="#404040"];
cell_payload__seq:column_serials_type -> serials__seq [style=bold];
cell_payload__seq:column_contents_type -> column_content__seq [style=bold];
serials__seq:entries_type -> cell_payload__seq:column_contents__repeat [color="#404040"];
cell_table_interior__seq:row_id_type -> vlq_base128_be__seq [style=bold];
cell_index_interior__seq:len_payload_type -> vlq_base128_be__seq [style=bold];
vlq_base128_be__inst__value:value_type -> cell_index_interior__seq:payload_size [color="#404040"];
cell_index_interior__seq:payload_type -> cell_payload__seq [style=bold];
column_content__seq:as_int_type -> column_content__seq_as_int_switch [style=bold];
vlq_base128_be__inst__value:value_type -> column_content__seq:as_int_type [color="#404040"];
serial__inst__len_content:len_content_type -> column_content__seq:as_blob_size [color="#404040"];
serial__inst__len_content:len_content_type -> column_content__seq:as_str_size [color="#404040"];
ref_cell__seq:ofs_body_type -> ref_cell__inst__body:body_pos [color="#404040"];
ref_cell__inst__body:body_type -> ref_cell__inst__body_body_switch [style=bold];
ref_cell__inst__body_body_switch:case0 -> cell_table_leaf__seq [style=bold];
ref_cell__inst__body_body_switch:case1 -> cell_table_interior__seq [style=bold];
ref_cell__inst__body_body_switch:case2 -> cell_index_leaf__seq [style=bold];
ref_cell__inst__body_body_switch:case3 -> cell_index_interior__seq [style=bold];
btree_page__seq:page_type_type -> ref_cell__inst__body:body_type [color="#404040"];
}