InfluxDB is a scalable database, written in Go, that is optimized for storing time series, real-time application metrics, operations monitoring events, etc.
Data is stored in .tsm files, which are conceptually quite simple. Each .tsm file contains a header and a footer; the footer stores the offset of an index. The index is used to find the data block for a requested time boundary.
This page hosts a formal specification of the InfluxDB TSM file format using Kaitai Struct. This specification can be automatically translated into a parsing library for a variety of programming languages.
// Graphviz diagram of the "Tsm" type and the types nested inside it.
// Every node is an HTML-like table whose rows list a field's pos, size, type and id.
// Nodes named *__seq list fields laid out back to back starting at pos 0;
// nodes named *__inst__* list a field read at an explicitly computed position.
// Clusters are nested the same way the type names are (Tsm::Index::IndexHeader::...).
digraph {
// Lay the graph out left to right.
rankdir=LR;
// No node outline: only the table in each label is meant to be visible.
node [shape=plaintext];
subgraph cluster__tsm {
label="Tsm";
graph[style=dotted];
// Top-level sequence: a 5-byte Header at the start of the stream.
tsm__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="header_pos">0</TD><TD PORT="header_size">5</TD><TD>Header</TD><TD PORT="header_type">header</TD></TR>
</TABLE>>];
// "index": an 8-byte Index located at the last 8 bytes of the stream (_io.size - 8).
tsm__inst__index [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="index_pos">(_io.size - 8)</TD><TD PORT="index_size">8</TD><TD>Index</TD><TD PORT="index_type">index</TD></TR>
</TABLE>>];
// Header: a 4-byte magic followed by a 1-byte version.
subgraph cluster__header {
label="Tsm::Header";
graph[style=dotted];
header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="magic_pos">0</TD><TD PORT="magic_size">4</TD><TD></TD><TD PORT="magic_type">magic</TD></TR>
<TR><TD PORT="version_pos">4</TD><TD PORT="version_size">1</TD><TD>u1</TD><TD PORT="version_type">version</TD></TR>
</TABLE>>];
}
// Index: a single 8-byte u8be field, "offset".
subgraph cluster__index {
label="Tsm::Index";
graph[style=dotted];
index__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="offset_pos">0</TD><TD PORT="offset_size">8</TD><TD>u8be</TD><TD PORT="offset_type">offset</TD></TR>
</TABLE>>];
// "entries": IndexHeader records starting at position "offset", repeated
// until the stream position reaches _io.size - 8.
index__inst__entries [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="entries_pos">offset</TD><TD PORT="entries_size">...</TD><TD>IndexHeader</TD><TD PORT="entries_type">entries</TD></TR>
<TR><TD COLSPAN="4" PORT="entries__repeat">repeat until _io.pos == (_io.size - 8)</TD></TR>
</TABLE>>];
// IndexHeader: a 2-byte key_len, a UTF-8 key of key_len bytes, a 1-byte type,
// a 2-byte entry_count, then entry_count IndexEntry records of 28 bytes each.
subgraph cluster__index_header {
label="Tsm::Index::IndexHeader";
graph[style=dotted];
index_header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="key_len_pos">0</TD><TD PORT="key_len_size">2</TD><TD>u2be</TD><TD PORT="key_len_type">key_len</TD></TR>
<TR><TD PORT="key_pos">2</TD><TD PORT="key_size">key_len</TD><TD>str(UTF-8)</TD><TD PORT="key_type">key</TD></TR>
<TR><TD PORT="type_pos">...</TD><TD PORT="type_size">1</TD><TD>u1</TD><TD PORT="type_type">type</TD></TR>
<TR><TD PORT="entry_count_pos">...</TD><TD PORT="entry_count_size">2</TD><TD>u2be</TD><TD PORT="entry_count_type">entry_count</TD></TR>
<TR><TD PORT="index_entries_pos">...</TD><TD PORT="index_entries_size">28</TD><TD>IndexEntry</TD><TD PORT="index_entries_type">index_entries</TD></TR>
<TR><TD COLSPAN="4" PORT="index_entries__repeat">repeat entry_count times</TD></TR>
</TABLE>>];
// IndexEntry: min_time, max_time and block_offset (8 bytes each) plus a
// 4-byte block_size, 28 bytes in total.
subgraph cluster__index_entry {
label="Tsm::Index::IndexHeader::IndexEntry";
graph[style=dotted];
index_entry__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="min_time_pos">0</TD><TD PORT="min_time_size">8</TD><TD>u8be</TD><TD PORT="min_time_type">min_time</TD></TR>
<TR><TD PORT="max_time_pos">8</TD><TD PORT="max_time_size">8</TD><TD>u8be</TD><TD PORT="max_time_type">max_time</TD></TR>
<TR><TD PORT="block_offset_pos">16</TD><TD PORT="block_offset_size">8</TD><TD>u8be</TD><TD PORT="block_offset_type">block_offset</TD></TR>
<TR><TD PORT="block_size_pos">24</TD><TD PORT="block_size_size">4</TD><TD>u4be</TD><TD PORT="block_size_type">block_size</TD></TR>
</TABLE>>];
// "block": a BlockEntry read at the position given by block_offset.
index_entry__inst__block [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="block_pos">block_offset</TD><TD PORT="block_size">...</TD><TD>BlockEntry</TD><TD PORT="block_type">block</TD></TR>
</TABLE>>];
// BlockEntry: a 4-byte crc32 followed by (_parent.block_size - 4) bytes of data.
subgraph cluster__block_entry {
label="Tsm::Index::IndexHeader::IndexEntry::BlockEntry";
graph[style=dotted];
block_entry__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
<TR><TD PORT="crc32_pos">0</TD><TD PORT="crc32_size">4</TD><TD>u4be</TD><TD PORT="crc32_type">crc32</TD></TR>
<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">(_parent.block_size - 4)</TD><TD></TD><TD PORT="data_type">data</TD></TR>
</TABLE>>];
}
}
}
}
}
// Edges come in two kinds:
//   style=bold      - from a field's id cell to the table describing that field's type.
//   color="#404040" - from a field's id cell to the pos/size/repeat cell whose
//                     expression refers to that field's value.
tsm__seq:header_type -> header__seq [style=bold];
tsm__inst__index:index_type -> index__seq [style=bold];
index__seq:offset_type -> index__inst__entries:entries_pos [color="#404040"];
index__inst__entries:entries_type -> index_header__seq [style=bold];
index_header__seq:key_len_type -> index_header__seq:key_size [color="#404040"];
index_header__seq:index_entries_type -> index_entry__seq [style=bold];
index_header__seq:entry_count_type -> index_header__seq:index_entries__repeat [color="#404040"];
index_entry__seq:block_offset_type -> index_entry__inst__block:block_pos [color="#404040"];
index_entry__inst__block:block_type -> block_entry__seq [style=bold];
index_entry__seq:block_size_type -> block_entry__seq:data_size [color="#404040"];
}