InfluxDB TSM file: GraphViz block diagram (.dot) source

InfluxDB is a scalable database, written in Go, optimized for storage of time series, real-time application metrics, operations monitoring events, etc.

Data is stored in .tsm files, which are kept pretty simple conceptually. Each .tsm file contains a header and a footer; the footer stores the offset of an index. The index is used to find the data block for a requested time boundary.

Application

InfluxDB

File extension

tsm

KS implementation details

License: MIT

References

This page hosts a formal specification of the InfluxDB TSM file format using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

GraphViz block diagram source

tsm.dot

// Block diagram of the TSM file layout.
// Each node is an HTML-like table with the columns pos / size / type / id,
// one row per field. Each cluster groups the tables belonging to one type, and
// clusters are nested the same way the type names are (Tsm::Index::IndexHeader::...).
// Node names ending in "__seq" hold the sequential fields of a type; names
// containing "__inst__" hold fields located at a computed position
// (presumably Kaitai Struct "instances" -- confirm against the .ksy spec).
digraph {
	// Lay the graph out left to right.
	rankdir=LR;
	// Nodes draw no outline of their own; the table in the label is the visible box.
	node [shape=plaintext];
	// Top-level type: the whole file.
	subgraph cluster__tsm {
		label="Tsm";
		graph[style=dotted];

		// The file starts with a 5-byte Header at position 0.
		tsm__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
			<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
			<TR><TD PORT="header_pos">0</TD><TD PORT="header_size">5</TD><TD>Header</TD><TD PORT="header_type">header</TD></TR>
		</TABLE>>];
		// The 8-byte Index structure sits at (_io.size - 8), i.e. 8 bytes before
		// _io.size (presumably the total stream length, making this the file footer).
		tsm__inst__index [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
			<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
			<TR><TD PORT="index_pos">(_io.size - 8)</TD><TD PORT="index_size">8</TD><TD>Index</TD><TD PORT="index_type">index</TD></TR>
		</TABLE>>];
		// Header: 4-byte magic (type cell left empty) followed by a 1-byte version.
		subgraph cluster__header {
			label="Tsm::Header";
			graph[style=dotted];

			header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="magic_pos">0</TD><TD PORT="magic_size">4</TD><TD></TD><TD PORT="magic_type">magic</TD></TR>
				<TR><TD PORT="version_pos">4</TD><TD PORT="version_size">1</TD><TD>u1</TD><TD PORT="version_type">version</TD></TR>
			</TABLE>>];
		}
		// Index: a single 8-byte big-endian "offset" field, plus the entries it points to.
		subgraph cluster__index {
			label="Tsm::Index";
			graph[style=dotted];

			index__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="offset_pos">0</TD><TD PORT="offset_size">8</TD><TD>u8be</TD><TD PORT="offset_type">offset</TD></TR>
			</TABLE>>];
			// "entries" starts at position "offset" and is a run of IndexHeader
			// records, repeated until the read position reaches (_io.size - 8).
			index__inst__entries [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
				<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
				<TR><TD PORT="entries_pos">offset</TD><TD PORT="entries_size">...</TD><TD>IndexHeader</TD><TD PORT="entries_type">entries</TD></TR>
				<TR><TD COLSPAN="4" PORT="entries__repeat">repeat until _io.pos == (_io.size - 8)</TD></TR>
			</TABLE>>];
			// IndexHeader: length-prefixed UTF-8 key, a 1-byte type, and a counted
			// list of IndexEntry records. Positions after the variable-length key
			// are shown as "...".
			subgraph cluster__index_header {
				label="Tsm::Index::IndexHeader";
				graph[style=dotted];

				// The size 28 listed for index_entries equals the sum of the
				// IndexEntry field sizes below (8 + 8 + 8 + 4).
				index_header__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
					<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
					<TR><TD PORT="key_len_pos">0</TD><TD PORT="key_len_size">2</TD><TD>u2be</TD><TD PORT="key_len_type">key_len</TD></TR>
					<TR><TD PORT="key_pos">2</TD><TD PORT="key_size">key_len</TD><TD>str(UTF-8)</TD><TD PORT="key_type">key</TD></TR>
					<TR><TD PORT="type_pos">...</TD><TD PORT="type_size">1</TD><TD>u1</TD><TD PORT="type_type">type</TD></TR>
					<TR><TD PORT="entry_count_pos">...</TD><TD PORT="entry_count_size">2</TD><TD>u2be</TD><TD PORT="entry_count_type">entry_count</TD></TR>
					<TR><TD PORT="index_entries_pos">...</TD><TD PORT="index_entries_size">28</TD><TD>IndexEntry</TD><TD PORT="index_entries_type">index_entries</TD></TR>
					<TR><TD COLSPAN="4" PORT="index_entries__repeat">repeat entry_count times</TD></TR>
				</TABLE>>];
				// IndexEntry: time range (min_time, max_time) plus the offset and
				// size of the block it refers to.
				subgraph cluster__index_entry {
					label="Tsm::Index::IndexHeader::IndexEntry";
					graph[style=dotted];

					index_entry__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
						<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
						<TR><TD PORT="min_time_pos">0</TD><TD PORT="min_time_size">8</TD><TD>u8be</TD><TD PORT="min_time_type">min_time</TD></TR>
						<TR><TD PORT="max_time_pos">8</TD><TD PORT="max_time_size">8</TD><TD>u8be</TD><TD PORT="max_time_type">max_time</TD></TR>
						<TR><TD PORT="block_offset_pos">16</TD><TD PORT="block_offset_size">8</TD><TD>u8be</TD><TD PORT="block_offset_type">block_offset</TD></TR>
						<TR><TD PORT="block_size_pos">24</TD><TD PORT="block_size_size">4</TD><TD>u4be</TD><TD PORT="block_size_type">block_size</TD></TR>
					</TABLE>>];
					// "block" is a BlockEntry located at position block_offset.
					index_entry__inst__block [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
						<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
						<TR><TD PORT="block_pos">block_offset</TD><TD PORT="block_size">...</TD><TD>BlockEntry</TD><TD PORT="block_type">block</TD></TR>
					</TABLE>>];
					// BlockEntry: 4-byte crc32 followed by the data, whose size is the
					// parent's block_size minus those 4 bytes.
					subgraph cluster__block_entry {
						label="Tsm::Index::IndexHeader::IndexEntry::BlockEntry";
						graph[style=dotted];

						block_entry__seq [label=<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0">
							<TR><TD BGCOLOR="#E0FFE0">pos</TD><TD BGCOLOR="#E0FFE0">size</TD><TD BGCOLOR="#E0FFE0">type</TD><TD BGCOLOR="#E0FFE0">id</TD></TR>
							<TR><TD PORT="crc32_pos">0</TD><TD PORT="crc32_size">4</TD><TD>u4be</TD><TD PORT="crc32_type">crc32</TD></TR>
							<TR><TD PORT="data_pos">4</TD><TD PORT="data_size">(_parent.block_size - 4)</TD><TD></TD><TD PORT="data_type">data</TD></TR>
						</TABLE>>];
					}
				}
			}
		}
	}
	// Edges.
	//   style=bold      : from a field's id cell to the table of the type that field has.
	//   color="#404040" : from a field's id cell to the cell (pos, size or repeat
	//                     row) whose expression uses that field's value.
	tsm__seq:header_type -> header__seq [style=bold];
	tsm__inst__index:index_type -> index__seq [style=bold];
	index__seq:offset_type -> index__inst__entries:entries_pos [color="#404040"];
	index__inst__entries:entries_type -> index_header__seq [style=bold];
	index_header__seq:key_len_type -> index_header__seq:key_size [color="#404040"];
	index_header__seq:index_entries_type -> index_entry__seq [style=bold];
	index_header__seq:entry_count_type -> index_header__seq:index_entries__repeat [color="#404040"];
	index_entry__seq:block_offset_type -> index_entry__inst__block:block_pos [color="#404040"];
	index_entry__inst__block:block_type -> block_entry__seq [style=bold];
	index_entry__seq:block_size_type -> block_entry__seq:data_size [color="#404040"];
}