.dbf is a relational database format introduced in the DOS database management system dBASE in 1982.
One .dbf file corresponds to one table and contains a series of headers, specification of fields, and a number of fixed-size records.
This page hosts a formal specification of the .dbf file format of dBASE using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.
For C++, the easiest way is to clone the runtime library sources and build them along with your project.
Using Kaitai Struct in C++/STL usually consists of 3 steps.
First, create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
#include <fstream>
std::ifstream is("path/to/local/file.dbf", std::ifstream::binary);
#include <sstream>
std::istringstream is(str);
#include <sstream>
const char buf[] = { ... };
std::string str(buf, sizeof buf);
std::istringstream is(str);
#include "kaitai/kaitaistream.h"
kaitai::kstream ks(&is);
dbf_t data(&ks);
After that, one can get various attributes from the structure by invoking getter methods like:
data.header1() // => get header1
#pragma once
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include <vector>
#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif
/**
* .dbf is a relational database format introduced in DOS database
* management system dBASE in 1982.
*
* One .dbf file corresponds to one table and contains a series of headers,
* specification of fields, and a number of fixed-size records.
*
* Parsed sub-structures are held in std::unique_ptr members; the getters that
* return pointers hand out non-owning pointers to them.
* \sa http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm Source
*/
class dbf_t : public kaitai::kstruct {
public:
// Forward declarations of the nested structure types defined below.
class header2_t;
class field_t;
class header1_t;
class header_dbase_3_t;
class header_dbase_7_t;
class record_t;
// Value of the leading byte of a record: 32 is ASCII space, 42 is ASCII '*'.
enum delete_state_t {
DELETE_STATE_FALSE = 32,
DELETE_STATE_TRUE = 42
};
// p__io: stream to parse; p__parent / p__root: enclosing structures, both optional.
dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~dbf_t();
// Second header part: an optional dBASE 3 or dBASE 7 block plus the list of
// field descriptors.
class header2_t : public kaitai::kstruct {
public:
header2_t(kaitai::kstream* p__io, dbf_t* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~header2_t();
private:
std::unique_ptr<header_dbase_3_t> m_header_dbase_3;
// n_* flag: true when the corresponding optional block was not parsed.
bool n_header_dbase_3;
public:
bool _is_null_header_dbase_3() { header_dbase_3(); return n_header_dbase_3; };
private:
std::unique_ptr<header_dbase_7_t> m_header_dbase_7;
bool n_header_dbase_7;
public:
bool _is_null_header_dbase_7() { header_dbase_7(); return n_header_dbase_7; };
private:
std::unique_ptr<std::vector<std::unique_ptr<field_t>>> m_fields;
dbf_t* m__root;
dbf_t* m__parent;
public:
// Non-owning accessors; header_dbase_3()/header_dbase_7() return whatever the
// unique_ptr holds, which may be null.
header_dbase_3_t* header_dbase_3() const { return m_header_dbase_3.get(); }
header_dbase_7_t* header_dbase_7() const { return m_header_dbase_7.get(); }
std::vector<std::unique_ptr<field_t>>* fields() const { return m_fields.get(); }
dbf_t* _root() const { return m__root; }
dbf_t* _parent() const { return m__parent; }
};
// One field (column) descriptor of the table.
class field_t : public kaitai::kstruct {
public:
field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~field_t();
private:
std::string m_name;
uint8_t m_datatype;
uint32_t m_data_address;
uint8_t m_length;
uint8_t m_decimal_count;
std::string m_reserved1;
uint8_t m_work_area_id;
std::string m_reserved2;
uint8_t m_set_fields_flag;
std::string m_reserved3;
dbf_t* m__root;
dbf_t::header2_t* m__parent;
public:
// Value getters; the std::string getters return a copy.
std::string name() const { return m_name; }
uint8_t datatype() const { return m_datatype; }
uint32_t data_address() const { return m_data_address; }
uint8_t length() const { return m_length; }
uint8_t decimal_count() const { return m_decimal_count; }
std::string reserved1() const { return m_reserved1; }
uint8_t work_area_id() const { return m_work_area_id; }
std::string reserved2() const { return m_reserved2; }
uint8_t set_fields_flag() const { return m_set_fields_flag; }
std::string reserved3() const { return m_reserved3; }
dbf_t* _root() const { return m__root; }
dbf_t::header2_t* _parent() const { return m__parent; }
};
/**
* \sa http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm - section 1.1
*/
class header1_t : public kaitai::kstruct {
public:
header1_t(kaitai::kstream* p__io, dbf_t* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~header1_t();
private:
// f_dbase_level records whether m_dbase_level has been computed yet.
bool f_dbase_level;
int32_t m_dbase_level;
public:
// Computed on first call and cached afterwards, hence not const.
int32_t dbase_level();
private:
uint8_t m_version;
uint8_t m_last_update_y;
uint8_t m_last_update_m;
uint8_t m_last_update_d;
uint32_t m_num_records;
uint16_t m_len_header;
uint16_t m_len_record;
dbf_t* m__root;
dbf_t* m__parent;
public:
uint8_t version() const { return m_version; }
uint8_t last_update_y() const { return m_last_update_y; }
uint8_t last_update_m() const { return m_last_update_m; }
uint8_t last_update_d() const { return m_last_update_d; }
uint32_t num_records() const { return m_num_records; }
uint16_t len_header() const { return m_len_header; }
uint16_t len_record() const { return m_len_record; }
dbf_t* _root() const { return m__root; }
dbf_t* _parent() const { return m__parent; }
};
// Optional header block consisting of three reserved byte strings.
class header_dbase_3_t : public kaitai::kstruct {
public:
header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~header_dbase_3_t();
private:
std::string m_reserved1;
std::string m_reserved2;
std::string m_reserved3;
dbf_t* m__root;
dbf_t::header2_t* m__parent;
public:
std::string reserved1() const { return m_reserved1; }
std::string reserved2() const { return m_reserved2; }
std::string reserved3() const { return m_reserved3; }
dbf_t* _root() const { return m__root; }
dbf_t::header2_t* _parent() const { return m__parent; }
};
// Optional header block carrying flags, language driver data and reserved bytes.
class header_dbase_7_t : public kaitai::kstruct {
public:
header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~header_dbase_7_t();
private:
std::string m_reserved1;
uint8_t m_has_incomplete_transaction;
uint8_t m_dbase_iv_encryption;
std::string m_reserved2;
uint8_t m_production_mdx;
uint8_t m_language_driver_id;
std::string m_reserved3;
std::string m_language_driver_name;
std::string m_reserved4;
dbf_t* m__root;
dbf_t::header2_t* m__parent;
public:
std::string reserved1() const { return m_reserved1; }
uint8_t has_incomplete_transaction() const { return m_has_incomplete_transaction; }
uint8_t dbase_iv_encryption() const { return m_dbase_iv_encryption; }
std::string reserved2() const { return m_reserved2; }
uint8_t production_mdx() const { return m_production_mdx; }
uint8_t language_driver_id() const { return m_language_driver_id; }
std::string reserved3() const { return m_reserved3; }
std::string language_driver_name() const { return m_language_driver_name; }
std::string reserved4() const { return m_reserved4; }
dbf_t* _root() const { return m__root; }
dbf_t::header2_t* _parent() const { return m__parent; }
};
// One table row: a deletion marker followed by one raw byte string per field.
class record_t : public kaitai::kstruct {
public:
record_t(kaitai::kstream* p__io, dbf_t* p__parent = nullptr, dbf_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~record_t();
private:
delete_state_t m_deleted;
std::unique_ptr<std::vector<std::string>> m_record_fields;
dbf_t* m__root;
dbf_t* m__parent;
public:
delete_state_t deleted() const { return m_deleted; }
std::vector<std::string>* record_fields() const { return m_record_fields.get(); }
dbf_t* _root() const { return m__root; }
dbf_t* _parent() const { return m__parent; }
};
private:
std::unique_ptr<header1_t> m_header1;
std::unique_ptr<header2_t> m_header2;
std::string m_header_terminator;
std::unique_ptr<std::vector<std::unique_ptr<record_t>>> m_records;
dbf_t* m__root;
kaitai::kstruct* m__parent;
// Raw bytes and the substreams built over them for header2 and for each record.
std::string m__raw_header2;
std::unique_ptr<kaitai::kstream> m__io__raw_header2;
std::unique_ptr<std::vector<std::string>> m__raw_records;
std::unique_ptr<std::vector<std::unique_ptr<kaitai::kstream>>> m__io__raw_records;
public:
// Non-owning accessors to the parsed structures and to the raw substream data.
header1_t* header1() const { return m_header1.get(); }
header2_t* header2() const { return m_header2.get(); }
std::string header_terminator() const { return m_header_terminator; }
std::vector<std::unique_ptr<record_t>>* records() const { return m_records.get(); }
dbf_t* _root() const { return m__root; }
kaitai::kstruct* _parent() const { return m__parent; }
std::string _raw_header2() const { return m__raw_header2; }
kaitai::kstream* _io__raw_header2() const { return m__io__raw_header2.get(); }
std::vector<std::string>* _raw_records() const { return m__raw_records.get(); }
std::vector<std::unique_ptr<kaitai::kstream>>* _io__raw_records() const { return m__io__raw_records.get(); }
};
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
#include "dbf.h"
#include "kaitai/exceptions.h"
/**
 * Builds the top-level structure and parses the stream immediately.
 *
 * \param p__io stream the file is read from
 * \param p__parent enclosing structure, stored as given
 * \param p__root not consulted: the top-level object always registers itself
 *        as the root
 */
dbf_t::dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_header1(nullptr),
      m_header2(nullptr),
      m_records(nullptr),
      m__root(this),
      m__parent(p__parent),
      m__io__raw_header2(nullptr),
      m__raw_records(nullptr),
      m__io__raw_records(nullptr)
{
    _read();
}
void dbf_t::_read() {
m_header1 = std::unique_ptr<header1_t>(new header1_t(m__io, this, m__root));
m__raw_header2 = m__io->read_bytes(((header1()->len_header() - 12) - 1));
m__io__raw_header2 = std::unique_ptr<kaitai::kstream>(new kaitai::kstream(m__raw_header2));
m_header2 = std::unique_ptr<header2_t>(new header2_t(m__io__raw_header2.get(), this, m__root));
m_header_terminator = m__io->read_bytes(1);
if (!(header_terminator() == std::string("\x0D", 1))) {
throw kaitai::validation_not_equal_error<std::string>(std::string("\x0D", 1), header_terminator(), _io(), std::string("/seq/2"));
}
m__raw_records = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>());
m__io__raw_records = std::unique_ptr<std::vector<std::unique_ptr<kaitai::kstream>>>(new std::vector<std::unique_ptr<kaitai::kstream>>());
m_records = std::unique_ptr<std::vector<std::unique_ptr<record_t>>>(new std::vector<std::unique_ptr<record_t>>());
const int l_records = header1()->num_records();
for (int i = 0; i < l_records; i++) {
m__raw_records->push_back(std::move(m__io->read_bytes(header1()->len_record())));
kaitai::kstream* io__raw_records = new kaitai::kstream(m__raw_records->at(m__raw_records->size() - 1));
m__io__raw_records->emplace_back(io__raw_records);
m_records->push_back(std::move(std::unique_ptr<record_t>(new record_t(io__raw_records, this, m__root))));
}
}
// Destructor: runs _clean_up(); members held in std::unique_ptr release themselves.
dbf_t::~dbf_t() {
_clean_up();
}
// Cleanup hook called from the destructor; it performs no manual cleanup.
void dbf_t::_clean_up() {
}
/**
 * Stores the parent/root links and parses the second header part right away.
 *
 * \param p__io substream holding the header2 bytes
 * \param p__parent owning dbf_t
 * \param p__root root of the structure tree
 */
dbf_t::header2_t::header2_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_header_dbase_3(nullptr),
      m_header_dbase_7(nullptr),
      m_fields(nullptr),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
/**
 * Reads the version-specific header block selected by the root header's
 * dbase_level() (3 or 7; neither block is read for other levels), then reads
 * field descriptors until the substream is exhausted.
 */
void dbf_t::header2_t::_read() {
    const int32_t level = _root()->header1()->dbase_level();

    n_header_dbase_3 = (level != 3);
    if (!n_header_dbase_3) {
        m_header_dbase_3 = std::unique_ptr<header_dbase_3_t>(new header_dbase_3_t(m__io, this, m__root));
    }

    n_header_dbase_7 = (level != 7);
    if (!n_header_dbase_7) {
        m_header_dbase_7 = std::unique_ptr<header_dbase_7_t>(new header_dbase_7_t(m__io, this, m__root));
    }

    m_fields = std::unique_ptr<std::vector<std::unique_ptr<field_t>>>(new std::vector<std::unique_ptr<field_t>>());
    // Everything left in the substream is field descriptors.
    while (!m__io->is_eof()) {
        m_fields->push_back(std::unique_ptr<field_t>(new field_t(m__io, this, m__root)));
    }
}
// Destructor: runs _clean_up(); members held in std::unique_ptr release themselves.
dbf_t::header2_t::~header2_t() {
_clean_up();
}
// Cleanup hook called from the destructor. Both branches below are empty, so
// nothing is released here regardless of which optional block was parsed.
void dbf_t::header2_t::_clean_up() {
if (!n_header_dbase_3) {
}
if (!n_header_dbase_7) {
}
}
/**
 * Stores the parent/root links and parses one field descriptor right away.
 *
 * \param p__io stream positioned at the start of the descriptor
 * \param p__parent owning header2_t
 * \param p__root root of the structure tree
 */
dbf_t::field_t::field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
/**
 * Reads one field descriptor (32 bytes in total) from the stream.
 */
void dbf_t::field_t::_read() {
    // Name: an 11-byte slot, cut at the first zero byte (terminator excluded),
    // then decoded as ASCII.
    const std::string name_slot = m__io->read_bytes(11);
    const std::string name_bytes = kaitai::kstream::bytes_terminate(name_slot, 0, false);
    m_name = kaitai::kstream::bytes_to_str(name_bytes, std::string("ASCII"));

    m_datatype = m__io->read_u1();
    m_data_address = m__io->read_u4le();
    m_length = m__io->read_u1();
    m_decimal_count = m__io->read_u1();

    m_reserved1 = m__io->read_bytes(2);
    m_work_area_id = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(2);
    m_set_fields_flag = m__io->read_u1();
    m_reserved3 = m__io->read_bytes(8);
}
// Destructor: runs _clean_up(), which does nothing for this type.
dbf_t::field_t::~field_t() {
_clean_up();
}
// Cleanup hook called from the destructor; it performs no manual cleanup.
void dbf_t::field_t::_clean_up() {
}
/**
 * Stores the parent/root links, marks dbase_level as not yet computed, and
 * parses the first header right away.
 *
 * \param p__io stream positioned at the start of the file header
 * \param p__parent owning dbf_t
 * \param p__root root of the structure tree
 */
dbf_t::header1_t::header1_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      f_dbase_level(false),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
// Reads the first header in stream order: four single-byte values, then one
// 4-byte and two 2-byte little-endian values (12 bytes in total).
void dbf_t::header1_t::_read() {
m_version = m__io->read_u1();
m_last_update_y = m__io->read_u1();
m_last_update_m = m__io->read_u1();
m_last_update_d = m__io->read_u1();
m_num_records = m__io->read_u4le();
m_len_header = m__io->read_u2le();
m_len_record = m__io->read_u2le();
}
// Destructor: runs _clean_up(), which does nothing for this type.
dbf_t::header1_t::~header1_t() {
_clean_up();
}
// Cleanup hook called from the destructor; it performs no manual cleanup.
void dbf_t::header1_t::_clean_up() {
}
/**
 * Returns the low three bits of the version byte, computing the value on the
 * first call and serving the cached result afterwards.
 */
int32_t dbf_t::header1_t::dbase_level() {
    if (!f_dbase_level) {
        m_dbase_level = (version() & 7);
        f_dbase_level = true;
    }
    return m_dbase_level;
}
/**
 * Stores the parent/root links and parses the dBASE 3 header block right away.
 *
 * \param p__io stream positioned at the start of the block
 * \param p__parent owning header2_t
 * \param p__root root of the structure tree
 */
dbf_t::header_dbase_3_t::header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
// Reads three reserved byte strings of 3, 13 and 4 bytes (20 bytes in total);
// their contents are stored without interpretation.
void dbf_t::header_dbase_3_t::_read() {
m_reserved1 = m__io->read_bytes(3);
m_reserved2 = m__io->read_bytes(13);
m_reserved3 = m__io->read_bytes(4);
}
// Destructor: runs _clean_up(), which does nothing for this type.
dbf_t::header_dbase_3_t::~header_dbase_3_t() {
_clean_up();
}
// Cleanup hook called from the destructor; it performs no manual cleanup.
void dbf_t::header_dbase_3_t::_clean_up() {
}
/**
 * Stores the parent/root links and parses the dBASE 7 header block right away.
 *
 * \param p__io stream positioned at the start of the block
 * \param p__parent owning header2_t
 * \param p__root root of the structure tree
 */
dbf_t::header_dbase_7_t::header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
/**
 * Reads the dBASE 7 header block in stream order and checks that the two
 * 2-byte reserved fields contain only zero bytes.
 *
 * \throws kaitai::validation_not_equal_error<std::string> when reserved1 or
 *         reserved3 differs from two zero bytes
 */
void dbf_t::header_dbase_7_t::_read() {
    // Both validated reserved fields are expected to hold exactly this value.
    const std::string two_zero_bytes("\x00\x00", 2);

    m_reserved1 = m__io->read_bytes(2);
    if (reserved1() != two_zero_bytes) {
        throw kaitai::validation_not_equal_error<std::string>(two_zero_bytes, reserved1(), _io(), std::string("/types/header_dbase_7/seq/0"));
    }

    m_has_incomplete_transaction = m__io->read_u1();
    m_dbase_iv_encryption = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(12);
    m_production_mdx = m__io->read_u1();
    m_language_driver_id = m__io->read_u1();

    m_reserved3 = m__io->read_bytes(2);
    if (reserved3() != two_zero_bytes) {
        throw kaitai::validation_not_equal_error<std::string>(two_zero_bytes, reserved3(), _io(), std::string("/types/header_dbase_7/seq/6"));
    }

    m_language_driver_name = m__io->read_bytes(32);
    m_reserved4 = m__io->read_bytes(4);
}
// Destructor: runs _clean_up(), which does nothing for this type.
dbf_t::header_dbase_7_t::~header_dbase_7_t() {
_clean_up();
}
// Cleanup hook called from the destructor; it performs no manual cleanup.
void dbf_t::header_dbase_7_t::_clean_up() {
}
/**
 * Stores the parent/root links and parses one record right away.
 *
 * \param p__io substream holding the bytes of this record
 * \param p__parent owning dbf_t
 * \param p__root root of the structure tree; its field list drives parsing
 */
dbf_t::record_t::record_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_record_fields(nullptr),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
void dbf_t::record_t::_read() {
m_deleted = static_cast<dbf_t::delete_state_t>(m__io->read_u1());
m_record_fields = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>());
const int l_record_fields = _root()->header2()->fields()->size();
for (int i = 0; i < l_record_fields; i++) {
m_record_fields->push_back(std::move(m__io->read_bytes(_root()->header2()->fields()->at(i)->length())));
}
}
// Destructor: runs _clean_up(); the field vector is released by its std::unique_ptr.
dbf_t::record_t::~record_t() {
_clean_up();
}
// Cleanup hook called from the destructor; it performs no manual cleanup.
void dbf_t::record_t::_clean_up() {
}