.dbf file format of dBASE: C++11/STL parsing library

.dbf is a relational database format introduced in the DOS database management system dBASE in 1982.

One .dbf file corresponds to one table and contains a series of headers, a specification of the fields, and a number of fixed-size records.

This page hosts a formal specification of the .dbf file format of dBASE, written in Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
    #include <fstream>
    
    std::ifstream is("path/to/local/file.dbf", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into a Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    dbf_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.header1() // => get header1

C++11/STL source code to parse .dbf file format of dBASE

dbf.h

#pragma once

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include <vector>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif

/**
 * .dbf is a relational database format introduced in DOS database
 * management system dBASE in 1982.
 * 
 * One .dbf file corresponds to one table and contains a series of headers,
 * specification of fields, and a number of fixed-size records.
 * \sa http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm Source
 */

class dbf_t : public kaitai::kstruct {

public:
    // Forward declarations: the nested types below refer to each other
    // before their full definitions appear.
    class header2_t;
    class field_t;
    class header1_t;
    class header_dbase_3_t;
    class header_dbase_7_t;
    class record_t;

    /**
     * Values of the first byte of every record (see record_t::deleted()).
     * 32 is ASCII space, 42 is ASCII '*'.
     */
    enum delete_state_t {
        DELETE_STATE_FALSE = 32,
        DELETE_STATE_TRUE = 42
    };

    /**
     * Constructs the top-level object; the implementation in dbf.cpp parses
     * the stream from inside the constructor by calling _read().
     *
     * \param p__io stream to parse from
     * \param p__parent stored as-is and returned by _parent()
     * \param p__root accepted for signature uniformity; the implementation
     *        in dbf.cpp makes this object its own root regardless
     */
    dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, dbf_t* p__root = nullptr);

private:
    /** Parses header1, header2, the header terminator and all records. */
    void _read();
    /** Called from the destructor. */
    void _clean_up();

public:
    ~dbf_t();

    /**
     * Second part of the header: an optional dBASE-level-specific header
     * (level 3 or level 7, chosen by header1_t::dbase_level()) followed by
     * field descriptors repeated until the end of its stream.
     */
    class header2_t : public kaitai::kstruct {

    public:

        /** Stores parent/root and parses immediately (see dbf.cpp). */
        header2_t(kaitai::kstream* p__io, dbf_t* p__parent = nullptr, dbf_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header2_t();

    private:
        // Parsed only when the dBASE level is 3; n_header_dbase_3 is true
        // when the attribute was not parsed.
        std::unique_ptr<header_dbase_3_t> m_header_dbase_3;
        bool n_header_dbase_3;

    public:
        /** Returns true when header_dbase_3 was not parsed (getter result is discarded). */
        bool _is_null_header_dbase_3() { header_dbase_3(); return n_header_dbase_3; };

    private:
        // Parsed only when the dBASE level is 7; n_header_dbase_7 is true
        // when the attribute was not parsed.
        std::unique_ptr<header_dbase_7_t> m_header_dbase_7;
        bool n_header_dbase_7;

    public:
        /** Returns true when header_dbase_7 was not parsed (getter result is discarded). */
        bool _is_null_header_dbase_7() { header_dbase_7(); return n_header_dbase_7; };

    private:
        std::unique_ptr<std::vector<std::unique_ptr<field_t>>> m_fields;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        // The pointer getters below return non-owning pointers; the objects
        // stay owned by this instance's unique_ptr members.
        header_dbase_3_t* header_dbase_3() const { return m_header_dbase_3.get(); }
        header_dbase_7_t* header_dbase_7() const { return m_header_dbase_7.get(); }
        std::vector<std::unique_ptr<field_t>>* fields() const { return m_fields.get(); }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

    /**
     * One field (column) descriptor. The implementation reads 32 bytes per
     * descriptor; `name` is an 11-byte, zero-terminated ASCII string and
     * `length` is the byte length used when reading this field from a record.
     */
    class field_t : public kaitai::kstruct {

    public:

        /** Stores parent/root and parses immediately (see dbf.cpp). */
        field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = nullptr, dbf_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~field_t();

    private:
        // Members are listed in the order they are read from the stream.
        std::string m_name;
        uint8_t m_datatype;
        uint32_t m_data_address;
        uint8_t m_length;
        uint8_t m_decimal_count;
        std::string m_reserved1;
        uint8_t m_work_area_id;
        std::string m_reserved2;
        uint8_t m_set_fields_flag;
        std::string m_reserved3;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        // String getters return copies of the stored values.
        std::string name() const { return m_name; }
        uint8_t datatype() const { return m_datatype; }
        uint32_t data_address() const { return m_data_address; }
        uint8_t length() const { return m_length; }
        uint8_t decimal_count() const { return m_decimal_count; }
        std::string reserved1() const { return m_reserved1; }
        uint8_t work_area_id() const { return m_work_area_id; }
        std::string reserved2() const { return m_reserved2; }
        uint8_t set_fields_flag() const { return m_set_fields_flag; }
        std::string reserved3() const { return m_reserved3; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    /**
     * First, fixed-size part of the header (12 bytes as read by the
     * implementation): version byte, last-update date, record count,
     * header length and record length.
     * \sa http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm - section 1.1
     */

    class header1_t : public kaitai::kstruct {

    public:

        /** Stores parent/root and parses immediately (see dbf.cpp). */
        header1_t(kaitai::kstream* p__io, dbf_t* p__parent = nullptr, dbf_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header1_t();

    private:
        // Cache for dbase_level(): f_dbase_level tells whether m_dbase_level
        // has been computed yet.
        bool f_dbase_level;
        int32_t m_dbase_level;

    public:
        /**
         * Lazily computed value `version() & 7`; the result is cached after
         * the first call, which is why this accessor is not const.
         */
        int32_t dbase_level();

    private:
        // Members are listed in the order they are read from the stream.
        uint8_t m_version;
        uint8_t m_last_update_y;
        uint8_t m_last_update_m;
        uint8_t m_last_update_d;
        uint32_t m_num_records;
        uint16_t m_len_header;
        uint16_t m_len_record;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        uint8_t version() const { return m_version; }
        uint8_t last_update_y() const { return m_last_update_y; }
        uint8_t last_update_m() const { return m_last_update_m; }
        uint8_t last_update_d() const { return m_last_update_d; }
        uint32_t num_records() const { return m_num_records; }
        uint16_t len_header() const { return m_len_header; }
        uint16_t len_record() const { return m_len_record; }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

    /**
     * Level-3 specific header part: three reserved byte runs
     * (3, 13 and 4 bytes as read by the implementation).
     */
    class header_dbase_3_t : public kaitai::kstruct {

    public:

        /** Stores parent/root and parses immediately (see dbf.cpp). */
        header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = nullptr, dbf_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header_dbase_3_t();

    private:
        std::string m_reserved1;
        std::string m_reserved2;
        std::string m_reserved3;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string reserved1() const { return m_reserved1; }
        std::string reserved2() const { return m_reserved2; }
        std::string reserved3() const { return m_reserved3; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    /**
     * Level-7 specific header part. The implementation validates that
     * reserved1 and reserved3 are each two zero bytes and throws a
     * validation error otherwise.
     */
    class header_dbase_7_t : public kaitai::kstruct {

    public:

        /** Stores parent/root and parses immediately (see dbf.cpp). */
        header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = nullptr, dbf_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header_dbase_7_t();

    private:
        // Members are listed in the order they are read from the stream.
        std::string m_reserved1;
        uint8_t m_has_incomplete_transaction;
        uint8_t m_dbase_iv_encryption;
        std::string m_reserved2;
        uint8_t m_production_mdx;
        uint8_t m_language_driver_id;
        std::string m_reserved3;
        std::string m_language_driver_name;
        std::string m_reserved4;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string reserved1() const { return m_reserved1; }
        uint8_t has_incomplete_transaction() const { return m_has_incomplete_transaction; }
        uint8_t dbase_iv_encryption() const { return m_dbase_iv_encryption; }
        std::string reserved2() const { return m_reserved2; }
        uint8_t production_mdx() const { return m_production_mdx; }
        uint8_t language_driver_id() const { return m_language_driver_id; }
        std::string reserved3() const { return m_reserved3; }
        /** Raw 32 bytes as read from the stream (no string decoding is applied). */
        std::string language_driver_name() const { return m_language_driver_name; }
        std::string reserved4() const { return m_reserved4; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    /**
     * One table record: a deletion-state byte followed by one raw byte
     * string per field descriptor of header2, each as long as that
     * descriptor's length().
     */
    class record_t : public kaitai::kstruct {

    public:

        /** Stores parent/root and parses immediately (see dbf.cpp). */
        record_t(kaitai::kstream* p__io, dbf_t* p__parent = nullptr, dbf_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~record_t();

    private:
        delete_state_t m_deleted;
        std::unique_ptr<std::vector<std::string>> m_record_fields;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        /** The raw byte is cast to the enum without validation, so other values are possible. */
        delete_state_t deleted() const { return m_deleted; }
        std::vector<std::string>* record_fields() const { return m_record_fields.get(); }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

private:
    // Parsed attributes, in stream order.
    std::unique_ptr<header1_t> m_header1;
    std::unique_ptr<header2_t> m_header2;
    std::string m_header_terminator;
    std::unique_ptr<std::vector<std::unique_ptr<record_t>>> m_records;
    dbf_t* m__root;
    kaitai::kstruct* m__parent;
    // Raw bytes and the sub-stream built from them that header2 is parsed from.
    std::string m__raw_header2;
    std::unique_ptr<kaitai::kstream> m__io__raw_header2;
    // Per-record raw bytes and the per-record sub-streams the records are parsed from.
    std::unique_ptr<std::vector<std::string>> m__raw_records;
    std::unique_ptr<std::vector<std::unique_ptr<kaitai::kstream>>> m__io__raw_records;

public:
    // Pointer getters return non-owning pointers; string getters return copies.
    header1_t* header1() const { return m_header1.get(); }
    header2_t* header2() const { return m_header2.get(); }
    /** Single byte following header2; the parser requires it to be 0x0D. */
    std::string header_terminator() const { return m_header_terminator; }
    std::vector<std::unique_ptr<record_t>>* records() const { return m_records.get(); }
    dbf_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
    std::string _raw_header2() const { return m__raw_header2; }
    kaitai::kstream* _io__raw_header2() const { return m__io__raw_header2.get(); }
    std::vector<std::string>* _raw_records() const { return m__raw_records.get(); }
    std::vector<std::unique_ptr<kaitai::kstream>>* _io__raw_records() const { return m__io__raw_records.get(); }
};

dbf.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "dbf.h"
#include "kaitai/exceptions.h"

/**
 * Builds the top-level .dbf object and parses the stream right away.
 *
 * The p__root argument is not consulted: this object always acts as its
 * own root. All owning pointers start out empty and are filled by _read().
 */
dbf_t::dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_header1(nullptr),
      m_header2(nullptr),
      m_records(nullptr),
      m__root(this),
      m__parent(p__parent),
      m__io__raw_header2(nullptr),
      m__raw_records(nullptr),
      m__io__raw_records(nullptr) {
    _read();
}

/**
 * Parses the whole file: header1, header2 (from its own sub-stream), the
 * 0x0D header terminator, and then num_records fixed-size records, each
 * parsed from its own sub-stream of len_record bytes.
 *
 * Throws kaitai::validation_not_equal_error when the terminator byte is not
 * 0x0D; any exception raised by the underlying stream reads propagates.
 */
void dbf_t::_read() {
    m_header1 = std::unique_ptr<header1_t>(new header1_t(m__io, this, m__root));

    // header2 occupies the rest of the header: len_header minus the 12 bytes
    // of header1 and the 1-byte terminator. The arithmetic is done in int, so
    // a len_header below 13 yields a negative length that is passed on to
    // read_bytes unchanged.
    m__raw_header2 = m__io->read_bytes(((header1()->len_header() - 12) - 1));
    m__io__raw_header2 = std::unique_ptr<kaitai::kstream>(new kaitai::kstream(m__raw_header2));
    m_header2 = std::unique_ptr<header2_t>(new header2_t(m__io__raw_header2.get(), this, m__root));

    m_header_terminator = m__io->read_bytes(1);
    if (!(header_terminator() == std::string("\x0D", 1))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x0D", 1), header_terminator(), _io(), std::string("/seq/2"));
    }

    m__raw_records = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>());
    m__io__raw_records = std::unique_ptr<std::vector<std::unique_ptr<kaitai::kstream>>>(new std::vector<std::unique_ptr<kaitai::kstream>>());
    m_records = std::unique_ptr<std::vector<std::unique_ptr<record_t>>>(new std::vector<std::unique_ptr<record_t>>());

    // Keep the counter in the field's own unsigned type: narrowing the
    // uint32_t count to int would turn large counts negative and silently
    // skip every record.
    const uint32_t num_records = header1()->num_records();
    const uint16_t len_record = header1()->len_record();
    for (uint32_t i = 0; i < num_records; i++) {
        m__raw_records->push_back(m__io->read_bytes(len_record));

        // Own the sub-stream before anything else can throw, so it cannot leak.
        std::unique_ptr<kaitai::kstream> record_io(new kaitai::kstream(m__raw_records->back()));
        kaitai::kstream* const record_io_ptr = record_io.get();
        m__io__raw_records->push_back(std::move(record_io));

        m_records->push_back(std::unique_ptr<record_t>(new record_t(record_io_ptr, this, m__root)));
    }
}

/** Destructor; delegates to _clean_up(). */
dbf_t::~dbf_t() {
    _clean_up();
}

/** Empty body: no manual cleanup is performed here. */
void dbf_t::_clean_up() {
}

/**
 * Records parent and root, starts with all owned attributes empty, and
 * parses the header from p__io immediately.
 */
dbf_t::header2_t::header2_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_header_dbase_3(nullptr),
      m_header_dbase_7(nullptr),
      m_fields(nullptr),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Parses the level-specific header (only for dBASE level 3 or 7) and then
 * field descriptors until the stream is exhausted.
 */
void dbf_t::header2_t::_read() {
    // dbase_level() caches its result, so a single lookup serves both checks.
    const int32_t level = _root()->header1()->dbase_level();

    n_header_dbase_3 = (level != 3);
    if (!n_header_dbase_3) {
        m_header_dbase_3.reset(new header_dbase_3_t(m__io, this, m__root));
    }

    n_header_dbase_7 = (level != 7);
    if (!n_header_dbase_7) {
        m_header_dbase_7.reset(new header_dbase_7_t(m__io, this, m__root));
    }

    m_fields.reset(new std::vector<std::unique_ptr<field_t>>());
    while (!m__io->is_eof()) {
        std::unique_ptr<field_t> descriptor(new field_t(m__io, this, m__root));
        m_fields->push_back(std::move(descriptor));
    }
}

/** Destructor; delegates to _clean_up(). */
dbf_t::header2_t::~header2_t() {
    _clean_up();
}

/**
 * Checks whether each optional header was parsed; both branches are empty,
 * so nothing is released manually here.
 */
void dbf_t::header2_t::_clean_up() {
    if (!n_header_dbase_3) {
    }
    if (!n_header_dbase_7) {
    }
}

/** Records parent and root, then parses one field descriptor from p__io. */
dbf_t::field_t::field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Reads one field descriptor; the reads below consume 32 bytes in total
 * (11 + 1 + 4 + 1 + 1 + 2 + 1 + 2 + 1 + 8).
 */
void dbf_t::field_t::_read() {
    // 11-byte name: cut at the first zero byte (terminator not included), decoded as ASCII.
    m_name = kaitai::kstream::bytes_to_str(kaitai::kstream::bytes_terminate(m__io->read_bytes(11), 0, false), std::string("ASCII"));
    m_datatype = m__io->read_u1();
    m_data_address = m__io->read_u4le();
    // Byte length used by record_t::_read() when reading this field from a record.
    m_length = m__io->read_u1();
    m_decimal_count = m__io->read_u1();
    m_reserved1 = m__io->read_bytes(2);
    m_work_area_id = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(2);
    m_set_fields_flag = m__io->read_u1();
    m_reserved3 = m__io->read_bytes(8);
}

/** Destructor; delegates to _clean_up(). */
dbf_t::field_t::~field_t() {
    _clean_up();
}

/** Empty body: no manual cleanup is performed here. */
void dbf_t::field_t::_clean_up() {
}

/**
 * Records parent and root, marks the dbase_level cache as not yet computed,
 * and parses the fixed header fields from p__io.
 */
dbf_t::header1_t::header1_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      f_dbase_level(false),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Reads the fixed first part of the header: 12 bytes in total
 * (1 + 1 + 1 + 1 + 4 + 2 + 2), multi-byte integers little-endian.
 */
void dbf_t::header1_t::_read() {
    // Its low three bits form dbase_level().
    m_version = m__io->read_u1();
    m_last_update_y = m__io->read_u1();
    m_last_update_m = m__io->read_u1();
    m_last_update_d = m__io->read_u1();
    // Number of records dbf_t::_read() will parse.
    m_num_records = m__io->read_u4le();
    // dbf_t::_read() derives the size of header2 from this value.
    m_len_header = m__io->read_u2le();
    // Size in bytes of each record's sub-stream.
    m_len_record = m__io->read_u2le();
}

/** Destructor; delegates to _clean_up(). */
dbf_t::header1_t::~header1_t() {
    _clean_up();
}

/** Empty body: no manual cleanup is performed here. */
void dbf_t::header1_t::_clean_up() {
}

/**
 * Returns the low three bits of the version byte. The value is computed on
 * the first call and served from the cache afterwards.
 */
int32_t dbf_t::header1_t::dbase_level() {
    if (!f_dbase_level) {
        m_dbase_level = (version() & 7);
        f_dbase_level = true;
    }
    return m_dbase_level;
}

/** Records parent and root, then parses the level-3 header part from p__io. */
dbf_t::header_dbase_3_t::header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/** Reads three reserved byte runs, 20 bytes in total (3 + 13 + 4); no validation is applied. */
void dbf_t::header_dbase_3_t::_read() {
    m_reserved1 = m__io->read_bytes(3);
    m_reserved2 = m__io->read_bytes(13);
    m_reserved3 = m__io->read_bytes(4);
}

/** Destructor; delegates to _clean_up(). */
dbf_t::header_dbase_3_t::~header_dbase_3_t() {
    _clean_up();
}

/** Empty body: no manual cleanup is performed here. */
void dbf_t::header_dbase_3_t::_clean_up() {
}

/** Records parent and root, then parses the level-7 header part from p__io. */
dbf_t::header_dbase_7_t::header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Reads the level-7 header part, 56 bytes in total
 * (2 + 1 + 1 + 12 + 1 + 1 + 2 + 32 + 4).
 *
 * Throws kaitai::validation_not_equal_error when reserved1 or reserved3 is
 * not exactly two zero bytes; parsing stops at the first failed check.
 */
void dbf_t::header_dbase_7_t::_read() {
    m_reserved1 = m__io->read_bytes(2);
    // The explicit length keeps the embedded NUL bytes in the expected value.
    if (!(reserved1() == std::string("\x00\x00", 2))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x00\x00", 2), reserved1(), _io(), std::string("/types/header_dbase_7/seq/0"));
    }
    m_has_incomplete_transaction = m__io->read_u1();
    m_dbase_iv_encryption = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(12);
    m_production_mdx = m__io->read_u1();
    m_language_driver_id = m__io->read_u1();
    m_reserved3 = m__io->read_bytes(2);
    if (!(reserved3() == std::string("\x00\x00", 2))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x00\x00", 2), reserved3(), _io(), std::string("/types/header_dbase_7/seq/6"));
    }
    // Stored as raw bytes; no terminator stripping or decoding is applied.
    m_language_driver_name = m__io->read_bytes(32);
    m_reserved4 = m__io->read_bytes(4);
}

/** Destructor; delegates to _clean_up(). */
dbf_t::header_dbase_7_t::~header_dbase_7_t() {
    _clean_up();
}

/** Empty body: no manual cleanup is performed here. */
void dbf_t::header_dbase_7_t::_clean_up() {
}

/**
 * Records parent and root, starts with an empty field list, and parses one
 * record from p__io immediately.
 */
dbf_t::record_t::record_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_record_fields(nullptr),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

void dbf_t::record_t::_read() {
    m_deleted = static_cast<dbf_t::delete_state_t>(m__io->read_u1());
    m_record_fields = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>());
    const int l_record_fields = _root()->header2()->fields()->size();
    for (int i = 0; i < l_record_fields; i++) {
        m_record_fields->push_back(std::move(m__io->read_bytes(_root()->header2()->fields()->at(i)->length())));
    }
}

/** Destructor; delegates to _clean_up(). */
dbf_t::record_t::~record_t() {
    _clean_up();
}

/** Empty body: no manual cleanup is performed here. */
void dbf_t::record_t::_clean_up() {
}