.dbf file format of dBASE: C++98/STL parsing library

.dbf is a relational database format introduced in the DOS database management system dBASE in 1982.

One .dbf file corresponds to one table and contains a series of headers, specification of fields, and a number of fixed-size records.

This page hosts a formal specification of the .dbf file format of dBASE using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++98/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer.
    #include <fstream>
    
    std::ifstream is("path/to/local/file.dbf", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into a Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    dbf_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.header1() // => get header1

C++98/STL source code to parse .dbf file format of dBASE

dbf.h

#ifndef DBF_H_
#define DBF_H_

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <vector>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif

/**
 * .dbf is a relational database format introduced in DOS database
 * management system dBASE in 1982.
 * 
 * One .dbf file corresponds to one table and contains a series of headers,
 * specification of fields, and a number of fixed-size records.
 * \sa http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm Source
 */

class dbf_t : public kaitai::kstruct {

public:
    class header2_t;
    class field_t;
    class header1_t;
    class header_dbase_3_t;
    class header_dbase_7_t;
    class record_t;

    enum delete_state_t {
        DELETE_STATE_FALSE = 32,
        DELETE_STATE_TRUE = 42
    };

    dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, dbf_t* p__root = 0);

private:
    void _read();
    void _clean_up();

public:
    ~dbf_t();

    class header2_t : public kaitai::kstruct {

    public:

        header2_t(kaitai::kstream* p__io, dbf_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~header2_t();

    private:
        header_dbase_3_t* m_header_dbase_3;
        bool n_header_dbase_3;

    public:
        bool _is_null_header_dbase_3() { header_dbase_3(); return n_header_dbase_3; };

    private:
        header_dbase_7_t* m_header_dbase_7;
        bool n_header_dbase_7;

    public:
        bool _is_null_header_dbase_7() { header_dbase_7(); return n_header_dbase_7; };

    private:
        std::vector<field_t*>* m_fields;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        header_dbase_3_t* header_dbase_3() const { return m_header_dbase_3; }
        header_dbase_7_t* header_dbase_7() const { return m_header_dbase_7; }
        std::vector<field_t*>* fields() const { return m_fields; }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

    class field_t : public kaitai::kstruct {

    public:

        field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~field_t();

    private:
        std::string m_name;
        uint8_t m_datatype;
        uint32_t m_data_address;
        uint8_t m_length;
        uint8_t m_decimal_count;
        std::string m_reserved1;
        uint8_t m_work_area_id;
        std::string m_reserved2;
        uint8_t m_set_fields_flag;
        std::string m_reserved3;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string name() const { return m_name; }
        uint8_t datatype() const { return m_datatype; }
        uint32_t data_address() const { return m_data_address; }
        uint8_t length() const { return m_length; }
        uint8_t decimal_count() const { return m_decimal_count; }
        std::string reserved1() const { return m_reserved1; }
        uint8_t work_area_id() const { return m_work_area_id; }
        std::string reserved2() const { return m_reserved2; }
        uint8_t set_fields_flag() const { return m_set_fields_flag; }
        std::string reserved3() const { return m_reserved3; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    /**
     * \sa http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm - section 1.1
     */

    class header1_t : public kaitai::kstruct {

    public:

        header1_t(kaitai::kstream* p__io, dbf_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~header1_t();

    private:
        bool f_dbase_level;
        int32_t m_dbase_level;

    public:
        int32_t dbase_level();

    private:
        uint8_t m_version;
        uint8_t m_last_update_y;
        uint8_t m_last_update_m;
        uint8_t m_last_update_d;
        uint32_t m_num_records;
        uint16_t m_len_header;
        uint16_t m_len_record;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        uint8_t version() const { return m_version; }
        uint8_t last_update_y() const { return m_last_update_y; }
        uint8_t last_update_m() const { return m_last_update_m; }
        uint8_t last_update_d() const { return m_last_update_d; }
        uint32_t num_records() const { return m_num_records; }
        uint16_t len_header() const { return m_len_header; }
        uint16_t len_record() const { return m_len_record; }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

    class header_dbase_3_t : public kaitai::kstruct {

    public:

        header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~header_dbase_3_t();

    private:
        std::string m_reserved1;
        std::string m_reserved2;
        std::string m_reserved3;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string reserved1() const { return m_reserved1; }
        std::string reserved2() const { return m_reserved2; }
        std::string reserved3() const { return m_reserved3; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    class header_dbase_7_t : public kaitai::kstruct {

    public:

        header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~header_dbase_7_t();

    private:
        std::string m_reserved1;
        uint8_t m_has_incomplete_transaction;
        uint8_t m_dbase_iv_encryption;
        std::string m_reserved2;
        uint8_t m_production_mdx;
        uint8_t m_language_driver_id;
        std::string m_reserved3;
        std::string m_language_driver_name;
        std::string m_reserved4;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string reserved1() const { return m_reserved1; }
        uint8_t has_incomplete_transaction() const { return m_has_incomplete_transaction; }
        uint8_t dbase_iv_encryption() const { return m_dbase_iv_encryption; }
        std::string reserved2() const { return m_reserved2; }
        uint8_t production_mdx() const { return m_production_mdx; }
        uint8_t language_driver_id() const { return m_language_driver_id; }
        std::string reserved3() const { return m_reserved3; }
        std::string language_driver_name() const { return m_language_driver_name; }
        std::string reserved4() const { return m_reserved4; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    class record_t : public kaitai::kstruct {

    public:

        record_t(kaitai::kstream* p__io, dbf_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();
        void _clean_up();

    public:
        ~record_t();

    private:
        delete_state_t m_deleted;
        std::vector<std::string>* m_record_fields;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        delete_state_t deleted() const { return m_deleted; }
        std::vector<std::string>* record_fields() const { return m_record_fields; }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

private:
    header1_t* m_header1;
    header2_t* m_header2;
    std::string m_header_terminator;
    std::vector<record_t*>* m_records;
    dbf_t* m__root;
    kaitai::kstruct* m__parent;
    std::string m__raw_header2;
    kaitai::kstream* m__io__raw_header2;
    std::vector<std::string>* m__raw_records;
    std::vector<kaitai::kstream*>* m__io__raw_records;

public:
    header1_t* header1() const { return m_header1; }
    header2_t* header2() const { return m_header2; }
    std::string header_terminator() const { return m_header_terminator; }
    std::vector<record_t*>* records() const { return m_records; }
    dbf_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
    std::string _raw_header2() const { return m__raw_header2; }
    kaitai::kstream* _io__raw_header2() const { return m__io__raw_header2; }
    std::vector<std::string>* _raw_records() const { return m__raw_records; }
    std::vector<kaitai::kstream*>* _io__raw_records() const { return m__io__raw_records; }
};

#endif  // DBF_H_

dbf.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "dbf.h"
#include "kaitai/exceptions.h"

// Builds the top-level structure and parses it straight away. Every owning
// pointer starts out null so that _clean_up() is safe at any point of a
// partially completed _read().
dbf_t::dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_header1(0),
      m_header2(0),
      m_records(0),
      m__parent(p__parent),
      m__io__raw_header2(0),
      m__raw_records(0),
      m__io__raw_records(0) {
    // The top-level structure is its own root; p__root is not consulted.
    m__root = this;

    try {
        _read();
    } catch (...) {
        // Release whatever was built before the failure, then propagate it.
        _clean_up();
        throw;
    }
}

// Parses the file front to back: header1, header2 (through its own
// sub-stream), the one-byte header terminator, then num_records() records,
// each through its own sub-stream of len_record() bytes.
//
// Throws kaitai::validation_not_equal_error when the terminator byte is not
// 0x0D; exceptions raised by the stream reads propagate unchanged.
void dbf_t::_read() {
    m_header1 = new header1_t(m__io, this, m__root);

    // The 12 and 1 subtracted here match the 12 bytes header1 has just
    // consumed and the single terminator byte read right after header2.
    m__raw_header2 = m__io->read_bytes(((header1()->len_header() - 12) - 1));
    m__io__raw_header2 = new kaitai::kstream(m__raw_header2);
    m_header2 = new header2_t(m__io__raw_header2, this, m__root);

    m_header_terminator = m__io->read_bytes(1);
    if (!(header_terminator() == std::string("\x0D", 1))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x0D", 1), header_terminator(), _io(), std::string("/seq/2"));
    }

    m__raw_records = new std::vector<std::string>();
    m__io__raw_records = new std::vector<kaitai::kstream*>();
    m_records = new std::vector<record_t*>();

    // num_records() is a uint32_t: keep the counter unsigned so that counts
    // of 2^31 and above are not turned into a negative int, which would
    // silently skip every record instead of attempting to read them.
    const uint32_t l_records = header1()->num_records();
    for (uint32_t i = 0; i < l_records; i++) {
        m__raw_records->push_back(m__io->read_bytes(header1()->len_record()));
        kaitai::kstream* io__raw_records = new kaitai::kstream(m__raw_records->back());
        m__io__raw_records->push_back(io__raw_records);
        m_records->push_back(new record_t(io__raw_records, this, m__root));
    }
}

// Destructor: all releasing is delegated to _clean_up().
dbf_t::~dbf_t() {
    this->_clean_up();
}

void dbf_t::_clean_up() {
    if (m_header1) {
        delete m_header1; m_header1 = 0;
    }
    if (m__io__raw_header2) {
        delete m__io__raw_header2; m__io__raw_header2 = 0;
    }
    if (m_header2) {
        delete m_header2; m_header2 = 0;
    }
    if (m__raw_records) {
        delete m__raw_records; m__raw_records = 0;
    }
    if (m__io__raw_records) {
        for (std::vector<kaitai::kstream*>::iterator it = m__io__raw_records->begin(); it != m__io__raw_records->end(); ++it) {
            delete *it;
        }
        delete m__io__raw_records; m__io__raw_records = 0;
    }
    if (m_records) {
        for (std::vector<record_t*>::iterator it = m_records->begin(); it != m_records->end(); ++it) {
            delete *it;
        }
        delete m_records; m_records = 0;
    }
}

// Stores the parent/root links, puts every owned pointer and both "is null"
// flags into a defined state, then parses. If parsing throws, the partially
// built object is released and the exception is rethrown.
dbf_t::header2_t::header2_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    m_header_dbase_3 = 0;
    m_header_dbase_7 = 0;
    m_fields = 0;
    // Both flags must hold a value before _read() starts: _read() assigns
    // n_header_dbase_7 only after the dBASE 3 block has been parsed, so an
    // exception thrown there would otherwise leave _clean_up() reading an
    // uninitialized bool.
    n_header_dbase_3 = true;
    n_header_dbase_7 = true;

    try {
        _read();
    } catch(...) {
        _clean_up();
        throw;
    }
}

// Parses the level-specific block that matches header1's dbase_level() (if
// any), then keeps reading field descriptors until the stream is exhausted.
void dbf_t::header2_t::_read() {
    const int32_t level = _root()->header1()->dbase_level();

    n_header_dbase_3 = true;
    if (level == 3) {
        n_header_dbase_3 = false;
        m_header_dbase_3 = new header_dbase_3_t(m__io, this, m__root);
    }

    n_header_dbase_7 = true;
    if (level == 7) {
        n_header_dbase_7 = false;
        m_header_dbase_7 = new header_dbase_7_t(m__io, this, m__root);
    }

    m_fields = new std::vector<field_t*>();
    for (;;) {
        if (m__io->is_eof()) {
            break;
        }
        m_fields->push_back(new field_t(m__io, this, m__root));
    }
}

// Destructor: all releasing is delegated to _clean_up().
dbf_t::header2_t::~header2_t() {
    this->_clean_up();
}

// Frees the level-specific header block (whichever one exists) and all field
// descriptors, nulling each pointer afterwards.
void dbf_t::header2_t::_clean_up() {
    // Decide on the pointers alone. The constructor nulls them before
    // _read() runs, whereas n_header_dbase_3 / n_header_dbase_7 are assigned
    // only part-way through _read() and may still be unset when this runs
    // after an exception. Deleting a null pointer is a no-op.
    delete m_header_dbase_3;
    m_header_dbase_3 = 0;

    delete m_header_dbase_7;
    m_header_dbase_7 = 0;

    if (m_fields) {
        for (std::vector<field_t*>::iterator it = m_fields->begin(); it != m_fields->end(); ++it) {
            delete *it;
        }
        delete m_fields; m_fields = 0;
    }
}

// Stores the root/parent links and parses one field descriptor immediately.
dbf_t::field_t::field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    try {
        _read();
    } catch (...) {
        // Keep the clean-up-then-rethrow shape shared by all constructors.
        _clean_up();
        throw;
    }
}

void dbf_t::field_t::_read() {
    m_name = kaitai::kstream::bytes_to_str(kaitai::kstream::bytes_terminate(m__io->read_bytes(11), 0, false), std::string("ASCII"));
    m_datatype = m__io->read_u1();
    m_data_address = m__io->read_u4le();
    m_length = m__io->read_u1();
    m_decimal_count = m__io->read_u1();
    m_reserved1 = m__io->read_bytes(2);
    m_work_area_id = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(2);
    m_set_fields_flag = m__io->read_u1();
    m_reserved3 = m__io->read_bytes(8);
}

// Destructor: delegates to _clean_up(), like every other type in this file.
dbf_t::field_t::~field_t() {
    this->_clean_up();
}

// Intentionally empty: field_t::_read() allocates nothing with new, so there
// is nothing to release here.
void dbf_t::field_t::_clean_up() {
}

// Stores the root/parent links, marks the lazily computed dbase_level() as
// not yet calculated, and parses the fixed leading header immediately.
dbf_t::header1_t::header1_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      f_dbase_level(false),
      m__root(p__root),
      m__parent(p__parent) {
    try {
        _read();
    } catch (...) {
        // Keep the clean-up-then-rethrow shape shared by all constructors.
        _clean_up();
        throw;
    }
}

void dbf_t::header1_t::_read() {
    m_version = m__io->read_u1();
    m_last_update_y = m__io->read_u1();
    m_last_update_m = m__io->read_u1();
    m_last_update_d = m__io->read_u1();
    m_num_records = m__io->read_u4le();
    m_len_header = m__io->read_u2le();
    m_len_record = m__io->read_u2le();
}

// Destructor: delegates to _clean_up(), like every other type in this file.
dbf_t::header1_t::~header1_t() {
    this->_clean_up();
}

// Intentionally empty: header1_t::_read() only fills integer members, so
// there is nothing to release here.
void dbf_t::header1_t::_clean_up() {
}

// Lazily computed value: the low three bits of the version byte. The result
// is calculated on the first call and served from the cache afterwards.
int32_t dbf_t::header1_t::dbase_level() {
    if (!f_dbase_level) {
        m_dbase_level = (version() & 7);
        f_dbase_level = true;
    }
    return m_dbase_level;
}

// Stores the root/parent links and parses the dBASE 3 header block
// immediately.
dbf_t::header_dbase_3_t::header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    try {
        _read();
    } catch (...) {
        // Keep the clean-up-then-rethrow shape shared by all constructors.
        _clean_up();
        throw;
    }
}

void dbf_t::header_dbase_3_t::_read() {
    m_reserved1 = m__io->read_bytes(3);
    m_reserved2 = m__io->read_bytes(13);
    m_reserved3 = m__io->read_bytes(4);
}

// Destructor: delegates to _clean_up(), like every other type in this file.
dbf_t::header_dbase_3_t::~header_dbase_3_t() {
    this->_clean_up();
}

// Intentionally empty: header_dbase_3_t::_read() allocates nothing with new,
// so there is nothing to release here.
void dbf_t::header_dbase_3_t::_clean_up() {
}

// Stores the root/parent links and parses the dBASE 7 header block
// immediately.
dbf_t::header_dbase_7_t::header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent) {
    try {
        _read();
    } catch (...) {
        // Keep the clean-up-then-rethrow shape shared by all constructors.
        _clean_up();
        throw;
    }
}

void dbf_t::header_dbase_7_t::_read() {
    m_reserved1 = m__io->read_bytes(2);
    if (!(reserved1() == std::string("\x00\x00", 2))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x00\x00", 2), reserved1(), _io(), std::string("/types/header_dbase_7/seq/0"));
    }
    m_has_incomplete_transaction = m__io->read_u1();
    m_dbase_iv_encryption = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(12);
    m_production_mdx = m__io->read_u1();
    m_language_driver_id = m__io->read_u1();
    m_reserved3 = m__io->read_bytes(2);
    if (!(reserved3() == std::string("\x00\x00", 2))) {
        throw kaitai::validation_not_equal_error<std::string>(std::string("\x00\x00", 2), reserved3(), _io(), std::string("/types/header_dbase_7/seq/6"));
    }
    m_language_driver_name = m__io->read_bytes(32);
    m_reserved4 = m__io->read_bytes(4);
}

// Destructor: delegates to _clean_up(), like every other type in this file.
dbf_t::header_dbase_7_t::~header_dbase_7_t() {
    this->_clean_up();
}

// Intentionally empty: header_dbase_7_t::_read() allocates nothing with new,
// so there is nothing to release here.
void dbf_t::header_dbase_7_t::_clean_up() {
}

// Stores the root/parent links, nulls the owned field-value vector so that
// _clean_up() is always safe, and parses one record immediately.
dbf_t::record_t::record_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root)
    : kaitai::kstruct(p__io),
      m_record_fields(0),
      m__root(p__root),
      m__parent(p__parent) {
    try {
        _read();
    } catch (...) {
        // Release the partially filled vector, then propagate the failure.
        _clean_up();
        throw;
    }
}

void dbf_t::record_t::_read() {
    m_deleted = static_cast<dbf_t::delete_state_t>(m__io->read_u1());
    m_record_fields = new std::vector<std::string>();
    const int l_record_fields = _root()->header2()->fields()->size();
    for (int i = 0; i < l_record_fields; i++) {
        m_record_fields->push_back(m__io->read_bytes(_root()->header2()->fields()->at(i)->length()));
    }
}

// Destructor: all releasing is delegated to _clean_up().
dbf_t::record_t::~record_t() {
    this->_clean_up();
}

// Frees the vector of raw field values and nulls the pointer, so a repeated
// call is harmless.
void dbf_t::record_t::_clean_up() {
    // Deleting a null pointer is a no-op, so no guard is required.
    delete m_record_fields;
    m_record_fields = 0;
}