.dbf file format of dBASE: C++/STL parsing library

Application

dBASE

File extension

dbf

KS implementation details

License: CC0-1.0

This page hosts a formal specification of the .dbf file format of dBASE, written in Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream).
    • One can open a stream for reading from a local file:
      #include <fstream>
      
      std::ifstream is("path/to/local/file.dbf", std::ifstream::binary);
    • Or one can prepare a stream for reading from an existing std::string str:
      #include <sstream>
      
      std::istringstream is(str);
    • Or one can parse an arbitrary char* buffer in memory, given that we know its size:
      #include <sstream>
      
      const char buf[] = { ... };
      std::string str(buf, sizeof buf);
      std::istringstream is(str);
  2. We need to wrap our input stream into a Kaitai stream:
    #include <kaitai/kaitaistream.h>
    
    kaitai::kstream ks(&is);
  3. And finally, we can invoke the parsing:
    dbf_t data(&ks);

After that, one can get various attributes from the structure by invoking getter methods like:

data.header1() // => get header1

C++/STL source code to parse .dbf file format of dBASE

dbf.h

#ifndef DBF_H_
#define DBF_H_

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"

#include <stdint.h>
#include <vector>

#if KAITAI_STRUCT_VERSION < 7000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.7 or later is required"
#endif

class dbf_t : public kaitai::kstruct {

public:
    class header2_t;
    class field_t;
    class header1_t;
    class header_dbase_3_t;
    class header_dbase_7_t;

    dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = 0, dbf_t* p__root = 0);

private:
    void _read();

public:
    ~dbf_t();

    class header2_t : public kaitai::kstruct {

    public:

        header2_t(kaitai::kstream* p__io, dbf_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();

    public:
        ~header2_t();

    private:
        header_dbase_3_t* m_header_dbase_3;
        bool n_header_dbase_3;

    public:
        bool _is_null_header_dbase_3() { header_dbase_3(); return n_header_dbase_3; };

    private:
        header_dbase_7_t* m_header_dbase_7;
        bool n_header_dbase_7;

    public:
        bool _is_null_header_dbase_7() { header_dbase_7(); return n_header_dbase_7; };

    private:
        std::vector<field_t*>* m_fields;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        header_dbase_3_t* header_dbase_3() const { return m_header_dbase_3; }
        header_dbase_7_t* header_dbase_7() const { return m_header_dbase_7; }
        std::vector<field_t*>* fields() const { return m_fields; }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

    class field_t : public kaitai::kstruct {

    public:

        field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();

    public:
        ~field_t();

    private:
        std::string m_name;
        uint8_t m_datatype;
        uint32_t m_data_address;
        uint8_t m_length;
        uint8_t m_decimal_count;
        std::string m_reserved1;
        uint8_t m_work_area_id;
        std::string m_reserved2;
        uint8_t m_set_fields_flag;
        std::string m_reserved3;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string name() const { return m_name; }
        uint8_t datatype() const { return m_datatype; }
        uint32_t data_address() const { return m_data_address; }
        uint8_t length() const { return m_length; }
        uint8_t decimal_count() const { return m_decimal_count; }
        std::string reserved1() const { return m_reserved1; }
        uint8_t work_area_id() const { return m_work_area_id; }
        std::string reserved2() const { return m_reserved2; }
        uint8_t set_fields_flag() const { return m_set_fields_flag; }
        std::string reserved3() const { return m_reserved3; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    /**
     * \sa - section 1.1
     */

    class header1_t : public kaitai::kstruct {

    public:

        header1_t(kaitai::kstream* p__io, dbf_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();

    public:
        ~header1_t();

    private:
        bool f_dbase_level;
        int32_t m_dbase_level;

    public:
        int32_t dbase_level();

    private:
        uint8_t m_version;
        uint8_t m_last_update_y;
        uint8_t m_last_update_m;
        uint8_t m_last_update_d;
        uint32_t m_num_records;
        uint16_t m_len_header;
        uint16_t m_len_record;
        dbf_t* m__root;
        dbf_t* m__parent;

    public:
        uint8_t version() const { return m_version; }
        uint8_t last_update_y() const { return m_last_update_y; }
        uint8_t last_update_m() const { return m_last_update_m; }
        uint8_t last_update_d() const { return m_last_update_d; }
        uint32_t num_records() const { return m_num_records; }
        uint16_t len_header() const { return m_len_header; }
        uint16_t len_record() const { return m_len_record; }
        dbf_t* _root() const { return m__root; }
        dbf_t* _parent() const { return m__parent; }
    };

    class header_dbase_3_t : public kaitai::kstruct {

    public:

        header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();

    public:
        ~header_dbase_3_t();

    private:
        std::string m_reserved1;
        std::string m_reserved2;
        std::string m_reserved3;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string reserved1() const { return m_reserved1; }
        std::string reserved2() const { return m_reserved2; }
        std::string reserved3() const { return m_reserved3; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

    class header_dbase_7_t : public kaitai::kstruct {

    public:

        header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent = 0, dbf_t* p__root = 0);

    private:
        void _read();

    public:
        ~header_dbase_7_t();

    private:
        std::string m_reserved1;
        uint8_t m_has_incomplete_transaction;
        uint8_t m_dbase_iv_encryption;
        std::string m_reserved2;
        uint8_t m_production_mdx;
        uint8_t m_language_driver_id;
        std::string m_reserved3;
        std::string m_language_driver_name;
        std::string m_reserved4;
        dbf_t* m__root;
        dbf_t::header2_t* m__parent;

    public:
        std::string reserved1() const { return m_reserved1; }
        uint8_t has_incomplete_transaction() const { return m_has_incomplete_transaction; }
        uint8_t dbase_iv_encryption() const { return m_dbase_iv_encryption; }
        std::string reserved2() const { return m_reserved2; }
        uint8_t production_mdx() const { return m_production_mdx; }
        uint8_t language_driver_id() const { return m_language_driver_id; }
        std::string reserved3() const { return m_reserved3; }
        std::string language_driver_name() const { return m_language_driver_name; }
        std::string reserved4() const { return m_reserved4; }
        dbf_t* _root() const { return m__root; }
        dbf_t::header2_t* _parent() const { return m__parent; }
    };

private:
    header1_t* m_header1;
    header2_t* m_header2;
    std::vector<std::string>* m_records;
    dbf_t* m__root;
    kaitai::kstruct* m__parent;
    std::string m__raw_header2;
    kaitai::kstream* m__io__raw_header2;

public:
    header1_t* header1() const { return m_header1; }
    header2_t* header2() const { return m_header2; }
    std::vector<std::string>* records() const { return m_records; }
    dbf_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
    std::string _raw_header2() const { return m__raw_header2; }
    kaitai::kstream* _io__raw_header2() const { return m__io__raw_header2; }
};

#endif  // DBF_H_

dbf.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "dbf.h"



dbf_t::dbf_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = this;
    _read();
}

void dbf_t::_read() {
    m_header1 = new header1_t(m__io, this, m__root);
    m__raw_header2 = m__io->read_bytes((header1()->len_header() - 12));
    m__io__raw_header2 = new kaitai::kstream(m__raw_header2);
    m_header2 = new header2_t(m__io__raw_header2, this, m__root);
    int l_records = header1()->num_records();
    m_records = new std::vector<std::string>();
    m_records->reserve(l_records);
    for (int i = 0; i < l_records; i++) {
        m_records->push_back(m__io->read_bytes(header1()->len_record()));
    }
}

dbf_t::~dbf_t() {
    delete m_header1;
    delete m__io__raw_header2;
    delete m_header2;
    delete m_records;
}

dbf_t::header2_t::header2_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    _read();
}

void dbf_t::header2_t::_read() {
    n_header_dbase_3 = true;
    if (_root()->header1()->dbase_level() == 3) {
        n_header_dbase_3 = false;
        m_header_dbase_3 = new header_dbase_3_t(m__io, this, m__root);
    }
    n_header_dbase_7 = true;
    if (_root()->header1()->dbase_level() == 7) {
        n_header_dbase_7 = false;
        m_header_dbase_7 = new header_dbase_7_t(m__io, this, m__root);
    }
    int l_fields = 11;
    m_fields = new std::vector<field_t*>();
    m_fields->reserve(l_fields);
    for (int i = 0; i < l_fields; i++) {
        m_fields->push_back(new field_t(m__io, this, m__root));
    }
}

dbf_t::header2_t::~header2_t() {
    if (!n_header_dbase_3) {
        delete m_header_dbase_3;
    }
    if (!n_header_dbase_7) {
        delete m_header_dbase_7;
    }
    for (std::vector<field_t*>::iterator it = m_fields->begin(); it != m_fields->end(); ++it) {
        delete *it;
    }
    delete m_fields;
}

dbf_t::field_t::field_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    _read();
}

void dbf_t::field_t::_read() {
    m_name = kaitai::kstream::bytes_to_str(m__io->read_bytes(11), std::string("ASCII"));
    m_datatype = m__io->read_u1();
    m_data_address = m__io->read_u4le();
    m_length = m__io->read_u1();
    m_decimal_count = m__io->read_u1();
    m_reserved1 = m__io->read_bytes(2);
    m_work_area_id = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(2);
    m_set_fields_flag = m__io->read_u1();
    m_reserved3 = m__io->read_bytes(8);
}

dbf_t::field_t::~field_t() {
}

dbf_t::header1_t::header1_t(kaitai::kstream* p__io, dbf_t* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    f_dbase_level = false;
    _read();
}

void dbf_t::header1_t::_read() {
    m_version = m__io->read_u1();
    m_last_update_y = m__io->read_u1();
    m_last_update_m = m__io->read_u1();
    m_last_update_d = m__io->read_u1();
    m_num_records = m__io->read_u4le();
    m_len_header = m__io->read_u2le();
    m_len_record = m__io->read_u2le();
}

dbf_t::header1_t::~header1_t() {
}

int32_t dbf_t::header1_t::dbase_level() {
    if (f_dbase_level)
        return m_dbase_level;
    m_dbase_level = (version() & 7);
    f_dbase_level = true;
    return m_dbase_level;
}

dbf_t::header_dbase_3_t::header_dbase_3_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    _read();
}

void dbf_t::header_dbase_3_t::_read() {
    m_reserved1 = m__io->read_bytes(3);
    m_reserved2 = m__io->read_bytes(13);
    m_reserved3 = m__io->read_bytes(4);
}

dbf_t::header_dbase_3_t::~header_dbase_3_t() {
}

dbf_t::header_dbase_7_t::header_dbase_7_t(kaitai::kstream* p__io, dbf_t::header2_t* p__parent, dbf_t* p__root) : kaitai::kstruct(p__io) {
    m__parent = p__parent;
    m__root = p__root;
    _read();
}

void dbf_t::header_dbase_7_t::_read() {
    m_reserved1 = m__io->ensure_fixed_contents(std::string("\x00\x00", 2));
    m_has_incomplete_transaction = m__io->read_u1();
    m_dbase_iv_encryption = m__io->read_u1();
    m_reserved2 = m__io->read_bytes(12);
    m_production_mdx = m__io->read_u1();
    m_language_driver_id = m__io->read_u1();
    m_reserved3 = m__io->ensure_fixed_contents(std::string("\x00\x00", 2));
    m_language_driver_name = m__io->read_bytes(32);
    m_reserved4 = m__io->read_bytes(4);
}

dbf_t::header_dbase_7_t::~header_dbase_7_t() {
}