.dbf is a relational database format introduced in the DOS database management system dBASE in 1982.
One .dbf file corresponds to one table and contains a series of headers, specification of fields, and a number of fixed-size records.
This page hosts a formal specification of the dBASE .dbf file format, written using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
All parsing code for Ruby generated by Kaitai Struct depends on the Ruby runtime library. You have to install it before you can parse data.
The Ruby runtime library can be installed from RubyGems:
gem install kaitai-struct
Parse a local file and get the structure in memory:
data = Dbf.from_file("path/to/local/file.dbf")
Or parse the structure from a string of bytes:
bytes = "\x00\x01\x02..."
data = Dbf.new(Kaitai::Struct::Stream.new(bytes))
After that, one can get various attributes from the structure by invoking getter methods like:
data.header1 # => get header1
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
require 'kaitai/struct/struct'
# Fail at load time when the installed Kaitai Struct runtime reports a
# version lower than 0.9, the minimum this file checks for.
if Gem::Version.new(Kaitai::Struct::VERSION) < Gem::Version.new('0.9')
  raise "Incompatible Kaitai Struct Ruby API: 0.9 or later is required, but you have #{Kaitai::Struct::VERSION}"
end
##
# .dbf is a relational database format introduced in the DOS database
# management system dBASE in 1982.
#
# One .dbf file corresponds to one table and contains a series of headers,
# specification of fields, and a number of fixed-size records.
# @see http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm Source
class Dbf < Kaitai::Struct::Struct
# Lookup table for the one-byte flag that Record#_read reads at the start
# of every record: raw byte value => symbol. 32 and 42 are the ASCII codes
# of a space and an asterisk respectively.
# Frozen so the shared table cannot be mutated by accident at runtime.
DELETE_STATE = {
  32 => :delete_state_false,
  42 => :delete_state_true,
}.freeze
# Reverse lookup (symbol => raw byte value), frozen for the same reason.
I__DELETE_STATE = DELETE_STATE.invert.freeze
# Creates the top-level structure and parses it from the stream right away.
#
# @param _io [Kaitai::Struct::Stream] stream to parse from
# @param _parent [Kaitai::Struct::Struct, nil] parent structure (nil by default)
# @param _root [Kaitai::Struct::Struct] root structure (this object by default)
def initialize(_io, _parent = nil, _root = self)
  super # bare super forwards _io, _parent and _root unchanged
  _read
end
# Parses the file from @_io in order: header1, header2 (from its own
# substream), the one-byte header terminator, then header1.num_records
# records of header1.len_record bytes each.
#
# @return [Dbf] self
# @raise [Kaitai::Struct::ValidationNotEqualError] if the byte following
#   header2 is not 13 (0x0D)
def _read
  @header1 = Header1.new(@_io, self, @_root)

  # header2 takes what is left of the header: len_header minus the 12 bytes
  # Header1 reads and the 1-byte terminator checked just below.
  @_raw_header2 = @_io.read_bytes(header1.len_header - 12 - 1)
  @header2 = Header2.new(Kaitai::Struct::Stream.new(@_raw_header2), self, @_root)

  @header_terminator = @_io.read_bytes(1)
  unless header_terminator == [13].pack('C*')
    raise Kaitai::Struct::ValidationNotEqualError.new([13].pack('C*'), header_terminator, _io, "/seq/2")
  end

  # Each record is sliced out as raw bytes first and parsed from a substream
  # over that slice; both the slice and the parsed record are kept.
  @_raw_records = []
  @records = []
  header1.num_records.times do
    raw_record = @_io.read_bytes(header1.len_record)
    @_raw_records << raw_record
    @records << Record.new(Kaitai::Struct::Stream.new(raw_record), self, @_root)
  end

  self
end
# Second part of the file header: an optional block selected by
# header1.dbase_level, followed by field descriptors that fill the rest of
# the stream this object is given.
class Header2 < Kaitai::Struct::Struct
  # header_dbase_3 / header_dbase_7 stay nil unless dbase_level is 3 / 7.
  attr_reader :header_dbase_3, :header_dbase_7, :fields

  # @param _io [Kaitai::Struct::Stream] stream to parse from
  # @param _parent [Kaitai::Struct::Struct, nil] parent structure
  # @param _root [Kaitai::Struct::Struct] root structure; must already expose header1
  def initialize(_io, _parent = nil, _root = self)
    super(_io, _parent, _root)
    _read
  end

  # Reads the level-specific block (if any), then Field entries until the
  # stream reports end-of-file.
  #
  # @return [Header2] self
  def _read
    level = _root.header1.dbase_level
    @header_dbase_3 = HeaderDbase3.new(@_io, self, @_root) if level == 3
    @header_dbase_7 = HeaderDbase7.new(@_io, self, @_root) if level == 7

    # No explicit field count is read here; the end of the stream ends the list.
    @fields = []
    until @_io.eof?
      @fields << Field.new(@_io, self, @_root)
    end
    self
  end
end
# One field descriptor: 32 bytes consumed in a fixed order.
class Field < Kaitai::Struct::Struct
  attr_reader :name, :datatype, :data_address, :length, :decimal_count,
              :reserved1, :work_area_id, :reserved2, :set_fields_flag,
              :reserved3

  # @param _io [Kaitai::Struct::Stream] stream to parse from
  # @param _parent [Kaitai::Struct::Struct, nil] parent structure
  # @param _root [Kaitai::Struct::Struct] root structure
  def initialize(_io, _parent = nil, _root = self)
    super # forwards all three arguments unchanged
    _read
  end

  # Reads every member of the descriptor in on-disk order.
  #
  # @return [Field] self
  def _read
    # The name occupies 11 bytes; bytes_terminate is called with terminator
    # byte 0 and the result is tagged as ASCII.
    name_bytes = @_io.read_bytes(11)
    @name = Kaitai::Struct::Stream.bytes_terminate(name_bytes, 0, false).force_encoding("ASCII")

    @datatype = @_io.read_u1
    @data_address = @_io.read_u4le
    @length = @_io.read_u1
    @decimal_count = @_io.read_u1
    @reserved1 = @_io.read_bytes(2)
    @work_area_id = @_io.read_u1
    @reserved2 = @_io.read_bytes(2)
    @set_fields_flag = @_io.read_u1
    @reserved3 = @_io.read_bytes(8)
    self
  end
end
##
# Leading 12 bytes of the file: version byte, last-update date parts,
# record count, header length and record length.
# @see http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm - section 1.1
class Header1 < Kaitai::Struct::Struct
  attr_reader :version, :last_update_y, :last_update_m, :last_update_d,
              :num_records, :len_header, :len_record

  # @param _io [Kaitai::Struct::Stream] stream to parse from
  # @param _parent [Kaitai::Struct::Struct, nil] parent structure
  # @param _root [Kaitai::Struct::Struct] root structure
  def initialize(_io, _parent = nil, _root = self)
    super # forwards all three arguments unchanged
    _read
  end

  # Reads the members in on-disk order; multi-byte integers are little-endian.
  #
  # @return [Header1] self
  def _read
    @version = @_io.read_u1
    @last_update_y = @_io.read_u1
    @last_update_m = @_io.read_u1
    @last_update_d = @_io.read_u1
    @num_records = @_io.read_u4le
    @len_header = @_io.read_u2le
    @len_record = @_io.read_u2le
    self
  end

  # Lowest three bits of the version byte, computed on first use and cached.
  #
  # @return [Integer] version & 7
  def dbase_level
    @dbase_level ||= version & 7
  end
end
# Header block parsed when header1.dbase_level is 3: 20 bytes kept verbatim
# as three reserved chunks of 3, 13 and 4 bytes.
class HeaderDbase3 < Kaitai::Struct::Struct
  attr_reader :reserved1, :reserved2, :reserved3

  # @param _io [Kaitai::Struct::Stream] stream to parse from
  # @param _parent [Kaitai::Struct::Struct, nil] parent structure
  # @param _root [Kaitai::Struct::Struct] root structure
  def initialize(_io, _parent = nil, _root = self)
    super # forwards all three arguments unchanged
    _read
  end

  # Reads the three reserved chunks back to back, in order.
  #
  # @return [HeaderDbase3] self
  def _read
    @reserved1, @reserved2, @reserved3 = [3, 13, 4].map { |len| @_io.read_bytes(len) }
    self
  end
end
# Header block parsed when header1.dbase_level is 7 (56 bytes in total).
# reserved1 and reserved3 are validated to be two zero bytes each.
class HeaderDbase7 < Kaitai::Struct::Struct
  attr_reader :reserved1, :has_incomplete_transaction, :dbase_iv_encryption,
              :reserved2, :production_mdx, :language_driver_id, :reserved3,
              :language_driver_name, :reserved4

  # @param _io [Kaitai::Struct::Stream] stream to parse from
  # @param _parent [Kaitai::Struct::Struct, nil] parent structure
  # @param _root [Kaitai::Struct::Struct] root structure
  def initialize(_io, _parent = nil, _root = self)
    super # forwards all three arguments unchanged
    _read
  end

  # Reads the members in on-disk order, validating the two fixed-value
  # members as soon as each has been read.
  #
  # @return [HeaderDbase7] self
  # @raise [Kaitai::Struct::ValidationNotEqualError] if reserved1 or
  #   reserved3 is not two zero bytes
  def _read
    @reserved1 = @_io.read_bytes(2)
    unless reserved1 == [0, 0].pack('C*')
      raise Kaitai::Struct::ValidationNotEqualError.new([0, 0].pack('C*'), reserved1, _io, "/types/header_dbase_7/seq/0")
    end

    @has_incomplete_transaction = @_io.read_u1
    @dbase_iv_encryption = @_io.read_u1
    @reserved2 = @_io.read_bytes(12)
    @production_mdx = @_io.read_u1
    @language_driver_id = @_io.read_u1

    @reserved3 = @_io.read_bytes(2)
    unless reserved3 == [0, 0].pack('C*')
      raise Kaitai::Struct::ValidationNotEqualError.new([0, 0].pack('C*'), reserved3, _io, "/types/header_dbase_7/seq/6")
    end

    @language_driver_name = @_io.read_bytes(32)
    @reserved4 = @_io.read_bytes(4)
    self
  end
end
# One table row: a one-byte flag resolved through Dbf::DELETE_STATE,
# followed by one raw byte string per field descriptor in the root's header2.
class Record < Kaitai::Struct::Struct
  attr_reader :deleted, :record_fields

  # @param _io [Kaitai::Struct::Stream] stream to parse from
  # @param _parent [Kaitai::Struct::Struct, nil] parent structure
  # @param _root [Kaitai::Struct::Struct] root structure; must already expose header2
  def initialize(_io, _parent = nil, _root = self)
    super # forwards all three arguments unchanged
    _read
  end

  # Reads the flag byte, then slices the row into per-field byte strings
  # using each descriptor's length. Values are left as undecoded bytes.
  #
  # @return [Record] self
  def _read
    flag_byte = @_io.read_u1
    @deleted = Kaitai::Struct::Stream.resolve_enum(Dbf::DELETE_STATE, flag_byte)
    @record_fields = _root.header2.fields.map { |field| @_io.read_bytes(field.length) }
    self
  end
end
# Parsed top-level members, in the order _read populates them.
attr_reader :header1, :header2, :header_terminator, :records
# Raw byte slices that header2 and each record were parsed from.
attr_reader :_raw_header2, :_raw_records
end