.dbf file format of dBASE: Ruby parsing library

.dbf is a relational database format introduced in the DOS database management system dBASE in 1982.

One .dbf file corresponds to one table and contains a series of headers, specification of fields, and a number of fixed-size records.

This page hosts a formal specification of the .dbf file format of dBASE using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for Ruby generated by Kaitai Struct depends on the Ruby runtime library. You have to install it before you can parse data.

The Ruby runtime library can be installed from RubyGems:

gem install kaitai-struct

Code

Parse a local file and get the structure in memory:

data = Dbf.from_file("path/to/local/file.dbf")

Or parse the structure from a string of bytes:

bytes = "\x00\x01\x02..."
data = Dbf.new(Kaitai::Struct::Stream.new(bytes))

After that, one can get various attributes from the structure by invoking getter methods like:

data.header1 # => get header1

Ruby source code to parse .dbf file format of dBASE

dbf.rb

# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

require 'kaitai/struct/struct'

# Refuse to load against a Kaitai Struct runtime older than 0.9, the API
# level this generated code was built for.
if Gem::Version.new(Kaitai::Struct::VERSION) < Gem::Version.new('0.9')
  raise "Incompatible Kaitai Struct Ruby API: 0.9 or later is required, but you have #{Kaitai::Struct::VERSION}"
end


##
# .dbf is a relational database format introduced in the DOS database
# management system dBASE in 1982.
# 
# One .dbf file corresponds to one table and contains a series of headers,
# specification of fields, and a number of fixed-size records.
# @see http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm Source
class Dbf < Kaitai::Struct::Struct

  # Maps the raw marker byte read at the start of each record to a symbol.
  # Frozen so the shared lookup table cannot be modified by accident at
  # runtime.
  DELETE_STATE = {
    32 => :delete_state_false,
    42 => :delete_state_true,
  }.freeze

  # Reverse lookup (symbol => raw byte value), derived from DELETE_STATE.
  I__DELETE_STATE = DELETE_STATE.invert.freeze
  ##
  # Hands the stream, parent and root to Kaitai::Struct::Struct, then parses
  # the structure immediately via #_read.
  def initialize(_io, _parent = nil, _root = self)
    # Bare `super` forwards _io, _parent and _root exactly as received.
    super
    _read
  end

  ##
  # Reads the complete file from the stream in order: Header1, Header2 (on
  # its own substream), the one-byte header terminator, and finally
  # +header1.num_records+ records of +header1.len_record+ bytes each.
  #
  # Raises Kaitai::Struct::ValidationNotEqualError when the terminator byte
  # is not 13. Returns +self+.
  def _read
    @header1 = Header1.new(@_io, self, @_root)

    # Header2 occupies the rest of the header: the declared header length
    # minus the 12 bytes Header1 reads and the 1-byte terminator read below.
    header2_size = (header1.len_header - 12) - 1
    @_raw_header2 = @_io.read_bytes(header2_size)
    header2_io = Kaitai::Struct::Stream.new(@_raw_header2)
    @header2 = Header2.new(header2_io, self, @_root)

    @header_terminator = @_io.read_bytes(1)
    expected_terminator = [13].pack('C*')
    unless header_terminator == expected_terminator
      raise Kaitai::Struct::ValidationNotEqualError.new(expected_terminator, header_terminator, _io, "/seq/2")
    end

    @_raw_records = []
    @records = []
    header1.num_records.times do
      # Each record is parsed from its own fixed-size substream.
      raw_record = @_io.read_bytes(header1.len_record)
      @_raw_records << raw_record
      @records << Record.new(Kaitai::Struct::Stream.new(raw_record), self, @_root)
    end

    self
  end
  ##
  # Second header section: an optional level-specific header (dBASE level 3
  # or 7, as reported by the root's Header1) followed by field descriptors
  # that fill the remainder of the stream.
  class Header2 < Kaitai::Struct::Struct
    attr_reader :header_dbase_3, :header_dbase_7, :fields

    def initialize(_io, _parent = nil, _root = self)
      super(_io, _parent, _root)
      _read
    end

    ##
    # Parses the level-specific header, if any, then keeps reading Field
    # entries until the stream is exhausted. Returns +self+.
    def _read
      # Only one of the two headers is populated; the other reader stays nil.
      case _root.header1.dbase_level
      when 3
        @header_dbase_3 = HeaderDbase3.new(@_io, self, @_root)
      when 7
        @header_dbase_7 = HeaderDbase7.new(@_io, self, @_root)
      end

      @fields = []
      @fields << Field.new(@_io, self, @_root) until @_io.eof?

      self
    end
  end
  ##
  # One field descriptor: 32 bytes read sequentially from the stream.
  class Field < Kaitai::Struct::Struct
    attr_reader :name, :datatype, :data_address, :length, :decimal_count,
                :reserved1, :work_area_id, :reserved2, :set_fields_flag,
                :reserved3

    def initialize(_io, _parent = nil, _root = self)
      super(_io, _parent, _root)
      _read
    end

    ##
    # Reads every attribute of the descriptor in file order. Returns +self+.
    def _read
      # The name lives in a fixed 11-byte slot; bytes_terminate is given 0
      # as the terminator (presumably cutting at the first zero byte --
      # confirm against the Kaitai runtime).
      raw_name = @_io.read_bytes(11)
      @name = Kaitai::Struct::Stream.bytes_terminate(raw_name, 0, false).force_encoding("ASCII")

      @datatype = @_io.read_u1
      @data_address = @_io.read_u4le
      @length = @_io.read_u1
      @decimal_count = @_io.read_u1
      @reserved1 = @_io.read_bytes(2)
      @work_area_id = @_io.read_u1
      @reserved2 = @_io.read_bytes(2)
      @set_fields_flag = @_io.read_u1
      @reserved3 = @_io.read_bytes(8)

      self
    end
  end

  ##
  # Fixed 12-byte leading header: version byte, last-update date, record
  # count, header length and record length.
  # @see http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm - section 1.1
  class Header1 < Kaitai::Struct::Struct
    attr_reader :version, :last_update_y, :last_update_m, :last_update_d,
                :num_records, :len_header, :len_record

    def initialize(_io, _parent = nil, _root = self)
      super(_io, _parent, _root)
      _read
    end

    ##
    # Reads all header attributes in file order. Returns +self+.
    def _read
      @version = @_io.read_u1
      @last_update_y = @_io.read_u1
      @last_update_m = @_io.read_u1
      @last_update_d = @_io.read_u1
      @num_records = @_io.read_u4le
      @len_header = @_io.read_u2le
      @len_record = @_io.read_u2le
      self
    end

    ##
    # The low three bits of +version+, computed on first access and cached.
    def dbase_level
      @dbase_level = version & 7 if @dbase_level.nil?
      @dbase_level
    end
  end
  ##
  # Level-3 specific header: 20 bytes split into three reserved byte runs.
  class HeaderDbase3 < Kaitai::Struct::Struct
    attr_reader :reserved1, :reserved2, :reserved3

    def initialize(_io, _parent = nil, _root = self)
      super(_io, _parent, _root)
      _read
    end

    ##
    # Reads the three reserved areas (3, 13 and 4 bytes). Returns +self+.
    def _read
      @reserved1 = @_io.read_bytes(3)
      @reserved2 = @_io.read_bytes(13)
      @reserved3 = @_io.read_bytes(4)
      self
    end
  end
  ##
  # Level-7 specific header. Two of its reserved areas are validated to be
  # exactly two zero bytes.
  class HeaderDbase7 < Kaitai::Struct::Struct
    attr_reader :reserved1, :has_incomplete_transaction, :dbase_iv_encryption,
                :reserved2, :production_mdx, :language_driver_id, :reserved3,
                :language_driver_name, :reserved4

    def initialize(_io, _parent = nil, _root = self)
      super(_io, _parent, _root)
      _read
    end

    ##
    # Reads all attributes in file order. Raises
    # Kaitai::Struct::ValidationNotEqualError if +reserved1+ or +reserved3+
    # is anything other than two zero bytes. Returns +self+.
    def _read
      two_zero_bytes = [0, 0].pack('C*')

      @reserved1 = @_io.read_bytes(2)
      unless reserved1 == two_zero_bytes
        raise Kaitai::Struct::ValidationNotEqualError.new(two_zero_bytes, reserved1, _io, "/types/header_dbase_7/seq/0")
      end

      @has_incomplete_transaction = @_io.read_u1
      @dbase_iv_encryption = @_io.read_u1
      @reserved2 = @_io.read_bytes(12)
      @production_mdx = @_io.read_u1
      @language_driver_id = @_io.read_u1

      @reserved3 = @_io.read_bytes(2)
      unless reserved3 == two_zero_bytes
        raise Kaitai::Struct::ValidationNotEqualError.new(two_zero_bytes, reserved3, _io, "/types/header_dbase_7/seq/6")
      end

      @language_driver_name = @_io.read_bytes(32)
      @reserved4 = @_io.read_bytes(4)

      self
    end
  end
  ##
  # One table record: a deletion marker byte followed by one raw byte string
  # per field declared in the root's Header2.
  class Record < Kaitai::Struct::Struct
    attr_reader :deleted, :record_fields

    def initialize(_io, _parent = nil, _root = self)
      super(_io, _parent, _root)
      _read
    end

    ##
    # Reads the marker byte (resolved through Dbf::DELETE_STATE) and then,
    # for every field descriptor in order, as many bytes as that
    # descriptor's +length+. Returns +self+.
    def _read
      marker = @_io.read_u1
      @deleted = Kaitai::Struct::Stream.resolve_enum(Dbf::DELETE_STATE, marker)

      @record_fields = []
      _root.header2.fields.each do |field|
        @record_fields << @_io.read_bytes(field.length)
      end

      self
    end
  end
  # Parsed sections, in file order.
  attr_reader :header1, :header2, :header_terminator, :records

  # Raw byte strings the header2 and records substreams were built from.
  attr_reader :_raw_header2, :_raw_records
end