Portable Compiled Format (PCF) font is a bitmap font format originating from X11 Window System. It matches BDF format (which is text-based) closely, but instead being binary and platform-independent (as opposed to previously used SNF binary format) due to introduced features to handle different endianness and bit order.
The overall composition of the format is straightforward: it's more or less classic directory of type-offset-size pointers, pointing to what PCF format calls "tables". Each table carries a certain piece of information related to the font (metadata properties, metrics, bitmaps, mapping of glyphs to characters, etc).
This page hosts a formal specification of Portable Compiled Format (PCF) font using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
All parsing code for Python generated by Kaitai Struct depends on the Python runtime library. You have to install it before you can parse data.
The Python runtime library can be installed from PyPI:
python3 -m pip install kaitaistruct
Parse a local file and get structure in memory:
data = PcfFont.from_file("path/to/local/file.pcf")
Or parse structure from a bytes:
from kaitaistruct import KaitaiStream, BytesIO
raw = b"\x00\x01\x02..."
data = PcfFont(KaitaiStream(BytesIO(raw)))
After that, one can get various attributes from the structure by invoking getter methods like:
data.magic # => get magic
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
import kaitaistruct
from kaitaistruct import KaitaiStruct, KaitaiStream, BytesIO
from enum import Enum
if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 9):
raise Exception("Incompatible Kaitai Struct Python API: 0.9 or later is required, but you have %s" % (kaitaistruct.__version__))
import bytes_with_io
class PcfFont(KaitaiStruct):
"""Portable Compiled Format (PCF) font is a bitmap font format
originating from X11 Window System. It matches BDF format (which is
text-based) closely, but instead being binary and
platform-independent (as opposed to previously used SNF binary
format) due to introduced features to handle different endianness
and bit order.
The overall composition of the format is straightforward: it's more
or less classic directory of type-offset-size pointers, pointing to
what PCF format calls "tables". Each table carries a certain
piece of information related to the font (metadata properties,
metrics, bitmaps, mapping of glyphs to characters, etc).
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html
"""
class Types(Enum):
properties = 1
accelerators = 2
metrics = 4
bitmaps = 8
ink_metrics = 16
bdf_encodings = 32
swidths = 64
glyph_names = 128
bdf_accelerators = 256
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.magic = self._io.read_bytes(4)
if not self.magic == b"\x01\x66\x63\x70":
raise kaitaistruct.ValidationNotEqualError(b"\x01\x66\x63\x70", self.magic, self._io, u"/seq/0")
self.num_tables = self._io.read_u4le()
self.tables = []
for i in range(self.num_tables):
self.tables.append(PcfFont.Table(self._io, self, self._root))
class Table(KaitaiStruct):
"""Table offers a offset + length pointer to a particular
table. "Type" of table references certain enum. Applications can
ignore enum values which they don't support.
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.type = KaitaiStream.resolve_enum(PcfFont.Types, self._io.read_u4le())
self.format = PcfFont.Format(self._io, self, self._root)
self.len_body = self._io.read_u4le()
self.ofs_body = self._io.read_u4le()
class Swidths(KaitaiStruct):
"""Table containing scalable widths of characters.
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html#the-scalable-widths-table
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.format = PcfFont.Format(self._io, self, self._root)
self.num_glyphs = self._io.read_u4le()
self.swidths = []
for i in range(self.num_glyphs):
self.swidths.append(self._io.read_u4le())
class Properties(KaitaiStruct):
"""Array of properties (key-value pairs), used to convey different X11
settings of a font. Key is always an X font atom.
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html#properties-table
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.format = PcfFont.Format(self._io, self, self._root)
self.num_props = self._io.read_u4le()
self.props = []
for i in range(self.num_props):
self.props.append(PcfFont.Table.Properties.Prop(self._io, self, self._root))
self.padding = self._io.read_bytes((0 if (self.num_props & 3) == 0 else (4 - (self.num_props & 3))))
self.len_strings = self._io.read_u4le()
self._raw_strings = self._io.read_bytes(self.len_strings)
_io__raw_strings = KaitaiStream(BytesIO(self._raw_strings))
self.strings = bytes_with_io.BytesWithIo(_io__raw_strings)
class Prop(KaitaiStruct):
"""Property is a key-value pair, "key" being always a
string and "value" being either a string or a 32-bit
integer based on an additinal flag (`is_string`).
Simple offset-based mechanism is employed to keep this
type's sequence fixed-sized and thus have simple access
to property key/value by index.
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.ofs_name = self._io.read_u4le()
self.is_string = self._io.read_u1()
self.value_or_ofs_value = self._io.read_u4le()
@property
def name(self):
"""Name of the property addressed in the strings buffer.
"""
if hasattr(self, '_m_name'):
return self._m_name
io = self._parent.strings._io
_pos = io.pos()
io.seek(self.ofs_name)
self._m_name = (io.read_bytes_term(0, False, True, True)).decode(u"UTF-8")
io.seek(_pos)
return getattr(self, '_m_name', None)
@property
def str_value(self):
"""Value of the property addressed in the strings
buffer, if this is a string value.
"""
if hasattr(self, '_m_str_value'):
return self._m_str_value
if self.is_string != 0:
io = self._parent.strings._io
_pos = io.pos()
io.seek(self.value_or_ofs_value)
self._m_str_value = (io.read_bytes_term(0, False, True, True)).decode(u"UTF-8")
io.seek(_pos)
return getattr(self, '_m_str_value', None)
@property
def int_value(self):
"""Value of the property, if this is an integer value.
"""
if hasattr(self, '_m_int_value'):
return self._m_int_value
if self.is_string == 0:
self._m_int_value = self.value_or_ofs_value
return getattr(self, '_m_int_value', None)
class BdfEncodings(KaitaiStruct):
"""Table that allows mapping of character codes to glyphs present in the
font. Supports 1-byte and 2-byte character codes.
Note that this mapping is agnostic to character encoding itself - it
can represent ASCII, Unicode (ISO/IEC 10646), various single-byte
national encodings, etc. If application cares about it, normally
encoding will be specified in `properties` table, in the properties named
`CHARSET_REGISTRY` / `CHARSET_ENCODING`.
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html#the-encoding-table
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.format = PcfFont.Format(self._io, self, self._root)
self.min_char_or_byte2 = self._io.read_u2le()
self.max_char_or_byte2 = self._io.read_u2le()
self.min_byte1 = self._io.read_u2le()
self.max_byte1 = self._io.read_u2le()
self.default_char = self._io.read_u2le()
self.glyph_indexes = []
for i in range((((self.max_char_or_byte2 - self.min_char_or_byte2) + 1) * ((self.max_byte1 - self.min_byte1) + 1))):
self.glyph_indexes.append(self._io.read_u2le())
class GlyphNames(KaitaiStruct):
"""Table containing character names for every glyph.
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html#the-glyph-names-table
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.format = PcfFont.Format(self._io, self, self._root)
self.num_glyphs = self._io.read_u4le()
self.names = []
for i in range(self.num_glyphs):
self.names.append(PcfFont.Table.GlyphNames.StringRef(self._io, self, self._root))
self.len_strings = self._io.read_u4le()
self._raw_strings = self._io.read_bytes(self.len_strings)
_io__raw_strings = KaitaiStream(BytesIO(self._raw_strings))
self.strings = bytes_with_io.BytesWithIo(_io__raw_strings)
class StringRef(KaitaiStruct):
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.ofs_string = self._io.read_u4le()
@property
def value(self):
if hasattr(self, '_m_value'):
return self._m_value
io = self._parent.strings._io
_pos = io.pos()
io.seek(self.ofs_string)
self._m_value = (io.read_bytes_term(0, False, True, True)).decode(u"UTF-8")
io.seek(_pos)
return getattr(self, '_m_value', None)
class Bitmaps(KaitaiStruct):
"""Table containing uncompressed glyph bitmaps.
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html#the-bitmap-table
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.format = PcfFont.Format(self._io, self, self._root)
self.num_glyphs = self._io.read_u4le()
self.offsets = []
for i in range(self.num_glyphs):
self.offsets.append(self._io.read_u4le())
self.bitmap_sizes = []
for i in range(4):
self.bitmap_sizes.append(self._io.read_u4le())
@property
def body(self):
if hasattr(self, '_m_body'):
return self._m_body
_pos = self._io.pos()
self._io.seek(self.ofs_body)
_on = self.type
if _on == PcfFont.Types.properties:
self._raw__m_body = self._io.read_bytes(self.len_body)
_io__raw__m_body = KaitaiStream(BytesIO(self._raw__m_body))
self._m_body = PcfFont.Table.Properties(_io__raw__m_body, self, self._root)
elif _on == PcfFont.Types.bdf_encodings:
self._raw__m_body = self._io.read_bytes(self.len_body)
_io__raw__m_body = KaitaiStream(BytesIO(self._raw__m_body))
self._m_body = PcfFont.Table.BdfEncodings(_io__raw__m_body, self, self._root)
elif _on == PcfFont.Types.swidths:
self._raw__m_body = self._io.read_bytes(self.len_body)
_io__raw__m_body = KaitaiStream(BytesIO(self._raw__m_body))
self._m_body = PcfFont.Table.Swidths(_io__raw__m_body, self, self._root)
elif _on == PcfFont.Types.glyph_names:
self._raw__m_body = self._io.read_bytes(self.len_body)
_io__raw__m_body = KaitaiStream(BytesIO(self._raw__m_body))
self._m_body = PcfFont.Table.GlyphNames(_io__raw__m_body, self, self._root)
elif _on == PcfFont.Types.bitmaps:
self._raw__m_body = self._io.read_bytes(self.len_body)
_io__raw__m_body = KaitaiStream(BytesIO(self._raw__m_body))
self._m_body = PcfFont.Table.Bitmaps(_io__raw__m_body, self, self._root)
else:
self._m_body = self._io.read_bytes(self.len_body)
self._io.seek(_pos)
return getattr(self, '_m_body', None)
class Format(KaitaiStruct):
"""Table format specifier, always 4 bytes. Original implementation treats
it as always little-endian and makes liberal use of bitmasking to parse
various parts of it.
TODO: this format specification recognizes endianness and bit
order format bits, but it does not really takes any parsing
decisions based on them.
.. seealso::
Source - https://fontforge.org/docs/techref/pcf-format.html#file-header
"""
def __init__(self, _io, _parent=None, _root=None):
self._io = _io
self._parent = _parent
self._root = _root if _root else self
self._read()
def _read(self):
self.padding1 = self._io.read_bits_int_be(2)
self.scan_unit_mask = self._io.read_bits_int_be(2)
self.is_most_significant_bit_first = self._io.read_bits_int_be(1) != 0
self.is_big_endian = self._io.read_bits_int_be(1) != 0
self.glyph_pad_mask = self._io.read_bits_int_be(2)
self._io.align_to_byte()
self.format = self._io.read_u1()
self.padding = self._io.read_u2le()