A simple wrapper that allows reading a UTF-16 encoded string that starts with a byte order mark (BOM). The BOM indicates the endianness of the UTF-16 encoding, which can be either big-endian (BE) or little-endian (LE).
Use:
- `value` to get the string value with BOM stripped, regardless of endianness.
- `is_be` and `is_le` to check the endianness indicated by the BOM.
- `bom` to check the raw byte order mark.

This page hosts a formal specification of UTF-16 string with BOM using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
# This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
# type: ignore
import kaitaistruct
from kaitaistruct import ReadWriteKaitaiStruct, KaitaiStream, BytesIO
# Refuse to load when the installed runtime reports an API older than 0.11;
# a runtime without an API_VERSION attribute is treated as 0.9.
if getattr(kaitaistruct, 'API_VERSION', (0, 9)) < (0, 11):
    raise Exception(
        "Incompatible Kaitai Struct Python API: 0.11 or later is required, but you have %s"
        % (kaitaistruct.__version__)
    )
class Utf16WithBom(ReadWriteKaitaiStruct):
    """A UTF-16 encoded string that starts with a byte order mark (BOM).

    The BOM indicates the endianness of the UTF-16 encoding: the bytes
    ``FE FF`` select big-endian (BE) and ``FF FE`` select little-endian (LE).
    Any other two-byte prefix is rejected.

    Use:

    * `value` to get the string value with BOM stripped, regardless of
      endianness.
    * `is_be` and `is_le` to check the endianness indicated by the BOM.
    * `bom` to check the raw byte order mark.

    Fields:

    * `bom` -- the two raw BOM bytes.
    * `str_be` -- the decoded text; only set when the BOM is big-endian.
    * `str_le` -- the decoded text; only set when the BOM is little-endian.

    `is_be`, `is_le` and `value` are cached after first access. After
    changing `bom`, `str_be` or `str_le` by hand, call the matching
    ``_invalidate_*`` method so the cached value is recomputed.

    .. seealso::
       - https://en.wikipedia.org/wiki/Byte_order_mark
    """

    def __init__(self, _io=None, _parent=None, _root=None):
        """Create the object without parsing anything.

        Args:
            _io: stream to read from / write to; handed to the base class.
            _parent: enclosing structure, stored as ``self._parent``.
            _root: top-level structure; this object itself when falsy.
        """
        super(Utf16WithBom, self).__init__(_io)
        self._parent = _parent
        self._root = _root or self

    def _read(self):
        """Parse the BOM and the string that follows it from ``self._io``.

        The string occupies everything up to the end of the stream.

        Raises:
            kaitaistruct.ValidationNotAnyOfError: if the first two bytes are
                not one of the two accepted byte order marks.
        """
        self.bom = self._io.read_bytes(2)
        # Values cached for a previous BOM must not leak into this parse,
        # otherwise the text could be decoded with the wrong byte order.
        self._invalidate_is_be()
        self._invalidate_is_le()
        self._invalidate_value()
        if self.bom not in (b"\xFE\xFF", b"\xFF\xFE"):
            raise kaitaistruct.ValidationNotAnyOfError(self.bom, self._io, u"/seq/0")
        if self.is_be:
            self.str_be = (self._io.read_bytes_full()).decode(u"UTF-16BE")
        if self.is_le:
            self.str_le = (self._io.read_bytes_full()).decode(u"UTF-16LE")
        self._dirty = False

    def _fetch_instances(self):
        """Do nothing: no field of this type needs any work here."""
        pass

    def _write__seq(self, io=None):
        """Write the BOM followed by the string encoded in the matching order.

        The base-class implementation runs first. The encoded string must
        fill the rest of the stream exactly.

        Args:
            io: optional stream, forwarded to the base-class implementation.

        Raises:
            kaitaistruct.ConsistencyError: if the stream is not at its end
                after the string has been written.
        """
        super(Utf16WithBom, self)._write__seq(io)
        self._io.write_bytes(self.bom)
        if self.is_be:
            self._io.write_bytes((self.str_be).encode(u"UTF-16BE"))
            if not self._io.is_eof():
                raise kaitaistruct.ConsistencyError(u"str_be", 0, self._io.size() - self._io.pos())
        if self.is_le:
            self._io.write_bytes((self.str_le).encode(u"UTF-16LE"))
            if not self._io.is_eof():
                raise kaitaistruct.ConsistencyError(u"str_le", 0, self._io.size() - self._io.pos())

    def _check(self):
        """Validate `bom` and mark the object as clean.

        Raises:
            kaitaistruct.ConsistencyError: if `bom` is not exactly 2 bytes.
            kaitaistruct.ValidationNotAnyOfError: if `bom` is not one of the
                two accepted byte order marks.
        """
        if len(self.bom) != 2:
            raise kaitaistruct.ConsistencyError(u"bom", 2, len(self.bom))
        if self.bom not in (b"\xFE\xFF", b"\xFF\xFE"):
            raise kaitaistruct.ValidationNotAnyOfError(self.bom, None, u"/seq/0")
        self._dirty = False

    @property
    def is_be(self):
        """True if the byte order mark indicates big-endian UTF-16 encoding."""
        if hasattr(self, '_m_is_be'):
            return self._m_is_be

        self._m_is_be = self.bom == b"\xFE\xFF"
        return self._m_is_be

    def _invalidate_is_be(self):
        """Forget the cached `is_be`; safe to call when nothing is cached."""
        if hasattr(self, '_m_is_be'):
            del self._m_is_be

    @property
    def is_le(self):
        """True if the byte order mark indicates little-endian UTF-16 encoding."""
        if hasattr(self, '_m_is_le'):
            return self._m_is_le

        self._m_is_le = self.bom == b"\xFF\xFE"
        return self._m_is_le

    def _invalidate_is_le(self):
        """Forget the cached `is_le`; safe to call when nothing is cached."""
        if hasattr(self, '_m_is_le'):
            del self._m_is_le

    @property
    def value(self):
        """The string value with BOM stripped, regardless of endianness."""
        if hasattr(self, '_m_value'):
            return self._m_value

        self._m_value = (self.str_be if self.is_be else self.str_le)
        return self._m_value

    def _invalidate_value(self):
        """Forget the cached `value`; safe to call when nothing is cached."""
        if hasattr(self, '_m_value'):
            del self._m_value