A simple wrapper for reading a UTF-16 encoded string that starts with a byte order mark (BOM). The BOM indicates the endianness of the UTF-16 encoding, which can be either big-endian (BE) or little-endian (LE).
Use:
- value to get the string value with BOM stripped, regardless of endianness.
- is_be and is_le to check the endianness indicated by the BOM.
- bom to check the raw byte order mark.

This page hosts a formal specification of UTF-16 string with BOM using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
// Code generated by kaitai-struct-compiler from a .ksy source file. DO NOT EDIT.
import (
"github.com/kaitai-io/kaitai_struct_go_runtime/kaitai"
"bytes"
"golang.org/x/text/encoding/unicode"
)
/**
 * Utf16WithBom is a simple wrapper for reading a UTF-16 encoded string that
 * starts with a byte order mark (BOM). The BOM indicates the endianness of the
 * UTF-16 encoding, which can be either big-endian (BE) or little-endian (LE).
 *
 * Use:
 *
 * * `value` to get the string value with BOM stripped, regardless of endianness.
 * * `is_be` and `is_le` to check the endianness indicated by the BOM.
 * * `bom` to check the raw byte order mark.
 *
 * On this Go type those are the Value, IsBe and IsLe methods and the Bom field.
 *
 * @see https://en.wikipedia.org/wiki/Byte_order_mark
 */
type Utf16WithBom struct {
// Bom holds the two bytes Read consumes first. Read accepts only FE FF or FF FE.
Bom []byte
// StrBe is the rest of the stream decoded as big-endian UTF-16. Read assigns
// it only when IsBe reports true.
StrBe string
// StrLe is the rest of the stream decoded as little-endian UTF-16. Read
// assigns it only when IsLe reports true.
StrLe string
// _io is the stream handed to Read; IO_ returns it.
_io *kaitai.Stream
// _root is the root object handed to Read.
_root *Utf16WithBom
// _parent is the parent object handed to Read.
_parent kaitai.Struct
// _f_isBe is set once IsBe has computed isBe; later calls return the stored value.
_f_isBe bool
// isBe is the stored result of IsBe.
isBe bool
// _f_isLe is set once IsLe has computed isLe; later calls return the stored value.
_f_isLe bool
// isLe is the stored result of IsLe.
isLe bool
// _f_value is set once Value has been called; later calls return the stored value.
_f_value bool
// value is the stored result of Value.
value string
}
/**
 * NewUtf16WithBom allocates a zero-valued Utf16WithBom and returns a pointer
 * to it. No field is populated here.
 */
func NewUtf16WithBom() *Utf16WithBom {
	return new(Utf16WithBom)
}
/**
 * IO_ returns the stream stored in the _io field, i.e. the stream that was
 * handed to Read.
 */
func (this Utf16WithBom) IO_() *kaitai.Stream {
	stream := this._io
	return stream
}
/**
 * Read parses a BOM-prefixed UTF-16 string from io into this object.
 *
 * It records io, parent and root on the object, reads two bytes into Bom and
 * rejects anything other than FE FF or FF FE with the error built by
 * kaitai.NewValidationNotAnyOfError for "/seq/0". The remainder of the stream
 * is then decoded as UTF-16 into StrBe (big-endian BOM) or StrLe
 * (little-endian BOM); the other field stays empty.
 *
 * Bom, the decoded strings and the cached IsBe/IsLe/Value results are cleared
 * before parsing, so an accessor called before Read, or an earlier Read on the
 * same object, cannot leak stale answers into this one.
 *
 * Errors from the stream, from the accessors and from the decoder are
 * returned unchanged.
 */
func (this *Utf16WithBom) Read(io *kaitai.Stream, parent kaitai.Struct, root *Utf16WithBom) (err error) {
	this._io = io
	this._parent = parent
	this._root = root

	// Drop everything derived from a previous parse. Without this the lazily
	// cached isBe/isLe/value would keep describing the old BOM, and Read
	// itself could pick the wrong decoder below.
	this.Bom = nil
	this.StrBe, this.StrLe = "", ""
	this._f_isBe, this._f_isLe, this._f_value = false, false, false

	bom, err := this._io.ReadBytes(2)
	if err != nil {
		return err
	}
	// Bom is stored before validation so the rejected bytes stay inspectable.
	this.Bom = bom
	if !bytes.Equal(this.Bom, []uint8{254, 255}) && !bytes.Equal(this.Bom, []uint8{255, 254}) {
		return kaitai.NewValidationNotAnyOfError(this.Bom, this._io, "/seq/0")
	}

	isBe, err := this.IsBe()
	if err != nil {
		return err
	}
	if isBe {
		raw, err := this._io.ReadBytesFull()
		if err != nil {
			return err
		}
		// The BOM has already been consumed, so the decoder must not look for one.
		decoded, err := kaitai.BytesToStr(raw, unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder())
		if err != nil {
			return err
		}
		this.StrBe = decoded
	}

	isLe, err := this.IsLe()
	if err != nil {
		return err
	}
	if isLe {
		raw, err := this._io.ReadBytesFull()
		if err != nil {
			return err
		}
		decoded, err := kaitai.BytesToStr(raw, unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder())
		if err != nil {
			return err
		}
		this.StrLe = decoded
	}
	return nil
}
/**
 * IsBe reports whether Bom is the big-endian byte order mark FE FF.
 *
 * The comparison runs on the first call only; the result is stored on the
 * object and handed back unchanged by later calls, even if Bom is modified
 * afterwards. The error result is always nil.
 */
func (this *Utf16WithBom) IsBe() (v bool, err error) {
	if !this._f_isBe {
		this.isBe = bytes.Equal(this.Bom, []byte{0xFE, 0xFF})
		this._f_isBe = true
	}
	return this.isBe, nil
}
/**
 * IsLe reports whether Bom is the little-endian byte order mark FF FE.
 *
 * The comparison runs on the first call only; the result is stored on the
 * object and handed back unchanged by later calls, even if Bom is modified
 * afterwards. The error result is always nil.
 */
func (this *Utf16WithBom) IsLe() (v bool, err error) {
	if !this._f_isLe {
		this.isLe = bytes.Equal(this.Bom, []byte{0xFF, 0xFE})
		this._f_isLe = true
	}
	return this.isLe, nil
}
/**
 * Value returns the string value with BOM stripped, regardless of endianness:
 * StrBe when IsBe reports true, StrLe otherwise.
 *
 * The result is stored on the object after the first successful call and
 * handed back unchanged by later calls. If IsBe returns an error, that error
 * is returned with an empty string and nothing is cached, so the next call
 * evaluates again instead of returning "" with a nil error.
 */
func (this *Utf16WithBom) Value() (v string, err error) {
	if this._f_value {
		return this.value, nil
	}
	isBe, err := this.IsBe()
	if err != nil {
		return "", err
	}
	if isBe {
		this.value = this.StrBe
	} else {
		this.value = this.StrLe
	}
	// Latch the cache only once a value has really been computed.
	this._f_value = true
	return this.value, nil
}
/**
* The byte order mark (BOM) is a special marker at the beginning of the
* string that indicates the endianness of the UTF-16 encoding. The
* character U+FEFF is used as the BOM, and its byte representation differs
* based on endianness:
*
* * For big-endian (BE) UTF-16, it's `[0xFE, 0xFF]`
* * For little-endian (LE) UTF-16, it's `[0xFF, 0xFE]`
*
* This implementation checks for the presence of a valid BOM and strips it
* from the resulting string value.
*/