Gzip is a popular and standard single-file archiving format. It essentially provides a container that stores original file name, timestamp and a few other things (like optional comment), basic CRCs, etc, and a file compressed by a chosen compression algorithm.
As of 2019, there is actually only one working solution for compression algorithms, so it's typically raw DEFLATE stream (without zlib header) in all gzipped files.
This page hosts a formal specification of .gz file format using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
// Code generated by kaitai-struct-compiler from a .ksy source file. DO NOT EDIT.
import (
"github.com/kaitai-io/kaitai_struct_go_runtime/kaitai"
"bytes"
)
/**
* Gzip is a popular and standard single-file archiving format. It
* essentially provides a container that stores original file name,
* timestamp and a few other things (like optional comment), basic
* CRCs, etc, and a file compressed by a chosen compression algorithm.
*
* As of 2019, there is actually only one working solution for
* compression algorithms, so it's typically raw DEFLATE stream
* (without zlib header) in all gzipped files.
* @see <a href="https://www.rfc-editor.org/rfc/rfc1952">Source</a>
*/
type Gzip_CompressionMethods int
const (
Gzip_CompressionMethods__Deflate Gzip_CompressionMethods = 8
)
var values_Gzip_CompressionMethods = map[Gzip_CompressionMethods]struct{}{8: {}}
func (v Gzip_CompressionMethods) isDefined() bool {
_, ok := values_Gzip_CompressionMethods[v]
return ok
}
type Gzip_Oses int
const (
Gzip_Oses__Fat Gzip_Oses = 0
Gzip_Oses__Amiga Gzip_Oses = 1
Gzip_Oses__Vms Gzip_Oses = 2
Gzip_Oses__Unix Gzip_Oses = 3
Gzip_Oses__VmCms Gzip_Oses = 4
Gzip_Oses__AtariTos Gzip_Oses = 5
Gzip_Oses__Hpfs Gzip_Oses = 6
Gzip_Oses__Macintosh Gzip_Oses = 7
Gzip_Oses__ZSystem Gzip_Oses = 8
Gzip_Oses__CpM Gzip_Oses = 9
Gzip_Oses__Tops20 Gzip_Oses = 10
Gzip_Oses__Ntfs Gzip_Oses = 11
Gzip_Oses__Qdos Gzip_Oses = 12
Gzip_Oses__AcornRiscos Gzip_Oses = 13
Gzip_Oses__Unknown Gzip_Oses = 255
)
var values_Gzip_Oses = map[Gzip_Oses]struct{}{0: {}, 1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 9: {}, 10: {}, 11: {}, 12: {}, 13: {}, 255: {}}
func (v Gzip_Oses) isDefined() bool {
_, ok := values_Gzip_Oses[v]
return ok
}
type Gzip struct {
Magic []byte
CompressionMethod Gzip_CompressionMethods
Flags *Gzip_Flags
ModTime uint32
ExtraFlags *Gzip_ExtraFlagsDeflate
Os Gzip_Oses
Extras *Gzip_Extras
Name []byte
Comment []byte
HeaderCrc16 uint16
Body []byte
BodyCrc32 uint32
LenUncompressed uint32
_io *kaitai.Stream
_root *Gzip
_parent kaitai.Struct
}
func NewGzip() *Gzip {
return &Gzip{
}
}
func (this Gzip) IO_() *kaitai.Stream {
return this._io
}
func (this *Gzip) Read(io *kaitai.Stream, parent kaitai.Struct, root *Gzip) (err error) {
this._io = io
this._parent = parent
this._root = root
tmp1, err := this._io.ReadBytes(int(2))
if err != nil {
return err
}
tmp1 = tmp1
this.Magic = tmp1
if !(bytes.Equal(this.Magic, []uint8{31, 139})) {
return kaitai.NewValidationNotEqualError([]uint8{31, 139}, this.Magic, this._io, "/seq/0")
}
tmp2, err := this._io.ReadU1()
if err != nil {
return err
}
this.CompressionMethod = Gzip_CompressionMethods(tmp2)
tmp3 := NewGzip_Flags()
err = tmp3.Read(this._io, this, this._root)
if err != nil {
return err
}
this.Flags = tmp3
tmp4, err := this._io.ReadU4le()
if err != nil {
return err
}
this.ModTime = uint32(tmp4)
switch (this.CompressionMethod) {
case Gzip_CompressionMethods__Deflate:
tmp5 := NewGzip_ExtraFlagsDeflate()
err = tmp5.Read(this._io, this, this._root)
if err != nil {
return err
}
this.ExtraFlags = tmp5
}
tmp6, err := this._io.ReadU1()
if err != nil {
return err
}
this.Os = Gzip_Oses(tmp6)
if (this.Flags.HasExtra) {
tmp7 := NewGzip_Extras()
err = tmp7.Read(this._io, this, this._root)
if err != nil {
return err
}
this.Extras = tmp7
}
if (this.Flags.HasName) {
tmp8, err := this._io.ReadBytesTerm(0, false, true, true)
if err != nil {
return err
}
this.Name = tmp8
}
if (this.Flags.HasComment) {
tmp9, err := this._io.ReadBytesTerm(0, false, true, true)
if err != nil {
return err
}
this.Comment = tmp9
}
if (this.Flags.HasHeaderCrc) {
tmp10, err := this._io.ReadU2le()
if err != nil {
return err
}
this.HeaderCrc16 = uint16(tmp10)
}
tmp11, err := this._io.Size()
if err != nil {
return err
}
tmp12, err := this._io.Pos()
if err != nil {
return err
}
tmp13, err := this._io.ReadBytes(int((tmp11 - tmp12) - 8))
if err != nil {
return err
}
tmp13 = tmp13
this.Body = tmp13
tmp14, err := this._io.ReadU4le()
if err != nil {
return err
}
this.BodyCrc32 = uint32(tmp14)
tmp15, err := this._io.ReadU4le()
if err != nil {
return err
}
this.LenUncompressed = uint32(tmp15)
return err
}
/**
* Compression method used to compress file body. In practice, only
* one method is widely used: 8 = deflate.
*/
/**
* Last modification time of a file archived in UNIX timestamp format.
*/
/**
* Extra flags, specific to compression method chosen.
*/
/**
* OS used to compress this file.
*/
/**
* Compressed body of a file archived. Note that we don't make an
* attempt to decompress it here.
*/
/**
* CRC32 checksum of an uncompressed file body
*/
/**
* Size of original uncompressed data in bytes (truncated to 32
* bits).
*/
type Gzip_ExtraFlagsDeflate_CompressionStrengths int
const (
Gzip_ExtraFlagsDeflate_CompressionStrengths__Best Gzip_ExtraFlagsDeflate_CompressionStrengths = 2
Gzip_ExtraFlagsDeflate_CompressionStrengths__Fast Gzip_ExtraFlagsDeflate_CompressionStrengths = 4
)
var values_Gzip_ExtraFlagsDeflate_CompressionStrengths = map[Gzip_ExtraFlagsDeflate_CompressionStrengths]struct{}{2: {}, 4: {}}
func (v Gzip_ExtraFlagsDeflate_CompressionStrengths) isDefined() bool {
_, ok := values_Gzip_ExtraFlagsDeflate_CompressionStrengths[v]
return ok
}
type Gzip_ExtraFlagsDeflate struct {
CompressionStrength Gzip_ExtraFlagsDeflate_CompressionStrengths
_io *kaitai.Stream
_root *Gzip
_parent *Gzip
}
func NewGzip_ExtraFlagsDeflate() *Gzip_ExtraFlagsDeflate {
return &Gzip_ExtraFlagsDeflate{
}
}
func (this Gzip_ExtraFlagsDeflate) IO_() *kaitai.Stream {
return this._io
}
func (this *Gzip_ExtraFlagsDeflate) Read(io *kaitai.Stream, parent *Gzip, root *Gzip) (err error) {
this._io = io
this._parent = parent
this._root = root
tmp16, err := this._io.ReadU1()
if err != nil {
return err
}
this.CompressionStrength = Gzip_ExtraFlagsDeflate_CompressionStrengths(tmp16)
return err
}
type Gzip_Extras struct {
LenSubfields uint16
Subfields *Gzip_Subfields
_io *kaitai.Stream
_root *Gzip
_parent *Gzip
_raw_Subfields []byte
}
func NewGzip_Extras() *Gzip_Extras {
return &Gzip_Extras{
}
}
func (this Gzip_Extras) IO_() *kaitai.Stream {
return this._io
}
func (this *Gzip_Extras) Read(io *kaitai.Stream, parent *Gzip, root *Gzip) (err error) {
this._io = io
this._parent = parent
this._root = root
tmp17, err := this._io.ReadU2le()
if err != nil {
return err
}
this.LenSubfields = uint16(tmp17)
tmp18, err := this._io.ReadBytes(int(this.LenSubfields))
if err != nil {
return err
}
tmp18 = tmp18
this._raw_Subfields = tmp18
_io__raw_Subfields := kaitai.NewStream(bytes.NewReader(this._raw_Subfields))
tmp19 := NewGzip_Subfields()
err = tmp19.Read(_io__raw_Subfields, this, this._root)
if err != nil {
return err
}
this.Subfields = tmp19
return err
}
type Gzip_Flags struct {
Reserved1 uint64
HasComment bool
HasName bool
HasExtra bool
HasHeaderCrc bool
IsText bool
_io *kaitai.Stream
_root *Gzip
_parent *Gzip
}
func NewGzip_Flags() *Gzip_Flags {
return &Gzip_Flags{
}
}
func (this Gzip_Flags) IO_() *kaitai.Stream {
return this._io
}
func (this *Gzip_Flags) Read(io *kaitai.Stream, parent *Gzip, root *Gzip) (err error) {
this._io = io
this._parent = parent
this._root = root
tmp20, err := this._io.ReadBitsIntBe(3)
if err != nil {
return err
}
this.Reserved1 = tmp20
tmp21, err := this._io.ReadBitsIntBe(1)
if err != nil {
return err
}
this.HasComment = tmp21 != 0
tmp22, err := this._io.ReadBitsIntBe(1)
if err != nil {
return err
}
this.HasName = tmp22 != 0
tmp23, err := this._io.ReadBitsIntBe(1)
if err != nil {
return err
}
this.HasExtra = tmp23 != 0
tmp24, err := this._io.ReadBitsIntBe(1)
if err != nil {
return err
}
this.HasHeaderCrc = tmp24 != 0
tmp25, err := this._io.ReadBitsIntBe(1)
if err != nil {
return err
}
this.IsText = tmp25 != 0
return err
}
/**
* If true, optional extra fields are present in the archive.
*/
/**
* If true, this archive includes a CRC16 checksum for the header.
*/
/**
* If true, file inside this archive is a text file from
* compressor's point of view.
*/
/**
* Every subfield follows typical [TLV scheme](https://en.wikipedia.org/wiki/Type-length-value):
*
* * `id` serves role of "T"ype
* * `len_data` serves role of "L"ength
* * `data` serves role of "V"alue
*
* This way it's possible to for arbitrary parser to skip over
* subfields it does not support.
*/
type Gzip_Subfield struct {
Id uint16
LenData uint16
Data []byte
_io *kaitai.Stream
_root *Gzip
_parent *Gzip_Subfields
}
func NewGzip_Subfield() *Gzip_Subfield {
return &Gzip_Subfield{
}
}
func (this Gzip_Subfield) IO_() *kaitai.Stream {
return this._io
}
func (this *Gzip_Subfield) Read(io *kaitai.Stream, parent *Gzip_Subfields, root *Gzip) (err error) {
this._io = io
this._parent = parent
this._root = root
tmp26, err := this._io.ReadU2le()
if err != nil {
return err
}
this.Id = uint16(tmp26)
tmp27, err := this._io.ReadU2le()
if err != nil {
return err
}
this.LenData = uint16(tmp27)
tmp28, err := this._io.ReadBytes(int(this.LenData))
if err != nil {
return err
}
tmp28 = tmp28
this.Data = tmp28
return err
}
/**
* Subfield ID, typically two ASCII letters.
*/
/**
* Container for many subfields, constrained by size of stream.
*/
type Gzip_Subfields struct {
Entries []*Gzip_Subfield
_io *kaitai.Stream
_root *Gzip
_parent *Gzip_Extras
}
func NewGzip_Subfields() *Gzip_Subfields {
return &Gzip_Subfields{
}
}
func (this Gzip_Subfields) IO_() *kaitai.Stream {
return this._io
}
func (this *Gzip_Subfields) Read(io *kaitai.Stream, parent *Gzip_Extras, root *Gzip) (err error) {
this._io = io
this._parent = parent
this._root = root
for i := 0;; i++ {
tmp29, err := this._io.EOF()
if err != nil {
return err
}
if tmp29 {
break
}
tmp30 := NewGzip_Subfield()
err = tmp30.Read(this._io, this, this._root)
if err != nil {
return err
}
this.Entries = append(this.Entries, tmp30)
}
return err
}