.lzh file format of LHA (AKA LHarc) by Yoshizaki Haruyasu: C++11/STL parsing library

LHA (LHarc, LZH) is a file format used by a popular, eponymous freeware archiver created in 1988 by Haruyasu Yoshizaki. Over the years, many ports and implementations were developed, sporting many extensions to the original 1988 LZH format.

The file format is fairly simple and essentially consists of a stream of records.

Application

LHA (AKA LHarc) by Yoshizaki Haruyasu

File extension

lzh

KS implementation details

License: CC0-1.0

References

This page hosts a formal specification of the .lzh file format of LHA (AKA LHarc) by Yoshizaki Haruyasu, written in Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

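  1. We need to create an STL input stream (std::istream). One can open a local file for that, or use an existing std::string or char* buffer. The three snippets below show these alternatives in turn; use only one of them, since each defines the stream `is`.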
    #include <fstream>
    
    std::ifstream is("path/to/local/file.lzh", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into a Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    lzh_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.entries() // => get entries

C++11/STL source code to parse .lzh file format of LHA (AKA LHarc) by Yoshizaki Haruyasu

lzh.h

#pragma once

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include "dos_datetime.h"
#include <vector>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif
class dos_datetime_t;

/**
 * LHA (LHarc, LZH) is a file format used by a popular freeware
 * eponymous archiver, created in 1988 by Haruyasu Yoshizaki. Over the
 * years, many ports and implementations were developed, sporting many
 * extensions to original 1988 LZH.
 * 
 * File format is pretty simple and essentially consists of a stream of
 * records.
 */

class lzh_t : public kaitai::kstruct {

public:
    // Nested types that make up the parse tree; declared up front because
    // they refer to one another in the definitions below.
    class record_t;
    class file_record_t;
    class header_t;
    class header1_t;

    /**
     * Creates the top-level structure and parses it right away: the
     * constructor calls _read(), which keeps reading records from p__io
     * until the stream reports end-of-file.
     *
     * @param p__io stream to parse
     * @param p__parent stored and returned by _parent(); may be null
     * @param p__root not used by the constructor, which always makes this
     *        object its own root
     */
    lzh_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, lzh_t* p__root = nullptr);

private:
    // Parses all records from the stream into m_entries.
    void _read();
    // Called from the destructor; its body is empty.
    void _clean_up();

public:
    ~lzh_t();

    /**
     * One entry of the record stream: a one-byte header length followed,
     * only when that length is greater than zero, by a file record.
     */
    class record_t : public kaitai::kstruct {

    public:

        /**
         * Stores the parent/root pointers and parses the record from p__io
         * immediately (the constructor calls _read()).
         */
        record_t(kaitai::kstream* p__io, lzh_t* p__parent = nullptr, lzh_t* p__root = nullptr);

    private:
        // Reads header_len, then the file record if header_len > 0.
        void _read();
        void _clean_up();

    public:
        ~record_t();

    private:
        uint8_t m_header_len;
        std::unique_ptr<file_record_t> m_file_record;
        // true when no file record was parsed (header_len was 0).
        bool n_file_record;

    public:
        /** Tells whether file_record is absent, i.e. header_len was 0. */
        bool _is_null_file_record() { file_record(); return n_file_record; };

    private:
        lzh_t* m__root;
        lzh_t* m__parent;

    public:
        /** First byte of the record, read as an unsigned 8-bit integer. */
        uint8_t header_len() const { return m_header_len; }
        /** Parsed file record, or a null pointer when header_len is 0. */
        file_record_t* file_record() const { return m_file_record.get(); }
        lzh_t* _root() const { return m__root; }
        lzh_t* _parent() const { return m__parent; }
    };

    /**
     * Payload of a record: a header parsed from its own substream of
     * (header_len - 1) bytes, an optional 16-bit field that is present only
     * for LHA level 0, and the body bytes.
     */
    class file_record_t : public kaitai::kstruct {

    public:

        /**
         * Stores the parent/root pointers and parses immediately. _read()
         * dereferences the parent to obtain header_len, so a non-null
         * parent is required in practice.
         */
        file_record_t(kaitai::kstream* p__io, lzh_t::record_t* p__parent = nullptr, lzh_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~file_record_t();

    private:
        std::unique_ptr<header_t> m_header;
        uint16_t m_file_uncompr_crc16;
        // true when the field was not read (LHA level is not 0).
        bool n_file_uncompr_crc16;

    public:
        /** Tells whether file_uncompr_crc16 is absent (LHA level is not 0). */
        bool _is_null_file_uncompr_crc16() { file_uncompr_crc16(); return n_file_uncompr_crc16; };

    private:
        std::string m_body;
        lzh_t* m__root;
        lzh_t::record_t* m__parent;
        // Raw header bytes and the substream that header_t is parsed from.
        std::string m__raw_header;
        std::unique_ptr<kaitai::kstream> m__io__raw_header;

    public:
        /** Header parsed from the raw header substream. */
        header_t* header() const { return m_header.get(); }
        /**
         * Little-endian 16-bit value read after the header; only set when
         * the header's LHA level is 0 (check _is_null_file_uncompr_crc16()).
         */
        uint16_t file_uncompr_crc16() const { return m_file_uncompr_crc16; }
        /**
         * Body bytes; their count is the header's file_size_compr value.
         * Returned by value, i.e. as a copy.
         */
        std::string body() const { return m_body; }
        lzh_t* _root() const { return m__root; }
        lzh_t::record_t* _parent() const { return m__parent; }
        /** Unparsed bytes of the header, (header_len - 1) bytes long. */
        std::string _raw_header() const { return m__raw_header; }
        kaitai::kstream* _io__raw_header() const { return m__io__raw_header.get(); }
    };

    /**
     * Record header: the level-neutral part (header1) followed by fields
     * whose presence depends on header1's LHA level. filename_len and
     * filename are read for level 0; file_uncompr_crc16, os and
     * ext_header_size are read for level 2.
     */
    class header_t : public kaitai::kstruct {

    public:

        /** Stores the parent/root pointers and parses from p__io immediately. */
        header_t(kaitai::kstream* p__io, lzh_t::file_record_t* p__parent = nullptr, lzh_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header_t();

    private:
        std::unique_ptr<header1_t> m_header1;
        uint8_t m_filename_len;
        // true when filename_len was not read (LHA level is not 0).
        bool n_filename_len;

    public:
        /** Tells whether filename_len is absent (LHA level is not 0). */
        bool _is_null_filename_len() { filename_len(); return n_filename_len; };

    private:
        std::string m_filename;
        // true when filename was not read (LHA level is not 0).
        bool n_filename;

    public:
        /** Tells whether filename is absent (LHA level is not 0). */
        bool _is_null_filename() { filename(); return n_filename; };

    private:
        uint16_t m_file_uncompr_crc16;
        // true when the field was not read (LHA level is not 2).
        bool n_file_uncompr_crc16;

    public:
        /** Tells whether file_uncompr_crc16 is absent (LHA level is not 2). */
        bool _is_null_file_uncompr_crc16() { file_uncompr_crc16(); return n_file_uncompr_crc16; };

    private:
        uint8_t m_os;
        // true when os was not read (LHA level is not 2).
        bool n_os;

    public:
        /** Tells whether os is absent (LHA level is not 2). */
        bool _is_null_os() { os(); return n_os; };

    private:
        uint16_t m_ext_header_size;
        // true when ext_header_size was not read (LHA level is not 2).
        bool n_ext_header_size;

    public:
        /** Tells whether ext_header_size is absent (LHA level is not 2). */
        bool _is_null_ext_header_size() { ext_header_size(); return n_ext_header_size; };

    private:
        lzh_t* m__root;
        lzh_t::file_record_t* m__parent;

    public:

        /**
         * Level-neutral header, same for all LHA levels. Subsequent fields order and meaning varies, based on LHA level specified in this header.
         */
        header1_t* header1() const { return m_header1.get(); }
        /** One-byte length of filename; only set for LHA level 0. */
        uint8_t filename_len() const { return m_filename_len; }
        /** filename_len bytes decoded as ASCII; only set for LHA level 0. */
        std::string filename() const { return m_filename; }
        /** Little-endian 16-bit value; only set for LHA level 2. */
        uint16_t file_uncompr_crc16() const { return m_file_uncompr_crc16; }
        /** One-byte value; only set for LHA level 2. */
        uint8_t os() const { return m_os; }
        /** Little-endian 16-bit value; only set for LHA level 2. */
        uint16_t ext_header_size() const { return m_ext_header_size; }
        lzh_t* _root() const { return m__root; }
        lzh_t::file_record_t* _parent() const { return m__parent; }
    };

    /**
     * Fixed-layout start of every header. Fields are read in this order:
     * header_checksum, method_id, file_size_compr, file_size_uncompr,
     * file_timestamp, attr, lha_level.
     */
    class header1_t : public kaitai::kstruct {

    public:

        /** Stores the parent/root pointers and parses from p__io immediately. */
        header1_t(kaitai::kstream* p__io, lzh_t::header_t* p__parent = nullptr, lzh_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header1_t();

    private:
        uint8_t m_header_checksum;
        std::string m_method_id;
        uint32_t m_file_size_compr;
        uint32_t m_file_size_uncompr;
        std::unique_ptr<dos_datetime_t> m_file_timestamp;
        uint8_t m_attr;
        uint8_t m_lha_level;
        lzh_t* m__root;
        lzh_t::header_t* m__parent;
        // Raw 4 timestamp bytes and the substream dos_datetime_t is parsed from.
        std::string m__raw_file_timestamp;
        std::unique_ptr<kaitai::kstream> m__io__raw_file_timestamp;

    public:
        /** One-byte value stored at the very start of the header. */
        uint8_t header_checksum() const { return m_header_checksum; }
        /** Five bytes decoded as ASCII. */
        std::string method_id() const { return m_method_id; }

        /**
         * Compressed file size
         */
        uint32_t file_size_compr() const { return m_file_size_compr; }

        /**
         * Uncompressed file size
         */
        uint32_t file_size_uncompr() const { return m_file_size_uncompr; }

        /**
         * Original file date/time
         */
        dos_datetime_t* file_timestamp() const { return m_file_timestamp.get(); }

        /**
         * File or directory attribute
         */
        uint8_t attr() const { return m_attr; }
        /**
         * One-byte LHA level. The parsers of header_t and file_record_t
         * test it against 0 and 2 to decide which optional fields to read.
         */
        uint8_t lha_level() const { return m_lha_level; }
        lzh_t* _root() const { return m__root; }
        lzh_t::header_t* _parent() const { return m__parent; }
        /** The 4 raw bytes that file_timestamp was parsed from. */
        std::string _raw_file_timestamp() const { return m__raw_file_timestamp; }
        kaitai::kstream* _io__raw_file_timestamp() const { return m__io__raw_file_timestamp.get(); }
    };

private:
    // All records parsed from the stream, in the order they were read.
    std::unique_ptr<std::vector<std::unique_ptr<record_t>>> m_entries;
    lzh_t* m__root;
    kaitai::kstruct* m__parent;

public:
    /** Records parsed from the stream, in stream order. */
    std::vector<std::unique_ptr<record_t>>* entries() const { return m_entries.get(); }
    lzh_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
};

lzh.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "lzh.h"

/**
 * Builds the root of the parse tree and parses the whole stream.
 *
 * @param p__io     stream the archive is read from
 * @param p__parent parent structure, stored as given (may be null)
 * @param p__root   ignored: the top-level object is always its own root
 */
lzh_t::lzh_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, lzh_t* p__root)
    : kaitai::kstruct(p__io),
      m_entries(nullptr),
      m__root(this),
      m__parent(p__parent) {
    (void)p__root;
    _read();
}

/**
 * Reads records back to back until the stream reports end-of-file and
 * stores them, in stream order, in m_entries.
 *
 * Each record_t parses itself in its constructor, so every iteration
 * advances the stream.
 */
void lzh_t::_read() {
    m_entries.reset(new std::vector<std::unique_ptr<record_t>>());
    while (!m__io->is_eof()) {
        // The temporary unique_ptr is already an rvalue; wrapping it in
        // std::move would add nothing.
        m_entries->push_back(std::unique_ptr<record_t>(new record_t(m__io, this, m__root)));
    }
}

/** Destructor; forwards to _clean_up(). */
lzh_t::~lzh_t() {
    this->_clean_up();
}

// Cleanup hook invoked by the destructor. The body is empty: this function
// releases nothing by hand.
void lzh_t::_clean_up() {
}

/**
 * Builds one record and parses it from the stream straight away.
 *
 * @param p__io     stream positioned at the start of the record
 * @param p__parent enclosing lzh_t, stored as given
 * @param p__root   root of the parse tree, stored as given
 */
lzh_t::record_t::record_t(kaitai::kstream* p__io, lzh_t* p__parent, lzh_t* p__root)
    : kaitai::kstruct(p__io),
      m_file_record(nullptr),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

void lzh_t::record_t::_read() {
    m_header_len = m__io->read_u1();
    n_file_record = true;
    if (header_len() > 0) {
        n_file_record = false;
        m_file_record = std::unique_ptr<file_record_t>(new file_record_t(m__io, this, m__root));
    }
}

/** Destructor; forwards to _clean_up(). */
lzh_t::record_t::~record_t() {
    this->_clean_up();
}

/**
 * Cleanup hook invoked by the destructor. It performs no work: nothing in
 * this type is released by hand, whether or not a file record was parsed.
 */
void lzh_t::record_t::_clean_up() {
    // Intentionally empty.
}

/**
 * Builds a file record and parses it from the stream straight away.
 *
 * @param p__io     stream positioned right after the record's length byte
 * @param p__parent enclosing record; _read() dereferences it for header_len
 * @param p__root   root of the parse tree, stored as given
 */
lzh_t::file_record_t::file_record_t(kaitai::kstream* p__io, lzh_t::record_t* p__parent, lzh_t* p__root)
    : kaitai::kstruct(p__io),
      m_header(nullptr),
      m__root(p__root),
      m__parent(p__parent),
      m__io__raw_header(nullptr) {
    _read();
}

void lzh_t::file_record_t::_read() {
    m__raw_header = m__io->read_bytes((_parent()->header_len() - 1));
    m__io__raw_header = std::unique_ptr<kaitai::kstream>(new kaitai::kstream(m__raw_header));
    m_header = std::unique_ptr<header_t>(new header_t(m__io__raw_header.get(), this, m__root));
    n_file_uncompr_crc16 = true;
    if (header()->header1()->lha_level() == 0) {
        n_file_uncompr_crc16 = false;
        m_file_uncompr_crc16 = m__io->read_u2le();
    }
    m_body = m__io->read_bytes(header()->header1()->file_size_compr());
}

/** Destructor; forwards to _clean_up(). */
lzh_t::file_record_t::~file_record_t() {
    this->_clean_up();
}

/**
 * Cleanup hook invoked by the destructor. It performs no work: the optional
 * 16-bit field is a plain integer and needs no release.
 */
void lzh_t::file_record_t::_clean_up() {
    // Intentionally empty.
}

/**
 * Builds a header and parses it from the stream straight away.
 *
 * @param p__io     stream to read the header from
 * @param p__parent enclosing file record, stored as given
 * @param p__root   root of the parse tree, stored as given
 */
lzh_t::header_t::header_t(kaitai::kstream* p__io, lzh_t::file_record_t* p__parent, lzh_t* p__root)
    : kaitai::kstruct(p__io),
      m_header1(nullptr),
      m__root(p__root),
      m__parent(p__parent) {
    _read();
}

/**
 * Parses the level-neutral header, then the fields selected by its LHA
 * level:
 *   - level 0: filename_len (u1) and filename (filename_len ASCII bytes);
 *   - level 2: file_uncompr_crc16 (u2le), os (u1), ext_header_size (u2le).
 * Each n_* flag ends up true exactly when its field was not read.
 */
void lzh_t::header_t::_read() {
    m_header1.reset(new header1_t(m__io, this, m__root));
    const uint8_t level = header1()->lha_level();

    // Fields that exist only in level-0 headers.
    const bool is_level0 = (level == 0);
    n_filename_len = !is_level0;
    n_filename = !is_level0;
    if (is_level0) {
        m_filename_len = m__io->read_u1();
        m_filename = kaitai::kstream::bytes_to_str(m__io->read_bytes(filename_len()), std::string("ASCII"));
    }

    // Fields that exist only in level-2 headers.
    const bool is_level2 = (level == 2);
    n_file_uncompr_crc16 = !is_level2;
    n_os = !is_level2;
    n_ext_header_size = !is_level2;
    if (is_level2) {
        m_file_uncompr_crc16 = m__io->read_u2le();
        m_os = m__io->read_u1();
        m_ext_header_size = m__io->read_u2le();
    }
}

/** Destructor; forwards to _clean_up(). */
lzh_t::header_t::~header_t() {
    this->_clean_up();
}

/**
 * Cleanup hook invoked by the destructor. It performs no work: none of the
 * optional fields (filename_len, filename, file_uncompr_crc16, os,
 * ext_header_size) is released by hand, whether or not it was read.
 */
void lzh_t::header_t::_clean_up() {
    // Intentionally empty.
}

/**
 * Builds the level-neutral header and parses it from the stream straight
 * away.
 *
 * @param p__io     stream to read the header from
 * @param p__parent enclosing header, stored as given
 * @param p__root   root of the parse tree, stored as given
 */
lzh_t::header1_t::header1_t(kaitai::kstream* p__io, lzh_t::header_t* p__parent, lzh_t* p__root)
    : kaitai::kstruct(p__io),
      m_file_timestamp(nullptr),
      m__root(p__root),
      m__parent(p__parent),
      m__io__raw_file_timestamp(nullptr) {
    _read();
}

/**
 * Reads the fixed-layout fields in stream order: checksum (u1), method id
 * (5 ASCII bytes), compressed and uncompressed sizes (u4le each), a 4-byte
 * timestamp parsed by dos_datetime_t from its own substream, the attribute
 * byte and the LHA level byte.
 */
void lzh_t::header1_t::_read() {
    m_header_checksum = m__io->read_u1();

    const std::string method_bytes = m__io->read_bytes(5);
    m_method_id = kaitai::kstream::bytes_to_str(method_bytes, std::string("ASCII"));

    m_file_size_compr = m__io->read_u4le();
    m_file_size_uncompr = m__io->read_u4le();

    // The timestamp is parsed from a substream over exactly its 4 bytes.
    m__raw_file_timestamp = m__io->read_bytes(4);
    m__io__raw_file_timestamp.reset(new kaitai::kstream(m__raw_file_timestamp));
    m_file_timestamp.reset(new dos_datetime_t(m__io__raw_file_timestamp.get()));

    m_attr = m__io->read_u1();
    m_lha_level = m__io->read_u1();
}

/** Destructor; forwards to _clean_up(). */
lzh_t::header1_t::~header1_t() {
    this->_clean_up();
}

// Cleanup hook invoked by the destructor. The body is empty: this function
// releases nothing by hand.
void lzh_t::header1_t::_clean_up() {
}