Chrome PAK serialization format: C++11/STL parsing library

Format mostly used by Google Chrome and various Android apps to store resources such as translated strings, help messages and images.

File extension

pak

KS implementation details

License: CC0-1.0

This page hosts a formal specification of the Chrome PAK serialization format using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.

Usage

Runtime library

All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.

For C++, the easiest way is to clone the runtime library sources and build them along with your project.

Code

Using Kaitai Struct in C++/STL usually consists of 3 steps.

  1. We need to create an STL input stream (std::istream). You can open a local file for that, or wrap an existing std::string or char* buffer; the three snippets below show these alternatives in that order (use only one of them).
    #include <fstream>
    
    std::ifstream is("path/to/local/file.pak", std::ifstream::binary);
    
    #include <sstream>
    
    std::istringstream is(str);
    
    #include <sstream>
    
    const char buf[] = { ... };
    std::string str(buf, sizeof buf);
    std::istringstream is(str);
    
  2. We need to wrap our input stream into Kaitai stream:
    #include "kaitai/kaitaistream.h"
    
    kaitai::kstream ks(&is);
    
  3. And finally, we can invoke the parsing:
    chrome_pak_t data(&ks);
    

After that, one can get various attributes from the structure by invoking getter methods like:

data.version() // => only versions 4 and 5 are supported

C++11/STL source code to parse Chrome PAK serialization format

chrome_pak.h

#pragma once

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include <vector>

#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif

/**
 * Format mostly used by Google Chrome and various Android apps to store
 * resources such as translated strings, help messages and images.
 * \sa https://web.archive.org/web/20220126211447/https://dev.chromium.org/developers/design-documents/linuxresourcesandlocalizedstrings Source
 * \sa https://chromium.googlesource.com/chromium/src/tools/grit/+/3c36f27/grit/format/data_pack.py Source
 * \sa https://chromium.googlesource.com/chromium/src/tools/grit/+/8a23eae/grit/format/data_pack.py Source
 */

class chrome_pak_t : public kaitai::kstruct {

public:
    class header_v5_part_t;
    class resource_t;
    class alias_t;

    /** Values of the `encoding` byte stored in the file header. */
    enum encodings_t {
        ENCODINGS_BINARY = 0,
        ENCODINGS_UTF8 = 1,
        ENCODINGS_UTF16 = 2
    };

    /**
     * Parses the header, the resource table and the alias table from
     * `p__io` right away (the constructor calls `_read()`).
     *
     * \param p__io stream the PAK data is read from
     * \param p__parent optional parent structure, stored as-is
     * \param p__root ignored: this object always acts as its own root
     */
    chrome_pak_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, chrome_pak_t* p__root = nullptr);

private:
    void _read();
    void _clean_up();

public:
    ~chrome_pak_t();

    /**
     * Header fields that are present only in version 5 files: three
     * padding bytes followed by the resource and alias counts.
     */
    class header_v5_part_t : public kaitai::kstruct {

    public:

        header_v5_part_t(kaitai::kstream* p__io, chrome_pak_t* p__parent = nullptr, chrome_pak_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~header_v5_part_t();

    private:
        std::string m_encoding_padding;
        uint16_t m_num_resources = 0;
        uint16_t m_num_aliases = 0;
        chrome_pak_t* m__root = nullptr;
        chrome_pak_t* m__parent = nullptr;

    public:
        std::string encoding_padding() const { return m_encoding_padding; }
        uint16_t num_resources() const { return m_num_resources; }
        uint16_t num_aliases() const { return m_num_aliases; }
        chrome_pak_t* _root() const { return m__root; }
        chrome_pak_t* _parent() const { return m__parent; }
    };

    /**
     * One entry of the resource table: a 16-bit id and the 32-bit offset
     * of the resource body. `idx` is the entry's position in the table and
     * `has_body` is false for the extra trailing entry that only marks the
     * end of the last body.
     */
    class resource_t : public kaitai::kstruct {

    public:

        resource_t(int32_t p_idx, bool p_has_body, kaitai::kstream* p__io, chrome_pak_t* p__parent = nullptr, chrome_pak_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~resource_t();

    private:
        bool f_len_body = false;
        // Defaults to 0 so that len_body() on an entry without a body
        // returns a defined value instead of an indeterminate one.
        int32_t m_len_body = 0;
        bool n_len_body = true;

    public:
        /** True when this entry has no body and therefore no length. */
        bool _is_null_len_body() { len_body(); return n_len_body; }

        /**
         * Body length, derived from the offset of the following table entry.
         * MUST NOT be accessed until the next `resource` is parsed.
         * Yields 0 when the entry has no body.
         */
        int32_t len_body();

    private:
        bool f_body = false;
        std::string m_body;
        bool n_body = true;

    public:
        /** True when this entry has no body. */
        bool _is_null_body() { body(); return n_body; }

        /**
         * Raw bytes of the resource, read lazily from `ofs_body()`.
         * MUST NOT be accessed until the next `resource` is parsed.
         * Yields an empty string when the entry has no body.
         */
        std::string body();

    private:
        uint16_t m_id = 0;
        uint32_t m_ofs_body = 0;
        int32_t m_idx = 0;
        bool m_has_body = false;
        chrome_pak_t* m__root = nullptr;
        chrome_pak_t* m__parent = nullptr;

    public:
        uint16_t id() const { return m_id; }
        uint32_t ofs_body() const { return m_ofs_body; }
        int32_t idx() const { return m_idx; }
        bool has_body() const { return m_has_body; }
        chrome_pak_t* _root() const { return m__root; }
        chrome_pak_t* _parent() const { return m__parent; }
    };

    /**
     * One entry of the alias table: an id of its own plus the index of the
     * resource table entry it refers to.
     */
    class alias_t : public kaitai::kstruct {

    public:

        alias_t(kaitai::kstream* p__io, chrome_pak_t* p__parent = nullptr, chrome_pak_t* p__root = nullptr);

    private:
        void _read();
        void _clean_up();

    public:
        ~alias_t();

    private:
        bool f_resource = false;
        resource_t* m_resource = nullptr;

    public:
        /**
         * Resource table entry at `resource_idx()`. The returned pointer is
         * non-owning; the entry belongs to the parent's resource table.
         */
        resource_t* resource();

    private:
        uint16_t m_id = 0;
        uint16_t m_resource_idx = 0;
        chrome_pak_t* m__root = nullptr;
        chrome_pak_t* m__parent = nullptr;

    public:
        uint16_t id() const { return m_id; }
        uint16_t resource_idx() const { return m_resource_idx; }
        chrome_pak_t* _root() const { return m__root; }
        chrome_pak_t* _parent() const { return m__parent; }
    };

private:
    bool f_num_resources = false;
    uint32_t m_num_resources = 0;

public:
    /**
     * Number of real resources (the trailing end marker is not counted),
     * taken from the version 5 header part or from `num_resources_v4()`.
     */
    uint32_t num_resources();

private:
    bool f_num_aliases = false;
    uint16_t m_num_aliases = 0;

public:
    /** Number of aliases; always 0 for files that are not version 5. */
    uint16_t num_aliases();

private:
    uint32_t m_version = 0;
    // Only read from the stream for version 4 files; defaults to 0 so the
    // getter never returns an indeterminate value for version 5 files.
    uint32_t m_num_resources_v4 = 0;
    bool n_num_resources_v4 = true;

public:
    /** True when the file is not version 4 and the field was not read. */
    bool _is_null_num_resources_v4() { num_resources_v4(); return n_num_resources_v4; }

private:
    encodings_t m_encoding = ENCODINGS_BINARY;
    std::unique_ptr<header_v5_part_t> m_v5_part;
    bool n_v5_part = true;

public:
    /** True when the file is not version 5 and the part was not read. */
    bool _is_null_v5_part() { v5_part(); return n_v5_part; }

private:
    std::unique_ptr<std::vector<std::unique_ptr<resource_t>>> m_resources;
    std::unique_ptr<std::vector<std::unique_ptr<alias_t>>> m_aliases;
    chrome_pak_t* m__root = nullptr;
    kaitai::kstruct* m__parent = nullptr;

public:

    /**
     * only versions 4 and 5 are supported
     */
    uint32_t version() const { return m_version; }

    /** Resource count of a version 4 file; 0 when the field is absent. */
    uint32_t num_resources_v4() const { return m_num_resources_v4; }

    /**
     * Character encoding of all text resources in the PAK file. Note that
     * the file can **always** contain binary resources, this only applies to
     * those that are supposed to hold text.
     * 
     * In practice, this will probably always be `encodings::utf8` - I haven't
     * seen any organic file that would state otherwise. `UTF8` is also usually
     * hardcoded in Python scripts from the GRIT repository that generate .pak
     * files (for example
     * [`pak_util.py:79`](https://chromium.googlesource.com/chromium/src/tools/grit/+/8a23eae/pak_util.py#79)).
     */
    encodings_t encoding() const { return m_encoding; }

    /** Version 5 header part, or a null pointer for other versions. */
    header_v5_part_t* v5_part() const { return m_v5_part.get(); }

    /**
     * The length is calculated by looking at the offset of
     * the next item, so an extra entry is stored with id 0
     * and offset pointing to the end of the resources.
     */
    std::vector<std::unique_ptr<resource_t>>* resources() const { return m_resources.get(); }
    std::vector<std::unique_ptr<alias_t>>* aliases() const { return m_aliases.get(); }
    chrome_pak_t* _root() const { return m__root; }
    kaitai::kstruct* _parent() const { return m__parent; }
};

chrome_pak.cpp

// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

#include "chrome_pak.h"
#include "kaitai/exceptions.h"

/**
 * Builds the root object and parses the complete structure from `p__io`
 * before returning. `p__root` is not used: the object is its own root.
 */
chrome_pak_t::chrome_pak_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, chrome_pak_t* p__root) : kaitai::kstruct(p__io) {
    // Cached computed values start out as "not computed yet".
    f_num_aliases = false;
    f_num_resources = false;

    // Nothing has been parsed so far.
    m_aliases.reset();
    m_resources.reset();
    m_v5_part.reset();

    m__root = this;
    m__parent = p__parent;

    _read();
}

void chrome_pak_t::_read() {
    m_version = m__io->read_u4le();
    if (!( ((version() == 4) || (version() == 5)) )) {
        throw kaitai::validation_not_any_of_error<uint32_t>(version(), _io(), std::string("/seq/0"));
    }
    n_num_resources_v4 = true;
    if (version() == 4) {
        n_num_resources_v4 = false;
        m_num_resources_v4 = m__io->read_u4le();
    }
    m_encoding = static_cast<chrome_pak_t::encodings_t>(m__io->read_u1());
    n_v5_part = true;
    if (version() == 5) {
        n_v5_part = false;
        m_v5_part = std::unique_ptr<header_v5_part_t>(new header_v5_part_t(m__io, this, m__root));
    }
    m_resources = std::unique_ptr<std::vector<std::unique_ptr<resource_t>>>(new std::vector<std::unique_ptr<resource_t>>());
    const int l_resources = (num_resources() + 1);
    for (int i = 0; i < l_resources; i++) {
        m_resources->push_back(std::move(std::unique_ptr<resource_t>(new resource_t(i, i < num_resources(), m__io, this, m__root))));
    }
    m_aliases = std::unique_ptr<std::vector<std::unique_ptr<alias_t>>>(new std::vector<std::unique_ptr<alias_t>>());
    const int l_aliases = num_aliases();
    for (int i = 0; i < l_aliases; i++) {
        m_aliases->push_back(std::move(std::unique_ptr<alias_t>(new alias_t(m__io, this, m__root))));
    }
}

/** Runs the cleanup hook; the smart-pointer members release themselves. */
chrome_pak_t::~chrome_pak_t() {
    this->_clean_up();
}

/**
 * Cleanup hook called from the destructor. There is nothing to release by
 * hand for either optional header field, so the hook does no work.
 */
void chrome_pak_t::_clean_up() {
}

/** Stores the root/parent links and parses the fields from `p__io`. */
chrome_pak_t::header_v5_part_t::header_v5_part_t(kaitai::kstream* p__io, chrome_pak_t* p__parent, chrome_pak_t* p__root) : kaitai::kstruct(p__io) {
    m__root = p__root;
    m__parent = p__parent;

    _read();
}

void chrome_pak_t::header_v5_part_t::_read() {
    m_encoding_padding = m__io->read_bytes(3);
    m_num_resources = m__io->read_u2le();
    m_num_aliases = m__io->read_u2le();
}

/** Runs the (empty) cleanup hook. */
chrome_pak_t::header_v5_part_t::~header_v5_part_t() {
    this->_clean_up();
}

/** Cleanup hook called from the destructor; this type has nothing to release. */
void chrome_pak_t::header_v5_part_t::_clean_up() {
    // Intentionally empty.
}

/**
 * Records the entry's table index and whether it has a body, then parses
 * the fixed-size table entry from `p__io`.
 */
chrome_pak_t::resource_t::resource_t(int32_t p_idx, bool p_has_body, kaitai::kstream* p__io, chrome_pak_t* p__parent, chrome_pak_t* p__root) : kaitai::kstruct(p__io) {
    // Lazily computed values have not been evaluated yet.
    f_body = false;
    f_len_body = false;

    // Parameters handed in by the parent's table loop.
    m_has_body = p_has_body;
    m_idx = p_idx;

    m__root = p__root;
    m__parent = p__parent;

    _read();
}

void chrome_pak_t::resource_t::_read() {
    m_id = m__io->read_u2le();
    m_ofs_body = m__io->read_u4le();
}

/** Runs the cleanup hook. */
chrome_pak_t::resource_t::~resource_t() {
    this->_clean_up();
}

/**
 * Cleanup hook called from the destructor. A body that was read is held in
 * a std::string member, so there is nothing to release by hand.
 */
void chrome_pak_t::resource_t::_clean_up() {
}

/**
 * Computes, once, the body length as the distance between this entry's
 * offset and the offset of the entry that follows it in the parent's
 * table. Entries without a body are only marked as null.
 */
int32_t chrome_pak_t::resource_t::len_body() {
    if (!f_len_body) {
        n_len_body = !has_body();
        if (!n_len_body) {
            const uint32_t next_ofs = _parent()->resources()->at((idx() + 1))->ofs_body();
            m_len_body = (next_ofs - ofs_body());
        }
        f_len_body = true;
    }
    return m_len_body;
}

/**
 * Returns the resource bytes, reading them on first use: the stream is
 * moved to `ofs_body()`, `len_body()` bytes are read, and the previous
 * stream position is restored. Entries without a body are marked as null
 * and the stored (unread) string is returned.
 */
std::string chrome_pak_t::resource_t::body() {
    if (f_body)
        return m_body;

    n_body = !has_body();
    if (n_body)
        return m_body;

    // Remember where the stream was so that reading the body does not
    // disturb whoever is using the stream.
    std::streampos saved_pos = m__io->pos();
    m__io->seek(ofs_body());
    m_body = m__io->read_bytes(len_body());
    m__io->seek(saved_pos);

    f_body = true;
    return m_body;
}

/** Stores the root/parent links and parses the alias entry from `p__io`. */
chrome_pak_t::alias_t::alias_t(kaitai::kstream* p__io, chrome_pak_t* p__parent, chrome_pak_t* p__root) : kaitai::kstruct(p__io) {
    // The referenced resource is looked up lazily.
    f_resource = false;

    m__root = p__root;
    m__parent = p__parent;

    _read();
}

/**
 * Reads the alias id and the index of the resource it refers to.
 *
 * Throws kaitai::validation_greater_than_error when the index does not
 * address one of the parent's `num_resources()` real resources.
 */
void chrome_pak_t::alias_t::_read() {
    m_id = m__io->read_u2le();
    m_resource_idx = m__io->read_u2le();

    // Compare against the count itself. `num_resources() - 1` is unsigned
    // arithmetic and wraps to 0xFFFFFFFF when the file declares no
    // resources, which would let every index pass the check.
    const uint32_t num_resources = _parent()->num_resources();
    if (resource_idx() >= num_resources) {
        // The index is 16-bit, so the count is at most 65535 here and the
        // cast is lossless. With no resources at all no index is valid;
        // 0 is reported as the bound in that case.
        const uint16_t max_idx = (num_resources == 0) ? static_cast<uint16_t>(0) : static_cast<uint16_t>(num_resources - 1);
        throw kaitai::validation_greater_than_error<uint16_t>(max_idx, resource_idx(), _io(), std::string("/types/alias/seq/1"));
    }
}

/** Runs the (empty) cleanup hook; the referenced resource is not owned. */
chrome_pak_t::alias_t::~alias_t() {
    this->_clean_up();
}

/** Cleanup hook called from the destructor; this type has nothing to release. */
void chrome_pak_t::alias_t::_clean_up() {
    // Intentionally empty.
}

/**
 * Returns the resource table entry this alias points at, looking it up in
 * the parent's table on first use and caching the result.
 *
 * The pointer is non-owning: the entry stays owned by the parent's table.
 * `std::vector::at` throws std::out_of_range for an index outside it.
 */
chrome_pak_t::resource_t* chrome_pak_t::alias_t::resource() {
    if (f_resource)
        return m_resource;
    // The table stores std::unique_ptr elements, which do not convert
    // implicitly to a raw pointer; `.get()` borrows the entry.
    m_resource = _parent()->resources()->at(resource_idx()).get();
    f_resource = true;
    return m_resource;
}

/**
 * Resource count, computed once: taken from the version 5 header part for
 * version 5 files and from `num_resources_v4()` otherwise.
 */
uint32_t chrome_pak_t::num_resources() {
    if (!f_num_resources) {
        if (version() == 5) {
            m_num_resources = v5_part()->num_resources();
        } else {
            m_num_resources = num_resources_v4();
        }
        f_num_resources = true;
    }
    return m_num_resources;
}

/**
 * Alias count, computed once: taken from the version 5 header part for
 * version 5 files, 0 otherwise.
 */
uint16_t chrome_pak_t::num_aliases() {
    if (!f_num_aliases) {
        if (version() == 5) {
            m_num_aliases = v5_part()->num_aliases();
        } else {
            m_num_aliases = 0;
        }
        f_num_aliases = true;
    }
    return m_num_aliases;
}