Format mostly used by Google Chrome and various Android apps to store resources such as translated strings, help messages and images.
This page hosts a formal specification of the Chrome PAK serialization format using Kaitai Struct. This specification can be automatically translated into a variety of programming languages to get a parsing library.
All parsing code for C++11/STL generated by Kaitai Struct depends on the C++/STL runtime library. You have to install it before you can parse data.
For C++, the easiest way is to clone the runtime library sources and build them along with your project.
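Using Kaitai Struct in C++/STL usually consists of 3 steps: (1) create an STL input stream (std::istream), (2) wrap that stream into a Kaitai stream, and (3) invoke the parser on it.
For the first step, one can open a local file, or use an existing std::string or char* buffer: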
#include <fstream>
std::ifstream is("path/to/local/file.pak", std::ifstream::binary);
#include <sstream>
std::istringstream is(str);
#include <sstream>
const char buf[] = { ... };
std::string str(buf, sizeof buf);
std::istringstream is(str);
#include "kaitai/kaitaistream.h"
kaitai::kstream ks(&is);
chrome_pak_t data(&ks);
After that, one can get various attributes from the structure by invoking getter methods like:
data.version() // => only versions 4 and 5 are supported
#pragma once
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
#include "kaitai/kaitaistruct.h"
#include <stdint.h>
#include <memory>
#include <vector>
#if KAITAI_STRUCT_VERSION < 9000L
#error "Incompatible Kaitai Struct C++/STL API: version 0.9 or later is required"
#endif
/**
* Format mostly used by Google Chrome and various Android apps to store
* resources such as translated strings, help messages and images.
* \sa https://web.archive.org/web/20220126211447/https://dev.chromium.org/developers/design-documents/linuxresourcesandlocalizedstrings Source
* \sa https://chromium.googlesource.com/chromium/src/tools/grit/+/3c36f27/grit/format/data_pack.py Source
* \sa https://chromium.googlesource.com/chromium/src/tools/grit/+/8a23eae/grit/format/data_pack.py Source
*/
class chrome_pak_t : public kaitai::kstruct {
public:
class header_v5_part_t;
class resource_t;
class alias_t;
enum encodings_t {
ENCODINGS_BINARY = 0,
ENCODINGS_UTF8 = 1,
ENCODINGS_UTF16 = 2
};
chrome_pak_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent = nullptr, chrome_pak_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~chrome_pak_t();
class header_v5_part_t : public kaitai::kstruct {
public:
header_v5_part_t(kaitai::kstream* p__io, chrome_pak_t* p__parent = nullptr, chrome_pak_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~header_v5_part_t();
private:
std::string m_encoding_padding;
uint16_t m_num_resources;
uint16_t m_num_aliases;
chrome_pak_t* m__root;
chrome_pak_t* m__parent;
public:
std::string encoding_padding() const { return m_encoding_padding; }
uint16_t num_resources() const { return m_num_resources; }
uint16_t num_aliases() const { return m_num_aliases; }
chrome_pak_t* _root() const { return m__root; }
chrome_pak_t* _parent() const { return m__parent; }
};
class resource_t : public kaitai::kstruct {
public:
resource_t(int32_t p_idx, bool p_has_body, kaitai::kstream* p__io, chrome_pak_t* p__parent = nullptr, chrome_pak_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~resource_t();
private:
bool f_len_body;
int32_t m_len_body;
bool n_len_body;
public:
bool _is_null_len_body() { len_body(); return n_len_body; };
private:
public:
/**
* MUST NOT be accessed until the next `resource` is parsed
*/
int32_t len_body();
private:
bool f_body;
std::string m_body;
bool n_body;
public:
bool _is_null_body() { body(); return n_body; };
private:
public:
/**
* MUST NOT be accessed until the next `resource` is parsed
*/
std::string body();
private:
uint16_t m_id;
uint32_t m_ofs_body;
int32_t m_idx;
bool m_has_body;
chrome_pak_t* m__root;
chrome_pak_t* m__parent;
public:
uint16_t id() const { return m_id; }
uint32_t ofs_body() const { return m_ofs_body; }
int32_t idx() const { return m_idx; }
bool has_body() const { return m_has_body; }
chrome_pak_t* _root() const { return m__root; }
chrome_pak_t* _parent() const { return m__parent; }
};
class alias_t : public kaitai::kstruct {
public:
alias_t(kaitai::kstream* p__io, chrome_pak_t* p__parent = nullptr, chrome_pak_t* p__root = nullptr);
private:
void _read();
void _clean_up();
public:
~alias_t();
private:
bool f_resource;
resource_t* m_resource;
public:
resource_t* resource();
private:
uint16_t m_id;
uint16_t m_resource_idx;
chrome_pak_t* m__root;
chrome_pak_t* m__parent;
public:
uint16_t id() const { return m_id; }
uint16_t resource_idx() const { return m_resource_idx; }
chrome_pak_t* _root() const { return m__root; }
chrome_pak_t* _parent() const { return m__parent; }
};
private:
bool f_num_resources;
uint32_t m_num_resources;
public:
uint32_t num_resources();
private:
bool f_num_aliases;
uint16_t m_num_aliases;
public:
uint16_t num_aliases();
private:
uint32_t m_version;
uint32_t m_num_resources_v4;
bool n_num_resources_v4;
public:
bool _is_null_num_resources_v4() { num_resources_v4(); return n_num_resources_v4; };
private:
encodings_t m_encoding;
std::unique_ptr<header_v5_part_t> m_v5_part;
bool n_v5_part;
public:
bool _is_null_v5_part() { v5_part(); return n_v5_part; };
private:
std::unique_ptr<std::vector<std::unique_ptr<resource_t>>> m_resources;
std::unique_ptr<std::vector<std::unique_ptr<alias_t>>> m_aliases;
chrome_pak_t* m__root;
kaitai::kstruct* m__parent;
public:
/**
* only versions 4 and 5 are supported
*/
uint32_t version() const { return m_version; }
uint32_t num_resources_v4() const { return m_num_resources_v4; }
/**
* Character encoding of all text resources in the PAK file. Note that
* the file can **always** contain binary resources, this only applies to
* those that are supposed to hold text.
*
* In practice, this will probably always be `encodings::utf8` - I haven't
* seen any organic file that would state otherwise. `UTF8` is also usually
* hardcoded in Python scripts from the GRIT repository that generate .pak
* files (for example
* [`pak_util.py:79`](https://chromium.googlesource.com/chromium/src/tools/grit/+/8a23eae/pak_util.py#79)).
*/
encodings_t encoding() const { return m_encoding; }
header_v5_part_t* v5_part() const { return m_v5_part.get(); }
/**
* The length is calculated by looking at the offset of
* the next item, so an extra entry is stored with id 0
* and offset pointing to the end of the resources.
*/
std::vector<std::unique_ptr<resource_t>>* resources() const { return m_resources.get(); }
std::vector<std::unique_ptr<alias_t>>* aliases() const { return m_aliases.get(); }
chrome_pak_t* _root() const { return m__root; }
kaitai::kstruct* _parent() const { return m__parent; }
};
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild
#include "chrome_pak.h"
#include "kaitai/exceptions.h"
// Root constructor: records the parent, makes this object its own root,
// clears the lazy-instance flags and owned containers, then parses at once.
// `p__root` is accepted for signature uniformity but not used here.
chrome_pak_t::chrome_pak_t(kaitai::kstream* p__io, kaitai::kstruct* p__parent, chrome_pak_t* p__root)
    : kaitai::kstruct(p__io),
      f_num_resources(false),
      f_num_aliases(false),
      m_v5_part(nullptr),
      m_resources(nullptr),
      m_aliases(nullptr),
      m__root(this),
      m__parent(p__parent)
{
    _read();
}
/**
 * Parses the header, the resource table and the alias table from the stream.
 *
 * Throws kaitai::validation_not_any_of_error when the version is neither
 * 4 nor 5. The resource table holds `num_resources() + 1` entries: the
 * extra last entry is constructed with `has_body == false`.
 */
void chrome_pak_t::_read() {
    m_version = m__io->read_u4le();
    if (!( ((version() == 4) || (version() == 5)) )) {
        throw kaitai::validation_not_any_of_error<uint32_t>(version(), _io(), std::string("/seq/0"));
    }

    // Version 4 stores the resource count directly in the header.
    n_num_resources_v4 = true;
    if (version() == 4) {
        n_num_resources_v4 = false;
        m_num_resources_v4 = m__io->read_u4le();
    }

    m_encoding = static_cast<chrome_pak_t::encodings_t>(m__io->read_u1());

    // Version 5 stores padding and both counts in a separate sub-structure.
    n_v5_part = true;
    if (version() == 5) {
        n_v5_part = false;
        m_v5_part = std::unique_ptr<header_v5_part_t>(new header_v5_part_t(m__io, this, m__root));
    }

    m_resources = std::unique_ptr<std::vector<std::unique_ptr<resource_t>>>(new std::vector<std::unique_ptr<resource_t>>());
    // The count is a uint32_t; do the "+ 1" in 64 bits so it can neither wrap
    // to 0 nor turn negative when narrowed, which would silently skip the
    // table for very large (corrupt) counts instead of failing on the reads.
    const uint64_t l_resources = static_cast<uint64_t>(num_resources()) + 1;
    for (uint64_t i = 0; i < l_resources; i++) {
        // resource_t takes its index as int32_t, hence the explicit cast.
        m_resources->push_back(std::unique_ptr<resource_t>(new resource_t(static_cast<int32_t>(i), i < num_resources(), m__io, this, m__root)));
    }

    m_aliases = std::unique_ptr<std::vector<std::unique_ptr<alias_t>>>(new std::vector<std::unique_ptr<alias_t>>());
    const int l_aliases = num_aliases();
    for (int i = 0; i < l_aliases; i++) {
        m_aliases->push_back(std::unique_ptr<alias_t>(new alias_t(m__io, this, m__root)));
    }
}
// Destructor: all teardown is delegated to _clean_up().
chrome_pak_t::~chrome_pak_t()
{
    this->_clean_up();
}
// Nothing to release by hand: the optional fields (`num_resources_v4`,
// `v5_part`) need no manual cleanup, so the body has no effect.
void chrome_pak_t::_clean_up()
{
}
// Stores the root/parent links and parses the structure immediately.
chrome_pak_t::header_v5_part_t::header_v5_part_t(kaitai::kstream* p__io, chrome_pak_t* p__parent, chrome_pak_t* p__root)
    : kaitai::kstruct(p__io),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
void chrome_pak_t::header_v5_part_t::_read() {
m_encoding_padding = m__io->read_bytes(3);
m_num_resources = m__io->read_u2le();
m_num_aliases = m__io->read_u2le();
}
// Destructor: all teardown is delegated to _clean_up().
chrome_pak_t::header_v5_part_t::~header_v5_part_t()
{
    this->_clean_up();
}
// This type holds nothing that needs manual release; intentionally empty.
void chrome_pak_t::header_v5_part_t::_clean_up()
{
}
// Stores the two parameters (`idx`, `has_body`) and the root/parent links,
// marks both lazy instances as not yet computed, then parses immediately.
chrome_pak_t::resource_t::resource_t(int32_t p_idx, bool p_has_body, kaitai::kstream* p__io, chrome_pak_t* p__parent, chrome_pak_t* p__root)
    : kaitai::kstruct(p__io),
      f_len_body(false),
      f_body(false),
      m_idx(p_idx),
      m_has_body(p_has_body),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
void chrome_pak_t::resource_t::_read() {
m_id = m__io->read_u2le();
m_ofs_body = m__io->read_u4le();
}
// Destructor: all teardown is delegated to _clean_up().
chrome_pak_t::resource_t::~resource_t()
{
    this->_clean_up();
}
// The lazily read body is a std::string and releases itself, so even when
// it was loaded there is nothing to do here; the body has no effect.
void chrome_pak_t::resource_t::_clean_up()
{
}
/**
 * Length of this resource's body, computed once and cached.
 *
 * The length is the next table entry's body offset minus this entry's
 * offset. An entry constructed with `has_body == false` has no next entry
 * to measure against: the value is then null (`_is_null_len_body()` is
 * true) and 0 is returned, rather than an uninitialized member.
 *
 * \return body length in bytes, or 0 when the value is null.
 */
int32_t chrome_pak_t::resource_t::len_body() {
    if (f_len_body)
        return m_len_body;
    n_len_body = true;
    m_len_body = 0;  // defined value for the "no body" case
    if (has_body()) {
        n_len_body = false;
        m_len_body = (_parent()->resources()->at((idx() + 1))->ofs_body() - ofs_body());
    }
    f_len_body = true;
    return m_len_body;
}
// Returns the raw bytes of this resource, reading them on first use.
// An entry without a body yields the (empty) stored string and is flagged
// null. Otherwise the stream is moved to `ofs_body()`, `len_body()` bytes
// are read, and the previous stream position is restored afterwards.
std::string chrome_pak_t::resource_t::body()
{
    if (f_body) {
        return m_body;
    }

    n_body = !has_body();
    if (n_body) {
        return m_body;
    }

    std::streampos saved_pos = m__io->pos();
    m__io->seek(ofs_body());
    m_body = m__io->read_bytes(len_body());
    m__io->seek(saved_pos);
    f_body = true;

    return m_body;
}
// Stores the root/parent links, marks the lazy `resource` instance as not
// yet resolved, then parses immediately.
chrome_pak_t::alias_t::alias_t(kaitai::kstream* p__io, chrome_pak_t* p__parent, chrome_pak_t* p__root)
    : kaitai::kstruct(p__io),
      f_resource(false),
      m__root(p__root),
      m__parent(p__parent)
{
    _read();
}
void chrome_pak_t::alias_t::_read() {
m_id = m__io->read_u2le();
m_resource_idx = m__io->read_u2le();
if (!(resource_idx() <= (_parent()->num_resources() - 1))) {
throw kaitai::validation_greater_than_error<uint16_t>((_parent()->num_resources() - 1), resource_idx(), _io(), std::string("/types/alias/seq/1"));
}
}
// Destructor: all teardown is delegated to _clean_up().
chrome_pak_t::alias_t::~alias_t()
{
    this->_clean_up();
}
// The resolved resource pointer is not released here; intentionally empty.
void chrome_pak_t::alias_t::_clean_up()
{
}
/**
 * Resolves the resource entry this alias points at, caching the result.
 *
 * The parent's resource vector stores `std::unique_ptr<resource_t>`
 * elements, so the raw pointer is taken with `.get()`; a unique_ptr does
 * not convert implicitly to a raw pointer. The returned pointer is
 * non-owning.
 *
 * \return pointer to the entry at `resource_idx()` in the parent's table.
 */
chrome_pak_t::resource_t* chrome_pak_t::alias_t::resource() {
    if (f_resource)
        return m_resource;
    m_resource = _parent()->resources()->at(resource_idx()).get();
    f_resource = true;
    return m_resource;
}
// Number of resources, computed once and cached: taken from the v5 header
// part for version 5, otherwise from the version 4 header field.
uint32_t chrome_pak_t::num_resources()
{
    if (!f_num_resources) {
        if (version() == 5) {
            m_num_resources = v5_part()->num_resources();
        } else {
            m_num_resources = num_resources_v4();
        }
        f_num_resources = true;
    }
    return m_num_resources;
}
// Number of aliases, computed once and cached: taken from the v5 header
// part for version 5, and 0 for any other version.
uint16_t chrome_pak_t::num_aliases()
{
    if (!f_num_aliases) {
        if (version() == 5) {
            m_num_aliases = v5_part()->num_aliases();
        } else {
            m_num_aliases = 0;
        }
        f_num_aliases = true;
    }
    return m_num_aliases;
}